diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..5c37f7f2bbfcea4b66e04093a04dd46a74f598a7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +chemprop-updated/docs/source/_static/images/message_passing.png filter=lfs diff=lfs merge=lfs -text +chemprop/docs/source/_static/images/message_passing.png filter=lfs diff=lfs merge=lfs -text diff --git a/chemprop-updated/.bumpversion.cfg b/chemprop-updated/.bumpversion.cfg new file mode 100644 index 0000000000000000000000000000000000000000..6cd39fbd0a2f9ec573a3a412e5c42fdb6c077e24 --- /dev/null +++ b/chemprop-updated/.bumpversion.cfg @@ -0,0 +1,10 @@ +[bumpversion] +current_version = 2.1.2 +commit = True +tag = True + +[bumpversion:file:pyproject.toml] + +[bumpversion:file:chemprop/__init__.py] + +[bumpversion:file:docs/source/conf.py] diff --git a/chemprop-updated/.dockerignore b/chemprop-updated/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..c5e0ea7ad77234ddf99146c9320aea416495adbb --- /dev/null +++ b/chemprop-updated/.dockerignore @@ -0,0 +1,3 @@ +**.git* +.dockerignore +Dockerfile diff --git a/chemprop-updated/.flake8 b/chemprop-updated/.flake8 new file mode 100644 index 0000000000000000000000000000000000000000..a188a7e321cb817e76c3bf76f08c196a709a2a99 --- /dev/null +++ b/chemprop-updated/.flake8 @@ -0,0 +1,9 @@ +[flake8] +ignore = E203, E266, E501, F403, E741, W503, W605 +max-line-length = 100 +max-complexity = 18 +per-file-ignores = + __init__.py: F401 + chemprop/nn/predictors.py: F405 + chemprop/nn/metrics.py: F405 + tests/unit/nn/test_metrics.py: E121, E122, E131, E241, W291 diff --git a/chemprop-updated/.github/ISSUE_TEMPLATE/todo.md b/chemprop-updated/.github/ISSUE_TEMPLATE/todo.md new file mode 100644 index 0000000000000000000000000000000000000000..e88203a8de5130928a009d023fe6cc25f865facb --- /dev/null +++ b/chemprop-updated/.github/ISSUE_TEMPLATE/todo.md @@ -0,0 +1,11 @@ +--- +name: to-do +about: Add an item to the to-do list. More generic than a feature request +title: "[TODO]: " +labels: todo +assignees: '' + +--- + +**Notes** +_these could be implementation or more specific details to keep in mind, if they'll be helpful for issue tracking_ diff --git a/chemprop-updated/.github/ISSUE_TEMPLATE/v1_bug_report.md b/chemprop-updated/.github/ISSUE_TEMPLATE/v1_bug_report.md new file mode 100644 index 0000000000000000000000000000000000000000..43808ed6f1bae881d608b8c1bb0a223a714a7ce4 --- /dev/null +++ b/chemprop-updated/.github/ISSUE_TEMPLATE/v1_bug_report.md @@ -0,0 +1,35 @@ +--- +name: v1 Bug Report +about: Report a bug in v1 (will not be fixed) +title: "[v1 BUG]: " +labels: bug, v1-wontfix +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**Example(s)** +Provide some examples of where the current code fails. Feel free to share your actual code for additional context, but a minimal and isolated example is preferred. + +**Expected behavior** +A clear and concise description of what you expected to happen. If there is correct, expected output, include that here as well. + +**Error Stack Trace** +If the bug is resulting in an error message, provide the _full_ stack trace (not just the last line). 
This is helpful for debugging, especially in cases where you aren't able to provide a minimum/isolated working example with accompanying files. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Environment** +- python version +- package versions: `conda list` or `pip list` +- OS + +**Checklist** +- [ ] all dependencies are satisifed: `conda list` or `pip list` shows the packages listed in the `pyproject.toml` +- [ ] the unit tests are working: `pytest -v` reports no errors + +**Additional context** +Add any other context about the problem here. diff --git a/chemprop-updated/.github/ISSUE_TEMPLATE/v1_question.md b/chemprop-updated/.github/ISSUE_TEMPLATE/v1_question.md new file mode 100644 index 0000000000000000000000000000000000000000..77227d669535d1c59391df2e829e43f1619bfdf1 --- /dev/null +++ b/chemprop-updated/.github/ISSUE_TEMPLATE/v1_question.md @@ -0,0 +1,17 @@ +--- +name: v1 Question +about: Have a question about how to use Chemprop v1? +title: "[v1 QUESTION]: " +labels: question +assignees: '' + +--- + +**What are you trying to do?** +Please tell us what you're trying to do with Chemprop, providing as much detail as possible + +**Previous attempts** +If possible, provide some examples of what you've already tried and what the output was. + +**Screenshots** +If applicable, add screenshots to help explain your problem. diff --git a/chemprop-updated/.github/ISSUE_TEMPLATE/v2_bug_report.md b/chemprop-updated/.github/ISSUE_TEMPLATE/v2_bug_report.md new file mode 100644 index 0000000000000000000000000000000000000000..36894da38e9b2ab8c8291b1e98a5393b107725ee --- /dev/null +++ b/chemprop-updated/.github/ISSUE_TEMPLATE/v2_bug_report.md @@ -0,0 +1,35 @@ +--- +name: v2 Bug Report +about: Create a report to help us improve +title: "[v2 BUG]: " +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**Example(s)** +Provide some examples of where the current code fails. Feel free to share your actual code for additional context, but a minimal and isolated example is preferred. + +**Expected behavior** +A clear and concise description of what you expected to happen. If there is correct, expected output, include that here as well. + +**Error Stack Trace** +If the bug is resulting in an error message, provide the _full_ stack trace (not just the last line). This is helpful for debugging, especially in cases where you aren't able to provide a minimum/isolated working example with accompanying files. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Environment** +- python version +- package versions: `conda list` or `pip list` +- OS + +**Checklist** +- [ ] all dependencies are satisifed: `conda list` or `pip list` shows the packages listed in the `pyproject.toml` +- [ ] the unit tests are working: `pytest -v` reports no errors + +**Additional context** +Add any other context about the problem here. diff --git a/chemprop-updated/.github/ISSUE_TEMPLATE/v2_feature_request.md b/chemprop-updated/.github/ISSUE_TEMPLATE/v2_feature_request.md new file mode 100644 index 0000000000000000000000000000000000000000..2df14257c2da81ebf200dc0aa55630bf77fc32ec --- /dev/null +++ b/chemprop-updated/.github/ISSUE_TEMPLATE/v2_feature_request.md @@ -0,0 +1,23 @@ +--- +name: v2 Feature Request +about: Suggest an idea for this project +title: "[v2 FEATURE]: " +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem? 
Please describe.** +A clear and concise description of what the problem is. + +**Use-cases/examples of this new feature** +What are some example workflows that would employ this new feature? Are there any relevant issues? + +**Desired solution/workflow** +A clear and concise description of what you want to happen. Include some (pseudo)code, if possible + +**Discussion** +What are some considerations around this new feature? Are there alternative approaches to consider? What should the scope of the feature be? + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/chemprop-updated/.github/ISSUE_TEMPLATE/v2_question.md b/chemprop-updated/.github/ISSUE_TEMPLATE/v2_question.md new file mode 100644 index 0000000000000000000000000000000000000000..8a79ad871770c5122033baee35226c6b99cfb6d8 --- /dev/null +++ b/chemprop-updated/.github/ISSUE_TEMPLATE/v2_question.md @@ -0,0 +1,17 @@ +--- +name: v2 Question +about: Have a question about how to use Chemprop v2? +title: "[v2 QUESTION]: " +labels: question +assignees: '' + +--- + +**What are you trying to do?** +Please tell us what you're trying to do with Chemprop, providing as much detail as possible + +**Previous attempts** +If possible, provide some examples of what you've already tried and what the output was. + +**Screenshots** +If applicable, add screenshots to help explain your problem. diff --git a/chemprop-updated/.github/PULL_REQUEST_TEMPLATE.md b/chemprop-updated/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000000000000000000000000000000000000..7f0331e7fa838665894689fbc6ff4f1f9d440cdc --- /dev/null +++ b/chemprop-updated/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,18 @@ +## Description +Include a brief summary of the bug/feature/etc. that this PR seeks to address + +## Example / Current workflow +Include a sample workflow to either **(a)** reproduce the bug with current codebase or **(b)** showcase the deficiency does this PR seeks to address + +## Bugfix / Desired workflow +Include either **(a)** the same workflow from above with the correct output produced via this PR **(b)** some (pseudo)code containing the new workflow that this PR will (seek to) implement + +## Questions +If there are open questions about implementation strategy or scope of the PR, include them here + +## Relevant issues +If appropriate, please tag them here and include a quick summary + +## Checklist +- [ ] linted with flake8? +- [ ] (if appropriate) unit tests added? diff --git a/chemprop-updated/.github/PULL_REQUEST_TEMPLATE/bugfix.md b/chemprop-updated/.github/PULL_REQUEST_TEMPLATE/bugfix.md new file mode 100644 index 0000000000000000000000000000000000000000..3e79367a22854d83ac45896ceb9afb1b7be43462 --- /dev/null +++ b/chemprop-updated/.github/PULL_REQUEST_TEMPLATE/bugfix.md @@ -0,0 +1,12 @@ +## Bug report +Include a brief summary of the bug that this PR seeks to address. If possible, include relevant issue tags + +## Example +Include a sample execution to reproduce the bug with current codebase, and some sample output showcasing that the PR fixes this bug + +## Questions +If there are open questions about implementation strategy or scope of the PR, include them here + +## Checklist +- [ ] linted with flake8? +- [ ] (if necessary) appropriate unit tests added? 
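The bugfix and feature PR templates above both ask for flake8-clean code and accompanying unit tests, which the CI workflow below runs with `pytest`. As a rough illustration of the expected shape of such a regression test, here is a minimal, self-contained pytest sketch; the function and test names are hypothetical and are not part of the Chemprop codebase.

```python
import pytest


def canonicalize_whitespace(smiles: str) -> str:
    """Hypothetical stand-in for the helper a bugfix PR might touch."""
    return smiles.strip()


@pytest.mark.parametrize(
    "raw, expected",
    [("  CCO  ", "CCO"), ("c1ccccc1", "c1ccccc1")],
)
def test_canonicalize_whitespace(raw, expected):
    # The "Example" section of the template would show this failing before
    # the fix and passing afterwards; `pytest -v` runs it both locally and in CI.
    assert canonicalize_whitespace(raw) == expected
```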
diff --git a/chemprop-updated/.github/PULL_REQUEST_TEMPLATE/new_feature.md b/chemprop-updated/.github/PULL_REQUEST_TEMPLATE/new_feature.md new file mode 100644 index 0000000000000000000000000000000000000000..a38b8ab66c3bfe610f1b1b6315252f93e83eb8f8 --- /dev/null +++ b/chemprop-updated/.github/PULL_REQUEST_TEMPLATE/new_feature.md @@ -0,0 +1,15 @@ +## Statement of need +What deficiency does this PR seek to address? If there are relevant issues, please tag them here + +## Current workflow +How is this need achieved with the current codebase? + +## Desired workflow +Include some (pseudo)code containing the new workflow that this PR will (seek to) implement + +## Questions +If there are open questions about implementation strategy or scope of the PR, include them here + +## Checklist +- [ ] linted with flake8? +- [ ] appropriate unit tests added? diff --git a/chemprop-updated/.github/workflows/ci.yml b/chemprop-updated/.github/workflows/ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..1f2b92c5a3d688c9fc3b371ef3163d3dd1659719 --- /dev/null +++ b/chemprop-updated/.github/workflows/ci.yml @@ -0,0 +1,158 @@ +# ci.yml +# +# Continuous Integration for Chemprop - checks build, code formatting, and runs tests for all +# proposed changes and on a regular schedule +# +# Note: this file contains extensive inline documentation to aid with knowledge transfer. + +name: Continuous Integration + +on: + # run on pushes/pull requests to/against main + push: + branches: [main] + pull_request: + branches: [main] + # run this in the morning on weekdays to catch dependency issues + schedule: + - cron: "0 8 * * 1-5" + # allow manual runs + workflow_dispatch: + +# cancel previously running tests if new commits are made +# https://docs.github.com/en/actions/examples/using-concurrency-expressions-and-a-test-matrix +concurrency: + group: actions-id-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + USE_LIBUV: 0 # libuv doesn't work on GitHub actions Windows runner + +jobs: + build: + name: Check Build + runs-on: ubuntu-latest + steps: + # clone the repo, attempt to build + - uses: actions/checkout@v4 + - run: python -m pip install build + - run: python -m build . + + lint: + name: Check Formatting + needs: build + runs-on: ubuntu-latest + steps: + # clone the repo, run black and flake8 on it + - uses: actions/checkout@v4 + - run: python -m pip install black==23.* flake8 isort + - run: black --check . + - run: flake8 . + - run: isort --check . 
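The Check Formatting job above pins black 23.* and also runs flake8 and isort, failing the build if any of them complain. A small local helper along the following lines reproduces the same gate before pushing; it assumes the three tools are installed in the active environment (e.g. `pip install black==23.* flake8 isort`) and is a convenience sketch rather than a file in the repository.

```python
# check_format.py -- local sketch mirroring the CI "Check Formatting" job.
import subprocess
import sys

CHECKS = [
    ["black", "--check", "."],
    ["flake8", "."],
    ["isort", "--check", "."],
]

exit_code = 0
for cmd in CHECKS:
    print("$", " ".join(cmd))
    result = subprocess.run(cmd)
    exit_code = exit_code or result.returncode

sys.exit(exit_code)
```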
+ + test: + name: Execute Tests + needs: lint + runs-on: ${{ matrix.os }} + defaults: + run: + # run with a login shell (so that the conda environment is activated) + # and echo the commands we run as we do them (for debugging purposes) + shell: bash -el {0} + strategy: + # if one platform/python version fails, continue testing the others + fail-fast: false + matrix: + # test on all platforms with both supported versions of Python + os: [ubuntu-latest, macos-13, windows-latest] + python-version: [3.11, 3.12] + steps: + - uses: actions/checkout@v4 + # use a version of the conda virtual environment manager to set up an + # isolated environment with the Python version we want + - uses: conda-incubator/setup-miniconda@v3 + with: + python-version: ${{ matrix.python-version }} + auto-update-conda: true + show-channel-urls: true + conda-remove-defaults: "true" + environment-file: environment.yml + activate-environment: chemprop + - name: Install dependencies + shell: bash -l {0} + run: | + python -m pip install nbmake + python -m pip install ".[dev,docs,test,hpopt]" + - name: Test with pytest + shell: bash -l {0} + run: | + pytest -v tests + - name: Test notebooks + shell: bash -l {0} + run: | + python -m pip install matplotlib + pytest --no-cov -v --nbmake $(find examples -name '*.ipynb' ! -name 'use_featurizer_with_other_libraries.ipynb' ! -name 'shapley_value_with_customized_featurizers.ipynb') + pytest --no-cov -v --nbmake $(find docs/source/tutorial/python -name "*.ipynb") + pypi: + name: Build and publish Python 🐍 distributions 📦 to PyPI + runs-on: ubuntu-latest + # only run if the tests pass + needs: [test] + # run only on pushes to main on chemprop + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' && github.repository == 'chemprop/chemprop'}} + steps: + - uses: actions/checkout@master + - name: Set up Python 3.12 + uses: actions/setup-python@v3 + with: + python-version: "3.11" + - name: Install pypa/build + run: >- + python -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: >- + python -m + build + --sdist + --wheel + --outdir dist/ + . 
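Before the artifacts produced by the build step above are published, a quick local sanity check is to confirm that the files in `dist/` carry the same version that `.bumpversion.cfg` keeps synchronized across `pyproject.toml`, `chemprop/__init__.py`, and `docs/source/conf.py`. The following is only a sketch: it assumes `python -m build --sdist --wheel --outdir dist/ .` has already been run from the repository root and that `chemprop` is importable.

```python
from pathlib import Path

import chemprop  # exposes __version__, kept in sync by bumpversion

artifacts = sorted(Path("dist").glob("chemprop-*"))
assert artifacts, "run `python -m build --sdist --wheel --outdir dist/ .` first"
for artifact in artifacts:
    # filenames look like chemprop-2.1.2.tar.gz and chemprop-2.1.2-py3-none-any.whl
    assert chemprop.__version__ in artifact.name, artifact.name
print(f"dist/ matches chemprop {chemprop.__version__}:", [a.name for a in artifacts])
```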
+ - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} + skip-existing: true + verbose: true + + build-and-push-docker: + # shamelessly copied from: + # https://github.com/ReactionMechanismGenerator/RMG-Py/blob/bfaee1cad9909a17103a8e6ef9a22569c475964c/.github/workflows/CI.yml#L359C1-L386C54 + # which is also shamelessly copied from somewhere + runs-on: ubuntu-latest + # only run if the tests pass + needs: [test] + # run only on pushes to main on chemprop + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' && github.repository == 'chemprop/chemprop'}} + steps: + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Login to Docker Hub + uses: docker/login-action@v2 + with: + # repository secretes managed by the maintainers + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and Push + uses: docker/build-push-action@v4 + with: + push: true + tags: chemprop/chemprop:latest + diff --git a/chemprop-updated/.gitignore b/chemprop-updated/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ea9f6e3054d1c4e93965c11b90db8fa3bb880486 --- /dev/null +++ b/chemprop-updated/.gitignore @@ -0,0 +1,178 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
+#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +*.idea +*.DS_Store +*.vscode +*.csv +*.pkl +*.pt +*.json +*.sqlite3 +*.yaml +*.tfevents.* +*.ckpt +chemprop/_version.py +*.ckpt +*.ipynb +config.toml + +!tests/data/* diff --git a/chemprop-updated/.readthedocs.yml b/chemprop-updated/.readthedocs.yml new file mode 100644 index 0000000000000000000000000000000000000000..9110336dbdc6ef24b22efa666e1095593276cf5a --- /dev/null +++ b/chemprop-updated/.readthedocs.yml @@ -0,0 +1,19 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.11" + jobs: + post_install: + - python -m pip install --upgrade --upgrade-strategy only-if-needed --no-cache-dir ".[docs]" + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/source/conf.py diff --git a/chemprop-updated/CITATIONS.bib b/chemprop-updated/CITATIONS.bib new file mode 100644 index 0000000000000000000000000000000000000000..1eb6f4b3554ed25e2a7f2aa75a4d8f2eecb7eba0 --- /dev/null +++ b/chemprop-updated/CITATIONS.bib @@ -0,0 +1,37 @@ +# this was downloaded from ACS: https://pubs.acs.org/doi/10.1021/acs.jcim.9b00237 +@article{chemprop_theory, + author = {Yang, Kevin and Swanson, Kyle and Jin, Wengong and Coley, Connor and Eiden, Philipp and Gao, Hua and Guzman-Perez, Angel and Hopper, Timothy and Kelley, Brian and Mathea, Miriam and Palmer, Andrew and Settels, Volker and Jaakkola, Tommi and Jensen, Klavs and Barzilay, Regina}, + title = {Analyzing Learned Molecular Representations for Property Prediction}, + journal = {Journal of Chemical Information and Modeling}, + volume = {59}, + number = {8}, + pages = {3370-3388}, + year = {2019}, + doi = {10.1021/acs.jcim.9b00237}, + note ={PMID: 31361484}, + URL = { + https://doi.org/10.1021/acs.jcim.9b00237 + }, + eprint = { + https://doi.org/10.1021/acs.jcim.9b00237 + } +} + +# this was downloaded from ACS: https://pubs.acs.org/doi/10.1021/acs.jcim.3c01250 +@article{chemprop_software, + author = {Heid, Esther and Greenman, Kevin P. and Chung, Yunsie and Li, Shih-Cheng and Graff, David E. and Vermeire, Florence H. and Wu, Haoyang and Green, William H. 
and McGill, Charles J.}, + title = {Chemprop: A Machine Learning Package for Chemical Property Prediction}, + journal = {Journal of Chemical Information and Modeling}, + volume = {64}, + number = {1}, + pages = {9-17}, + year = {2024}, + doi = {10.1021/acs.jcim.3c01250}, + note ={PMID: 38147829}, + URL = { + https://doi.org/10.1021/acs.jcim.3c01250 + }, + eprint = { + https://doi.org/10.1021/acs.jcim.3c01250 + } +} diff --git a/chemprop-updated/CONTRIBUTING.md b/chemprop-updated/CONTRIBUTING.md new file mode 100644 index 0000000000000000000000000000000000000000..514fe849a9f0e2214ad314f9d32170d07b9300de --- /dev/null +++ b/chemprop-updated/CONTRIBUTING.md @@ -0,0 +1,40 @@ +# How to contribute + +We welcome contributions from external contributors, and this document +describes how to merge code changes into this repository. + +## Getting Started + +* Make sure you have a [GitHub account](https://github.com/signup/free). +* [Fork](https://help.github.com/articles/fork-a-repo/) this repository on GitHub. +* On your local machine, + [clone](https://help.github.com/articles/cloning-a-repository/) your fork of + the repository. + +## Making Changes + +* Add some really awesome code to your local fork. It's usually a [good + idea](http://blog.jasonmeridth.com/posts/do-not-issue-pull-requests-from-your-master-branch/) + to make changes on a + [branch](https://help.github.com/articles/creating-and-deleting-branches-within-your-repository/) + with the branch name relating to the feature you are going to add. +* When you are ready for others to examine and comment on your new feature, + navigate to your fork of `chemprop` on GitHub and open a [pull + request](https://help.github.com/articles/using-pull-requests/) (PR). Note that + after you launch a PR from one of your fork's branches, all + subsequent commits to that branch will be added to the open pull request + automatically. Each commit added to the PR will be validated for + mergability, compilation and test suite compliance; the results of these tests + will be visible on the PR page. +* If you're providing a new feature, you **must** add test cases and documentation. +* When the code is ready to go, run the test suite: `pytest`. +* When you're ready to be considered for merging, click the "Ready for review" + box on the PR page to let the Chemprop devs know that the changes are complete. + The code will not be merged until the continuous integration returns checkmarks, + and at least one core developer gives "Approved" reviews. + +## Additional Resources + +* [General GitHub documentation](https://help.github.com/) +* [PR best practices](http://codeinthehole.com/writing/pull-requests-and-other-good-practices-for-teams-using-github/) +* [A guide to contributing to software packages](http://www.contribution-guide.org) diff --git a/chemprop-updated/Dockerfile b/chemprop-updated/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..20f773292efa9b59bc348a1b7025c175e90b50d2 --- /dev/null +++ b/chemprop-updated/Dockerfile @@ -0,0 +1,50 @@ +# Dockerfile +# +# Builds a Docker image containing Chemprop and its required dependencies. +# +# Build this image with: +# git clone https://github.com/chemprop/chemprop.git +# cd chemprop +# docker build --tag=chemprop:latest . +# +# Run the built image with: +# docker run --name chemprop_container -it chemprop:latest +# +# Note: +# This image only runs on CPU - we do not provide a Dockerfile +# for GPU use (see installation documentation). 
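The header comments above give the `docker build` and `docker run` CLI commands for this image. The same build can also be driven from Python with the Docker SDK (docker-py); the snippet below is purely illustrative and assumes the `docker` package is installed, a Docker daemon is running locally, and the working directory is the cloned repository root.

```python
import docker  # pip install docker

client = docker.from_env()
# Equivalent to `docker build --tag=chemprop:latest .` from the repo root
image, build_logs = client.images.build(path=".", tag="chemprop:latest")
for chunk in build_logs:
    if "stream" in chunk:
        print(chunk["stream"], end="")
print("built image:", image.tags)
```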
+ +# Parent Image +FROM continuumio/miniconda3:latest + +# Install libxrender1 (required by RDKit) and then clean up +RUN apt-get update && \ + apt-get install -y \ + libxrender1 && \ + apt-get autoremove -y && \ + apt-get clean -y + +WORKDIR /opt/chemprop + +# build an empty conda environment with appropriate Python version +RUN conda create --name chemprop_env python=3.11* + +# This runs all subsequent commands inside the chemprop_env conda environment +# +# Analogous to just activating the environment, which we can't actually do here +# since that requires running conda init and restarting the shell (not possible +# in a Dockerfile build script) +SHELL ["conda", "run", "--no-capture-output", "-n", "chemprop_env", "/bin/bash", "-c"] + +# Follow the installation instructions then clear the cache +ADD chemprop chemprop +ENV PYTHONPATH /opt/chemprop +ADD LICENSE.txt pyproject.toml README.md ./ +RUN conda install pytorch cpuonly -c pytorch && \ + conda clean --all --yes && \ + python -m pip install . && \ + python -m pip cache purge + +# when running this image, open an interactive bash terminal inside the conda environment +RUN echo "conda activate chemprop_env" > ~/.bashrc +ENTRYPOINT ["/bin/bash", "--login"] diff --git a/chemprop-updated/LICENSE.txt b/chemprop-updated/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..77e0c51d93cc3e4e315ba539a4a801828dc1e366 --- /dev/null +++ b/chemprop-updated/LICENSE.txt @@ -0,0 +1,27 @@ +MIT License + +Copyright (c) 2024 The Chemprop Development Team (Regina Barzilay, +Jackson Burns, Yunsie Chung, Anna Doner, Xiaorui Dong, David Graff, +William Green, Kevin Greenman, Yanfei Guan, Esther Heid, Lior Hirschfeld, +Tommi Jaakkola, Wengong Jin, Olivier Lafontant-Joseph, Shih-Cheng Li, +Mengjie Liu, Joel Manu, Charles McGill, Angiras Menon, Nathan Morgan, +Hao-Wei Pang, Kevin Spiekermann, Kyle Swanson, Allison Tam, +Florence Vermeire, Haoyang Wu, and Kevin Yang, Jonathan Zheng) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/chemprop-updated/README.md b/chemprop-updated/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2f8816bfabfebdd0b7338a685c26866c8621dcc2 --- /dev/null +++ b/chemprop-updated/README.md @@ -0,0 +1,63 @@ + + + ChemProp Logo + + +# Chemprop + +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/chemprop)](https://badge.fury.io/py/chemprop) +[![PyPI version](https://badge.fury.io/py/chemprop.svg)](https://badge.fury.io/py/chemprop) +[![Anaconda-Server Badge](https://anaconda.org/conda-forge/chemprop/badges/version.svg)](https://anaconda.org/conda-forge/chemprop) +[![Build Status](https://github.com/chemprop/chemprop/workflows/tests/badge.svg)](https://github.com/chemprop/chemprop/actions/workflows/tests.yml) +[![Documentation Status](https://readthedocs.org/projects/chemprop/badge/?version=main)](https://chemprop.readthedocs.io/en/main/?badge=main) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![Downloads](https://static.pepy.tech/badge/chemprop)](https://pepy.tech/project/chemprop) +[![Downloads](https://static.pepy.tech/badge/chemprop/month)](https://pepy.tech/project/chemprop) +[![Downloads](https://static.pepy.tech/badge/chemprop/week)](https://pepy.tech/project/chemprop) + +Chemprop is a repository containing message passing neural networks for molecular property prediction. + +Documentation can be found [here](https://chemprop.readthedocs.io/en/main/). + +There are tutorial notebooks in the [`examples/`](https://github.com/chemprop/chemprop/tree/main/examples) directory. + +Chemprop recently underwent a ground-up rewrite and new major release (v2.0.0). A helpful transition guide from Chemprop v1 to v2 can be found [here](https://docs.google.com/spreadsheets/u/3/d/e/2PACX-1vRshySIknVBBsTs5P18jL4WeqisxDAnDE5VRnzxqYEhYrMe4GLS17w5KeKPw9sged6TmmPZ4eEZSTIy/pubhtml). This includes a side-by-side comparison of CLI argument options, a list of which arguments will be implemented in later versions of v2, and a list of changes to default hyperparameters. + +**License:** Chemprop is free to use under the [MIT License](LICENSE.txt). The Chemprop logo is free to use under [CC0 1.0](docs/source/_static/images/logo/LICENSE.txt). + +**References**: Please cite the appropriate papers if Chemprop is helpful to your research. + +- Chemprop was initially described in the papers [Analyzing Learned Molecular Representations for Property Prediction](https://pubs.acs.org/doi/abs/10.1021/acs.jcim.9b00237) for molecules and [Machine Learning of Reaction Properties via Learned Representations of the Condensed Graph of Reaction](https://doi.org/10.1021/acs.jcim.1c00975) for reactions. +- The interpretation functionality (available in v1, but not yet implemented in v2) is based on the paper [Multi-Objective Molecule Generation using Interpretable Substructures](https://arxiv.org/abs/2002.03244). +- Chemprop now has its own dedicated manuscript that describes and benchmarks it in more detail: [Chemprop: A Machine Learning Package for Chemical Property Prediction](https://doi.org/10.1021/acs.jcim.3c01250). +- A paper describing and benchmarking the changes in v2.0.0 is forthcoming. + +**Selected Applications**: Chemprop has been successfully used in the following works. + +- [A Deep Learning Approach to Antibiotic Discovery](https://www.cell.com/cell/fulltext/S0092-8674(20)30102-1) - _Cell_ (2020): Chemprop was used to predict antibiotic activity against _E. 
coli_, leading to the discovery of [Halicin](https://en.wikipedia.org/wiki/Halicin), a novel antibiotic candidate. Model checkpoints are availabile on [Zenodo](https://doi.org/10.5281/zenodo.6527882). +- [Discovery of a structural class of antibiotics with explainable deep learning](https://www.nature.com/articles/s41586-023-06887-8) - _Nature_ (2023): Identified a structural class of antibiotics selective against methicillin-resistant _S. aureus_ (MRSA) and vancomycin-resistant enterococci using ensembles of Chemprop models, and explained results using Chemprop's interpret method. +- [ADMET-AI: A machine learning ADMET platform for evaluation of large-scale chemical libraries](https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/btae416/7698030?utm_source=authortollfreelink&utm_campaign=bioinformatics&utm_medium=email&guestAccessKey=f4fca1d2-49ec-4b10-b476-5aea3bf37045): Chemprop was trained on 41 absorption, distribution, metabolism, excretion, and toxicity (ADMET) datasets from the [Therapeutics Data Commons](https://tdcommons.ai). The Chemprop models in ADMET-AI are available both as a web server at [admet.ai.greenstonebio.com](https://admet.ai.greenstonebio.com) and as a Python package at [github.com/swansonk14/admet_ai](https://github.com/swansonk14/admet_ai). +- A more extensive list of successful Chemprop applications is given in our [2023 paper](https://doi.org/10.1021/acs.jcim.3c01250) + +## Version 1.x + +For users who have not yet made the switch to Chemprop v2.0, please reference the following resources. + +### v1 Documentation + +- Documentation of Chemprop v1 is available [here](https://chemprop.readthedocs.io/en/v1.7.1/). Note that the content of this site is several versions behind the final v1 release (v1.7.1) and does not cover the full scope of features available in chemprop v1. +- The v1 [README](https://github.com/chemprop/chemprop/blob/v1.7.1/README.md) is the best source for documentation on more recently-added features. +- Please also see descriptions of all the possible command line arguments in the v1 [`args.py`](https://github.com/chemprop/chemprop/blob/v1.7.1/chemprop/args.py) file. + +### v1 Tutorials and Examples + +- [Benchmark scripts](https://github.com/chemprop/chemprop_benchmark) - scripts from our 2023 paper, providing examples of many features using Chemprop v1.6.1 +- [ACS Fall 2023 Workshop](https://github.com/chemprop/chemprop-workshop-acs-fall2023) - presentation, interactive demo, exercises on Google Colab with solution key +- [Google Colab notebook](https://colab.research.google.com/github/chemprop/chemprop/blob/v1.7.1/colab_demo.ipynb) - several examples, intended to be run in Google Colab rather than as a Jupyter notebook on your local machine +- [nanoHUB tool](https://nanohub.org/resources/chempropdemo/) - a notebook of examples similar to the Colab notebook above, doesn't require any installation + - [YouTube video](https://www.youtube.com/watch?v=TeOl5E8Wo2M) - lecture accompanying nanoHUB tool +- These [slides](https://docs.google.com/presentation/d/14pbd9LTXzfPSJHyXYkfLxnK8Q80LhVnjImg8a3WqCRM/edit?usp=sharing) provide a Chemprop tutorial and highlight additions as of April 28th, 2020 + +### v1 Known Issues + +We have discontinued support for v1 since v2 has been released, but we still appreciate v1 bug reports and will tag them as [`v1-wontfix`](https://github.com/chemprop/chemprop/issues?q=label%3Av1-wontfix+) so the community can find them easily. 
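Since the README points to the documentation and example notebooks for v2 usage, a compact Python-API sketch may help orient readers. The subpackage layout (`chemprop.data`, `chemprop.featurizers`, `chemprop.models`, `chemprop.nn`) matches `chemprop/__init__.py` below, but the specific class names and call signatures used here are recalled from the v2 documentation rather than confirmed by this diff, so treat them as assumptions and verify against the docs before relying on them.

```python
import numpy as np
from lightning import pytorch as pl

from chemprop import data, featurizers, models, nn

# toy regression data: (SMILES, target) pairs
smis = ["CCO", "c1ccccc1", "CC(=O)O"]
ys = np.array([[-0.2], [1.1], [0.4]])

datapoints = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]
featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()
dset = data.MoleculeDataset(datapoints, featurizer)
loader = data.build_dataloader(dset, batch_size=2)

# message passing -> aggregation -> feed-forward readout
mpnn = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), nn.RegressionFFN())

trainer = pl.Trainer(max_epochs=5, accelerator="cpu", logger=False)
trainer.fit(mpnn, loader)
```

The `convert`, `fingerprint`, and `hpopt` subcommands added later in this diff wrap these same building blocks behind the `chemprop` command-line interface.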
diff --git a/chemprop-updated/chemprop/__init__.py b/chemprop-updated/chemprop/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4c2a05683e98de16fe555275a8f1430a97a9008e --- /dev/null +++ b/chemprop-updated/chemprop/__init__.py @@ -0,0 +1,5 @@ +from . import data, exceptions, featurizers, models, nn, schedulers, utils + +__all__ = ["data", "featurizers", "models", "nn", "utils", "exceptions", "schedulers"] + +__version__ = "2.1.2" diff --git a/chemprop-updated/chemprop/__pycache__/__init__.cpython-37.pyc b/chemprop-updated/chemprop/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c9b7095c4cb5147e6fb1867912ea804331ba5bb5 Binary files /dev/null and b/chemprop-updated/chemprop/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/__pycache__/args.cpython-37.pyc b/chemprop-updated/chemprop/__pycache__/args.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e72640d8427d96850ec6362d5443893265b037c Binary files /dev/null and b/chemprop-updated/chemprop/__pycache__/args.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/__pycache__/constants.cpython-37.pyc b/chemprop-updated/chemprop/__pycache__/constants.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a79288e0006c7aa20463e3569efdda612645f38f Binary files /dev/null and b/chemprop-updated/chemprop/__pycache__/constants.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/__pycache__/hyperopt_utils.cpython-37.pyc b/chemprop-updated/chemprop/__pycache__/hyperopt_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0efad6a7d927f8adfe8268cc54e3c2fad10b12e3 Binary files /dev/null and b/chemprop-updated/chemprop/__pycache__/hyperopt_utils.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/__pycache__/hyperparameter_optimization.cpython-37.pyc b/chemprop-updated/chemprop/__pycache__/hyperparameter_optimization.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a80289e1131169088a082fda6cb6fd723970ef9d Binary files /dev/null and b/chemprop-updated/chemprop/__pycache__/hyperparameter_optimization.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/__pycache__/interpret.cpython-37.pyc b/chemprop-updated/chemprop/__pycache__/interpret.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4ff850a4d89900133c1cddf13a4ae1abfd861e0 Binary files /dev/null and b/chemprop-updated/chemprop/__pycache__/interpret.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/__pycache__/multitask_utils.cpython-37.pyc b/chemprop-updated/chemprop/__pycache__/multitask_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52f37ea7d193887ee7e45c97f61e997b896465cf Binary files /dev/null and b/chemprop-updated/chemprop/__pycache__/multitask_utils.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/__pycache__/nn_utils.cpython-37.pyc b/chemprop-updated/chemprop/__pycache__/nn_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5edd6b47a163590b196d1b1e4dd9faa05f2175eb Binary files /dev/null and b/chemprop-updated/chemprop/__pycache__/nn_utils.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/__pycache__/rdkit.cpython-37.pyc b/chemprop-updated/chemprop/__pycache__/rdkit.cpython-37.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..556cc7ee61a15e08f0ec64256af87fdbce6d3e33 Binary files /dev/null and b/chemprop-updated/chemprop/__pycache__/rdkit.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/__pycache__/sklearn_predict.cpython-37.pyc b/chemprop-updated/chemprop/__pycache__/sklearn_predict.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1ed0056d83f64e771778cd13fd8ad8a18f8007c6 Binary files /dev/null and b/chemprop-updated/chemprop/__pycache__/sklearn_predict.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/__pycache__/sklearn_train.cpython-37.pyc b/chemprop-updated/chemprop/__pycache__/sklearn_train.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e3647d20a264473bf844ab3f1bed7b7a073c710f Binary files /dev/null and b/chemprop-updated/chemprop/__pycache__/sklearn_train.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/__pycache__/spectra_utils.cpython-37.pyc b/chemprop-updated/chemprop/__pycache__/spectra_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16ce436997ebd26f9d522ff691aa6b39708d705e Binary files /dev/null and b/chemprop-updated/chemprop/__pycache__/spectra_utils.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/__pycache__/utils.cpython-37.pyc b/chemprop-updated/chemprop/__pycache__/utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d5b9d9fd85df60238ed748527d1a06c5290a2231 Binary files /dev/null and b/chemprop-updated/chemprop/__pycache__/utils.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/cli/common.py b/chemprop-updated/chemprop/cli/common.py new file mode 100644 index 0000000000000000000000000000000000000000..49113418eb3079a65e1533db925bd5fcc544af5b --- /dev/null +++ b/chemprop-updated/chemprop/cli/common.py @@ -0,0 +1,211 @@ +from argparse import ArgumentError, ArgumentParser, Namespace +import logging +from pathlib import Path + +from chemprop.cli.utils import LookupAction +from chemprop.cli.utils.args import uppercase +from chemprop.featurizers import AtomFeatureMode, MoleculeFeaturizerRegistry, RxnMode + +logger = logging.getLogger(__name__) + + +def add_common_args(parser: ArgumentParser) -> ArgumentParser: + data_args = parser.add_argument_group("Shared input data args") + data_args.add_argument( + "-s", + "--smiles-columns", + nargs="+", + help="Column names in the input CSV containing SMILES strings (uses the 0th column by default)", + ) + data_args.add_argument( + "-r", + "--reaction-columns", + nargs="+", + help="Column names in the input CSV containing reaction SMILES in the format ``REACTANT>AGENT>PRODUCT``, where 'AGENT' is optional", + ) + data_args.add_argument( + "--no-header-row", + action="store_true", + help="Turn off using the first row in the input CSV as column names", + ) + + dataloader_args = parser.add_argument_group("Dataloader args") + dataloader_args.add_argument( + "-n", + "--num-workers", + type=int, + default=0, + help="""Number of workers for parallel data loading where 0 means sequential +(Warning: setting ``num_workers`` to a value greater than 0 can cause hangs on Windows and MacOS)""", + ) + dataloader_args.add_argument("-b", "--batch-size", type=int, default=64, help="Batch size") + + parser.add_argument( + "--accelerator", default="auto", help="Passed directly to the lightning ``Trainer()``" + ) + parser.add_argument( + "--devices", + default="auto", + help="Passed directly to the lightning ``Trainer()`` (must be a 
single string of comma separated devices, e.g. '1, 2' if specifying multiple devices)", + ) + + featurization_args = parser.add_argument_group("Featurization args") + featurization_args.add_argument( + "--rxn-mode", + "--reaction-mode", + type=uppercase, + default="REAC_DIFF", + choices=list(RxnMode.keys()), + help="""Choices for construction of atom and bond features for reactions (case insensitive): + +- ``REAC_PROD``: concatenates the reactants feature with the products feature +- ``REAC_DIFF``: concatenates the reactants feature with the difference in features between reactants and products (Default) +- ``PROD_DIFF``: concatenates the products feature with the difference in features between reactants and products +- ``REAC_PROD_BALANCE``: concatenates the reactants feature with the products feature, balances imbalanced reactions +- ``REAC_DIFF_BALANCE``: concatenates the reactants feature with the difference in features between reactants and products, balances imbalanced reactions +- ``PROD_DIFF_BALANCE``: concatenates the products feature with the difference in features between reactants and products, balances imbalanced reactions""", + ) + # TODO: Update documenation for multi_hot_atom_featurizer_mode + featurization_args.add_argument( + "--multi-hot-atom-featurizer-mode", + type=uppercase, + default="V2", + choices=list(AtomFeatureMode.keys()), + help="""Choices for multi-hot atom featurization scheme. This will affect both non-reaction and reaction feturization (case insensitive): + +- ``V1``: Corresponds to the original configuration employed in the Chemprop V1 +- ``V2``: Tailored for a broad range of molecules, this configuration encompasses all elements in the first four rows of the periodic table, along with iodine. It is the default in Chemprop V2. +- ``ORGANIC``: This configuration is designed specifically for use with organic molecules for drug research and development and includes a subset of elements most common in organic chemistry, including H, B, C, N, O, F, Si, P, S, Cl, Br, and I. +- ``RIGR``: Modified V2 (default) featurizer using only the resonance-invariant atom and bond features.""", + ) + featurization_args.add_argument( + "--keep-h", + action="store_true", + help="Whether hydrogens explicitly specified in input should be kept in the mol graph", + ) + featurization_args.add_argument( + "--add-h", action="store_true", help="Whether hydrogens should be added to the mol graph" + ) + featurization_args.add_argument( + "--molecule-featurizers", + "--features-generators", + nargs="+", + action=LookupAction(MoleculeFeaturizerRegistry), + help="Method(s) of generating molecule features to use as extra descriptors", + ) + # TODO: add in v2.1 to deprecate features-generators and then remove in v2.2 + # featurization_args.add_argument( + # "--features-generators", nargs="+", help="Renamed to `--molecule-featurizers`." + # ) + featurization_args.add_argument( + "--descriptors-path", + type=Path, + help="Path to extra descriptors to concatenate to learned representation", + ) + # TODO: Add in v2.1 + # featurization_args.add_argument( + # "--phase-features-path", + # help="Path to features used to indicate the phase of the data in one-hot vector form. 
Used in spectra datatype.", + # ) + featurization_args.add_argument( + "--no-descriptor-scaling", action="store_true", help="Turn off extra descriptor scaling" + ) + featurization_args.add_argument( + "--no-atom-feature-scaling", action="store_true", help="Turn off extra atom feature scaling" + ) + featurization_args.add_argument( + "--no-atom-descriptor-scaling", + action="store_true", + help="Turn off extra atom descriptor scaling", + ) + featurization_args.add_argument( + "--no-bond-feature-scaling", action="store_true", help="Turn off extra bond feature scaling" + ) + featurization_args.add_argument( + "--atom-features-path", + nargs="+", + action="append", + help="If a single path is given, it is assumed to correspond to the 0-th molecule. Alternatively, it can be a two-tuple of molecule index and path to additional atom features to supply before message passing (e.g., ``--atom-features-path 0 /path/to/features_0.npz``) indicates that the features at the given path should be supplied to the 0-th component. To supply additional features for multiple components, repeat this argument on the command line for each component's respective values (e.g., ``--atom-features-path [...] --atom-features-path [...]``).", + ) + featurization_args.add_argument( + "--atom-descriptors-path", + nargs="+", + action="append", + help="If a single path is given, it is assumed to correspond to the 0-th molecule. Alternatively, it can be a two-tuple of molecule index and path to additional atom descriptors to supply after message passing (e.g., ``--atom-descriptors-path 0 /path/to/descriptors_0.npz`` indicates that the descriptors at the given path should be supplied to the 0-th component. To supply additional descriptors for multiple components, repeat this argument on the command line for each component's respective values (e.g., ``--atom-descriptors-path [...] --atom-descriptors-path [...]``).", + ) + featurization_args.add_argument( + "--bond-features-path", + nargs="+", + action="append", + help="If a single path is given, it is assumed to correspond to the 0-th molecule. Alternatively, it can be a two-tuple of molecule index and path to additional bond features to supply before message passing (e.g., ``--bond-features-path 0 /path/to/features_0.npz`` indicates that the features at the given path should be supplied to the 0-th component. To supply additional features for multiple components, repeat this argument on the command line for each component's respective values (e.g., ``--bond-features-path [...] --bond-features-path [...]``).", + ) + # TODO: Add in v2.2 + # parser.add_argument( + # "--constraints-path", + # help="Path to constraints applied to atomic/bond properties prediction.", + # ) + + return parser + + +def process_common_args(args: Namespace) -> Namespace: + # TODO: add in v2.1 to deprecate features-generators and then remove in v2.2 + # if args.features_generators is not None: + # raise ArgumentError( + # argument=None, + # message="`--features-generators` has been renamed to `--molecule-featurizers`.", + # ) + + for key in ["atom_features_path", "atom_descriptors_path", "bond_features_path"]: + inds_paths = getattr(args, key) + + if not inds_paths: + continue + + ind_path_dict = {} + + for ind_path in inds_paths: + if len(ind_path) > 2: + raise ArgumentError( + argument=None, + message="Too many arguments were given for atom features/descriptors or bond features. 
It should be either a two-tuple of molecule index and a path, or a single path (assumed to be the 0-th molecule).", + ) + + if len(ind_path) == 1: + ind = 0 + path = ind_path[0] + else: + ind, path = ind_path + + if ind_path_dict.get(int(ind), None): + raise ArgumentError( + argument=None, + message=f"Duplicate atom features/descriptors or bond features given for molecule index {ind}", + ) + + ind_path_dict[int(ind)] = Path(path) + + setattr(args, key, ind_path_dict) + + return args + + +def validate_common_args(args): + pass + + +def find_models(model_paths: list[Path]): + collected_model_paths = [] + + for model_path in model_paths: + if model_path.suffix in [".ckpt", ".pt"]: + collected_model_paths.append(model_path) + elif model_path.is_dir(): + collected_model_paths.extend(list(model_path.rglob("*.pt"))) + else: + raise ArgumentError( + argument=None, + message=f"Expected a .ckpt or .pt file, or a directory. Got {model_path}", + ) + + return collected_model_paths diff --git a/chemprop-updated/chemprop/cli/conf.py b/chemprop-updated/chemprop/cli/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..be7701c52cc6509817a7cb9d4223ea33083f422b --- /dev/null +++ b/chemprop-updated/chemprop/cli/conf.py @@ -0,0 +1,9 @@ +from datetime import datetime +import logging +import os +from pathlib import Path + +LOG_DIR = Path(os.getenv("CHEMPROP_LOG_DIR", "chemprop_logs")) +LOG_LEVELS = {0: logging.INFO, 1: logging.DEBUG, -1: logging.WARNING, -2: logging.ERROR} +NOW = datetime.now().strftime("%Y-%m-%dT%H-%M-%S") +CHEMPROP_TRAIN_DIR = Path(os.getenv("CHEMPROP_TRAIN_DIR", "chemprop_training")) diff --git a/chemprop-updated/chemprop/cli/convert.py b/chemprop-updated/chemprop/cli/convert.py new file mode 100644 index 0000000000000000000000000000000000000000..e75795e9cb19985d49108d014b447d29209340fe --- /dev/null +++ b/chemprop-updated/chemprop/cli/convert.py @@ -0,0 +1,55 @@ +from argparse import ArgumentError, ArgumentParser, Namespace +import logging +from pathlib import Path +import sys + +from chemprop.cli.utils import Subcommand +from chemprop.utils.v1_to_v2 import convert_model_file_v1_to_v2 + +logger = logging.getLogger(__name__) + + +class ConvertSubcommand(Subcommand): + COMMAND = "convert" + HELP = "Convert a v1 model checkpoint (.pt) to a v2 model checkpoint (.pt)." + + @classmethod + def add_args(cls, parser: ArgumentParser) -> ArgumentParser: + parser.add_argument( + "-i", + "--input-path", + required=True, + type=Path, + help="Path to a v1 model .pt checkpoint file", + ) + parser.add_argument( + "-o", + "--output-path", + type=Path, + help="Path to which the converted model will be saved (``CURRENT_DIRECTORY/STEM_OF_INPUT_v2.pt`` by default)", + ) + return parser + + @classmethod + def func(cls, args: Namespace): + if args.output_path is None: + args.output_path = Path(args.input_path.stem + "_v2.pt") + if args.output_path.suffix != ".pt": + raise ArgumentError( + argument=None, message=f"Output must be a `.pt` file. Got {args.output_path}" + ) + + logger.info( + f"Converting v1 model checkpoint '{args.input_path}' to v2 model checkpoint '{args.output_path}'..." 
+ ) + convert_model_file_v1_to_v2(args.input_path, args.output_path) + + +if __name__ == "__main__": + parser = ArgumentParser() + parser = ConvertSubcommand.add_args(parser) + + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True) + + args = parser.parse_args() + ConvertSubcommand.func(args) diff --git a/chemprop-updated/chemprop/cli/fingerprint.py b/chemprop-updated/chemprop/cli/fingerprint.py new file mode 100644 index 0000000000000000000000000000000000000000..45daed203e13b5f3ddd8f45e53b910b46a4ddb34 --- /dev/null +++ b/chemprop-updated/chemprop/cli/fingerprint.py @@ -0,0 +1,182 @@ +from argparse import ArgumentError, ArgumentParser, Namespace +import logging +from pathlib import Path +import sys + +import numpy as np +import pandas as pd +import torch + +from chemprop import data +from chemprop.cli.common import add_common_args, process_common_args, validate_common_args +from chemprop.cli.predict import find_models +from chemprop.cli.utils import Subcommand, build_data_from_files, make_dataset +from chemprop.models import load_model +from chemprop.nn.metrics import LossFunctionRegistry + +logger = logging.getLogger(__name__) + + +class FingerprintSubcommand(Subcommand): + COMMAND = "fingerprint" + HELP = "Use a pretrained chemprop model to calculate learned representations." + + @classmethod + def add_args(cls, parser: ArgumentParser) -> ArgumentParser: + parser = add_common_args(parser) + parser.add_argument( + "-i", + "--test-path", + required=True, + type=Path, + help="Path to an input CSV file containing SMILES", + ) + parser.add_argument( + "-o", + "--output", + "--preds-path", + type=Path, + help="Specify the path where predictions will be saved. If the file extension is .npz, they will be saved as a npz file. Otherwise, the predictions will be saved as a CSV. The index of the model will be appended to the filename's stem. By default, predictions will be saved to the same location as ``--test-path`` with '_fps' appended (e.g., 'PATH/TO/TEST_PATH_fps_0.csv').", + ) + parser.add_argument( + "--model-paths", + "--model-path", + required=True, + type=Path, + nargs="+", + help="Specify location of checkpoint(s) or model file(s) to use for prediction. It can be a path to either a single pretrained model checkpoint (.ckpt) or single pretrained model file (.pt), a directory that contains these files, or a list of path(s) and directory(s). If a directory, chemprop will recursively search and predict on all found (.pt) models.", + ) + parser.add_argument( + "--ffn-block-index", + required=True, + type=int, + default=-1, + help="The index indicates which linear layer returns the encoding in the FFN. An index of 0 denotes the post-aggregation representation through a 0-layer MLP, while an index of 1 represents the output from the first linear layer in the FFN, and so forth.", + ) + + return parser + + @classmethod + def func(cls, args: Namespace): + args = process_common_args(args) + validate_common_args(args) + args = process_fingerprint_args(args) + main(args) + + +def process_fingerprint_args(args: Namespace) -> Namespace: + if args.test_path.suffix not in [".csv"]: + raise ArgumentError( + argument=None, message=f"Input data must be a CSV file. Got {args.test_path}" + ) + if args.output is None: + args.output = args.test_path.parent / (args.test_path.stem + "_fps.csv") + if args.output.suffix not in [".csv", ".npz"]: + raise ArgumentError( + argument=None, message=f"Output must be a CSV or NPZ file. Got '{args.output}'." 
+ ) + return args + + +def make_fingerprint_for_model( + args: Namespace, model_path: Path, multicomponent: bool, output_path: Path +): + model = load_model(model_path, multicomponent) + model.eval() + + bounded = any( + isinstance(model.criterion, LossFunctionRegistry[loss_function]) + for loss_function in LossFunctionRegistry.keys() + if "bounded" in loss_function + ) + + format_kwargs = dict( + no_header_row=args.no_header_row, + smiles_cols=args.smiles_columns, + rxn_cols=args.reaction_columns, + target_cols=[], + ignore_cols=None, + splits_col=None, + weight_col=None, + bounded=bounded, + ) + + featurization_kwargs = dict( + molecule_featurizers=args.molecule_featurizers, keep_h=args.keep_h, add_h=args.add_h + ) + + test_data = build_data_from_files( + args.test_path, + **format_kwargs, + p_descriptors=args.descriptors_path, + p_atom_feats=args.atom_features_path, + p_bond_feats=args.bond_features_path, + p_atom_descs=args.atom_descriptors_path, + **featurization_kwargs, + ) + logger.info(f"test size: {len(test_data[0])}") + test_dsets = [ + make_dataset(d, args.rxn_mode, args.multi_hot_atom_featurizer_mode) for d in test_data + ] + + if multicomponent: + test_dset = data.MulticomponentDataset(test_dsets) + else: + test_dset = test_dsets[0] + + test_loader = data.build_dataloader(test_dset, args.batch_size, args.num_workers, shuffle=False) + + logger.info(model) + + with torch.no_grad(): + if multicomponent: + encodings = [ + model.encoding(batch.bmgs, batch.V_ds, batch.X_d, args.ffn_block_index) + for batch in test_loader + ] + else: + encodings = [ + model.encoding(batch.bmg, batch.V_d, batch.X_d, args.ffn_block_index) + for batch in test_loader + ] + H = torch.cat(encodings, 0).numpy() + + if output_path.suffix in [".npz"]: + np.savez(output_path, H=H) + elif output_path.suffix == ".csv": + fingerprint_columns = [f"fp_{i}" for i in range(H.shape[1])] + df_fingerprints = pd.DataFrame(H, columns=fingerprint_columns) + df_fingerprints.to_csv(output_path, index=False) + else: + raise ArgumentError( + argument=None, message=f"Output must be a CSV or npz file. Got {args.output}." 
+ ) + logger.info(f"Fingerprints saved to '{output_path}'") + + +def main(args): + match (args.smiles_columns, args.reaction_columns): + case [None, None]: + n_components = 1 + case [_, None]: + n_components = len(args.smiles_columns) + case [None, _]: + n_components = len(args.reaction_columns) + case _: + n_components = len(args.smiles_columns) + len(args.reaction_columns) + + multicomponent = n_components > 1 + + for i, model_path in enumerate(find_models(args.model_paths)): + logger.info(f"Fingerprints with model {i} at '{model_path}'") + output_path = args.output.parent / f"{args.output.stem}_{i}{args.output.suffix}" + make_fingerprint_for_model(args, model_path, multicomponent, output_path) + + +if __name__ == "__main__": + parser = ArgumentParser() + parser = FingerprintSubcommand.add_args(parser) + + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True) + args = parser.parse_args() + args = FingerprintSubcommand.func(args) diff --git a/chemprop-updated/chemprop/cli/hpopt.py b/chemprop-updated/chemprop/cli/hpopt.py new file mode 100644 index 0000000000000000000000000000000000000000..50984b00d292d600c3346d76df1abf3e7f287c43 --- /dev/null +++ b/chemprop-updated/chemprop/cli/hpopt.py @@ -0,0 +1,537 @@ +from copy import deepcopy +import logging +from pathlib import Path +import shutil +import sys + +from configargparse import ArgumentParser, Namespace +from lightning import pytorch as pl +from lightning.pytorch.callbacks import EarlyStopping +import numpy as np +import torch + +from chemprop.cli.common import add_common_args, process_common_args, validate_common_args +from chemprop.cli.train import ( + TrainSubcommand, + add_train_args, + build_datasets, + build_model, + build_splits, + normalize_inputs, + process_train_args, + save_config, + validate_train_args, +) +from chemprop.cli.utils.command import Subcommand +from chemprop.data import build_dataloader +from chemprop.nn import AggregationRegistry, MetricRegistry +from chemprop.nn.transforms import UnscaleTransform +from chemprop.nn.utils import Activation + +NO_RAY = False +DEFAULT_SEARCH_SPACE = { + "activation": None, + "aggregation": None, + "aggregation_norm": None, + "batch_size": None, + "depth": None, + "dropout": None, + "ffn_hidden_dim": None, + "ffn_num_layers": None, + "final_lr_ratio": None, + "message_hidden_dim": None, + "init_lr_ratio": None, + "max_lr": None, + "warmup_epochs": None, +} + +try: + import ray + from ray import tune + from ray.train import CheckpointConfig, RunConfig, ScalingConfig + from ray.train.lightning import ( + RayDDPStrategy, + RayLightningEnvironment, + RayTrainReportCallback, + prepare_trainer, + ) + from ray.train.torch import TorchTrainer + from ray.tune.schedulers import ASHAScheduler, FIFOScheduler + + DEFAULT_SEARCH_SPACE = { + "activation": tune.choice(categories=list(Activation.keys())), + "aggregation": tune.choice(categories=list(AggregationRegistry.keys())), + "aggregation_norm": tune.quniform(lower=1, upper=200, q=1), + "batch_size": tune.choice([16, 32, 64, 128, 256]), + "depth": tune.qrandint(lower=2, upper=6, q=1), + "dropout": tune.choice([0.0] * 8 + list(np.arange(0.05, 0.45, 0.05))), + "ffn_hidden_dim": tune.qrandint(lower=300, upper=2400, q=100), + "ffn_num_layers": tune.qrandint(lower=1, upper=3, q=1), + "final_lr_ratio": tune.loguniform(lower=1e-2, upper=1), + "message_hidden_dim": tune.qrandint(lower=300, upper=2400, q=100), + "init_lr_ratio": tune.loguniform(lower=1e-2, upper=1), + "max_lr": tune.loguniform(lower=1e-4, upper=1e-2), + "warmup_epochs": 
None, + } +except ImportError: + NO_RAY = True + +NO_HYPEROPT = False +try: + from ray.tune.search.hyperopt import HyperOptSearch +except ImportError: + NO_HYPEROPT = True + +NO_OPTUNA = False +try: + from ray.tune.search.optuna import OptunaSearch +except ImportError: + NO_OPTUNA = True + + +logger = logging.getLogger(__name__) + +SEARCH_SPACE = DEFAULT_SEARCH_SPACE + +SEARCH_PARAM_KEYWORDS_MAP = { + "basic": ["depth", "ffn_num_layers", "dropout", "ffn_hidden_dim", "message_hidden_dim"], + "learning_rate": ["max_lr", "init_lr_ratio", "final_lr_ratio", "warmup_epochs"], + "all": list(DEFAULT_SEARCH_SPACE.keys()), + "init_lr": ["init_lr_ratio"], + "final_lr": ["final_lr_ratio"], +} + + +class HpoptSubcommand(Subcommand): + COMMAND = "hpopt" + HELP = "Perform hyperparameter optimization on the given task." + + @classmethod + def add_args(cls, parser: ArgumentParser) -> ArgumentParser: + parser = add_common_args(parser) + parser = add_train_args(parser) + return add_hpopt_args(parser) + + @classmethod + def func(cls, args: Namespace): + args = process_common_args(args) + args = process_train_args(args) + args = process_hpopt_args(args) + validate_common_args(args) + validate_train_args(args) + main(args) + + +def add_hpopt_args(parser: ArgumentParser) -> ArgumentParser: + hpopt_args = parser.add_argument_group("Chemprop hyperparameter optimization arguments") + + hpopt_args.add_argument( + "--search-parameter-keywords", + type=str, + nargs="+", + default=["basic"], + help=f"""The model parameters over which to search for an optimal hyperparameter configuration. Some options are bundles of parameters or otherwise special parameter operations. Special keywords include: + - ``basic``: Default set of hyperparameters for search (depth, ffn_num_layers, dropout, message_hidden_dim, and ffn_hidden_dim) + - ``learning_rate``: Search for max_lr, init_lr_ratio, final_lr_ratio, and warmup_epochs. The search for init_lr and final_lr values are defined as fractions of the max_lr value. The search for warmup_epochs is as a fraction of the total epochs used. + - ``all``: Include search for all 13 individual keyword options (including: activation, aggregation, aggregation_norm, and batch_size which aren't included in the other two keywords). 
+ Individual supported parameters: + {list(DEFAULT_SEARCH_SPACE.keys())} + """, + ) + + hpopt_args.add_argument( + "--hpopt-save-dir", + type=Path, + help="Directory to save the hyperparameter optimization results", + ) + + raytune_args = parser.add_argument_group("Ray Tune arguments") + + raytune_args.add_argument( + "--raytune-num-samples", + type=int, + default=10, + help="Passed directly to Ray Tune ``TuneConfig`` to control number of trials to run", + ) + + raytune_args.add_argument( + "--raytune-search-algorithm", + choices=["random", "hyperopt", "optuna"], + default="hyperopt", + help="Passed to Ray Tune ``TuneConfig`` to control search algorithm", + ) + + raytune_args.add_argument( + "--raytune-trial-scheduler", + choices=["FIFO", "AsyncHyperBand"], + default="FIFO", + help="Passed to Ray Tune ``TuneConfig`` to control trial scheduler", + ) + + raytune_args.add_argument( + "--raytune-num-workers", + type=int, + default=1, + help="Passed directly to Ray Tune ``ScalingConfig`` to control number of workers to use", + ) + + raytune_args.add_argument( + "--raytune-use-gpu", + action="store_true", + help="Passed directly to Ray Tune ``ScalingConfig`` to control whether to use GPUs", + ) + + raytune_args.add_argument( + "--raytune-num-checkpoints-to-keep", + type=int, + default=1, + help="Passed directly to Ray Tune ``CheckpointConfig`` to control number of checkpoints to keep", + ) + + raytune_args.add_argument( + "--raytune-grace-period", + type=int, + default=10, + help="Passed directly to Ray Tune ``ASHAScheduler`` to control grace period", + ) + + raytune_args.add_argument( + "--raytune-reduction-factor", + type=int, + default=2, + help="Passed directly to Ray Tune ``ASHAScheduler`` to control reduction factor", + ) + + raytune_args.add_argument( + "--raytune-temp-dir", help="Passed directly to Ray Tune init to control temporary directory" + ) + + raytune_args.add_argument( + "--raytune-num-cpus", + type=int, + help="Passed directly to Ray Tune init to control number of CPUs to use", + ) + + raytune_args.add_argument( + "--raytune-num-gpus", + type=int, + help="Passed directly to Ray Tune init to control number of GPUs to use", + ) + + raytune_args.add_argument( + "--raytune-max-concurrent-trials", + type=int, + help="Passed directly to Ray Tune TuneConfig to control maximum concurrent trials", + ) + + hyperopt_args = parser.add_argument_group("Hyperopt arguments") + + hyperopt_args.add_argument( + "--hyperopt-n-initial-points", + type=int, + help="Passed directly to ``HyperOptSearch`` to control number of initial points to sample", + ) + + hyperopt_args.add_argument( + "--hyperopt-random-state-seed", + type=int, + default=None, + help="Passed directly to ``HyperOptSearch`` to control random state seed", + ) + + return parser + + +def process_hpopt_args(args: Namespace) -> Namespace: + if args.hpopt_save_dir is None: + args.hpopt_save_dir = Path(f"chemprop_hpopt/{args.data_path.stem}") + + args.hpopt_save_dir.mkdir(exist_ok=True, parents=True) + + search_parameters = set() + + available_search_parameters = list(SEARCH_SPACE.keys()) + list(SEARCH_PARAM_KEYWORDS_MAP.keys()) + + for keyword in args.search_parameter_keywords: + if keyword not in available_search_parameters: + raise ValueError( + f"Search parameter keyword: {keyword} not in available options: {available_search_parameters}." 
+ ) + + search_parameters.update( + SEARCH_PARAM_KEYWORDS_MAP[keyword] + if keyword in SEARCH_PARAM_KEYWORDS_MAP + else [keyword] + ) + + args.search_parameter_keywords = list(search_parameters) + + if not args.hyperopt_n_initial_points: + args.hyperopt_n_initial_points = args.raytune_num_samples // 2 + + return args + + +def build_search_space(search_parameters: list[str], train_epochs: int) -> dict: + if "warmup_epochs" in search_parameters and SEARCH_SPACE.get("warmup_epochs", None) is None: + assert ( + train_epochs >= 6 + ), "Training epochs must be at least 6 to perform hyperparameter optimization for warmup_epochs." + SEARCH_SPACE["warmup_epochs"] = tune.qrandint(lower=1, upper=train_epochs // 2, q=1) + + return {param: SEARCH_SPACE[param] for param in search_parameters} + + +def update_args_with_config(args: Namespace, config: dict) -> Namespace: + args = deepcopy(args) + + for key, value in config.items(): + match key: + case "final_lr_ratio": + setattr(args, "final_lr", value * config.get("max_lr", args.max_lr)) + + case "init_lr_ratio": + setattr(args, "init_lr", value * config.get("max_lr", args.max_lr)) + + case _: + assert key in args, f"Key: {key} not found in args." + setattr(args, key, value) + + return args + + +def train_model(config, args, train_dset, val_dset, logger, output_transform, input_transforms): + args = update_args_with_config(args, config) + + train_loader = build_dataloader( + train_dset, args.batch_size, args.num_workers, seed=args.data_seed + ) + val_loader = build_dataloader(val_dset, args.batch_size, args.num_workers, shuffle=False) + + seed = args.pytorch_seed if args.pytorch_seed is not None else torch.seed() + + torch.manual_seed(seed) + + model = build_model(args, train_loader.dataset, output_transform, input_transforms) + logger.info(model) + + if args.tracking_metric == "val_loss": + T_tracking_metric = model.criterion.__class__ + else: + T_tracking_metric = MetricRegistry[args.tracking_metric] + args.tracking_metric = "val/" + args.tracking_metric + + monitor_mode = "max" if T_tracking_metric.higher_is_better else "min" + logger.debug(f"Evaluation metric: '{T_tracking_metric.alias}', mode: '{monitor_mode}'") + + patience = args.patience if args.patience is not None else args.epochs + early_stopping = EarlyStopping(args.tracking_metric, patience=patience, mode=monitor_mode) + + trainer = pl.Trainer( + accelerator=args.accelerator, + devices=args.devices, + max_epochs=args.epochs, + gradient_clip_val=args.grad_clip, + strategy=RayDDPStrategy(), + callbacks=[RayTrainReportCallback(), early_stopping], + plugins=[RayLightningEnvironment()], + deterministic=args.pytorch_seed is not None, + ) + trainer = prepare_trainer(trainer) + trainer.fit(model, train_loader, val_loader) + + +def tune_model( + args, train_dset, val_dset, logger, monitor_mode, output_transform, input_transforms +): + match args.raytune_trial_scheduler: + case "FIFO": + scheduler = FIFOScheduler() + case "AsyncHyperBand": + scheduler = ASHAScheduler( + max_t=args.epochs, + grace_period=min(args.raytune_grace_period, args.epochs), + reduction_factor=args.raytune_reduction_factor, + ) + case _: + raise ValueError(f"Invalid trial scheduler! 
got: {args.raytune_trial_scheduler}.") + + resources_per_worker = {} + if args.raytune_num_cpus and args.raytune_max_concurrent_trials: + resources_per_worker["CPU"] = args.raytune_num_cpus / args.raytune_max_concurrent_trials + if args.raytune_num_gpus and args.raytune_max_concurrent_trials: + resources_per_worker["GPU"] = args.raytune_num_gpus / args.raytune_max_concurrent_trials + if not resources_per_worker: + resources_per_worker = None + + if args.raytune_num_gpus: + use_gpu = True + else: + use_gpu = args.raytune_use_gpu + + scaling_config = ScalingConfig( + num_workers=args.raytune_num_workers, + use_gpu=use_gpu, + resources_per_worker=resources_per_worker, + trainer_resources={"CPU": 0}, + ) + + checkpoint_config = CheckpointConfig( + num_to_keep=args.raytune_num_checkpoints_to_keep, + checkpoint_score_attribute=args.tracking_metric, + checkpoint_score_order=monitor_mode, + ) + + run_config = RunConfig( + checkpoint_config=checkpoint_config, + storage_path=args.hpopt_save_dir.absolute() / "ray_results", + ) + + ray_trainer = TorchTrainer( + lambda config: train_model( + config, args, train_dset, val_dset, logger, output_transform, input_transforms + ), + scaling_config=scaling_config, + run_config=run_config, + ) + + match args.raytune_search_algorithm: + case "random": + search_alg = None + case "hyperopt": + if NO_HYPEROPT: + raise ImportError( + "HyperOptSearch requires hyperopt to be installed. Use 'pip install -U hyperopt' to install or use 'pip install -e .[hpopt]' in chemprop folder if you installed from source to install all hpopt relevant packages." + ) + + search_alg = HyperOptSearch( + n_initial_points=args.hyperopt_n_initial_points, + random_state_seed=args.hyperopt_random_state_seed, + ) + case "optuna": + if NO_OPTUNA: + raise ImportError( + "OptunaSearch requires optuna to be installed. Use 'pip install -U optuna' to install or use 'pip install -e .[hpopt]' in chemprop folder if you installed from source to install all hpopt relevant packages." + ) + + search_alg = OptunaSearch() + + tune_config = tune.TuneConfig( + metric=args.tracking_metric, + mode=monitor_mode, + num_samples=args.raytune_num_samples, + scheduler=scheduler, + search_alg=search_alg, + trial_dirname_creator=lambda trial: str(trial.trial_id), + ) + + tuner = tune.Tuner( + ray_trainer, + param_space={ + "train_loop_config": build_search_space(args.search_parameter_keywords, args.epochs) + }, + tune_config=tune_config, + ) + + return tuner.fit() + + +def main(args: Namespace): + if NO_RAY: + raise ImportError( + "Ray Tune requires ray to be installed. If you installed Chemprop from PyPI, run 'pip install -U ray[tune]' to install ray. If you installed from source, use 'pip install -e .[hpopt]' in Chemprop folder to install all hpopt relevant packages." + ) + + if not ray.is_initialized(): + try: + ray.init( + _temp_dir=args.raytune_temp_dir, + num_cpus=args.raytune_num_cpus, + num_gpus=args.raytune_num_gpus, + ) + except OSError as e: + if "AF_UNIX path length cannot exceed 107 bytes" in str(e): + raise OSError( + f"Ray Tune fails due to: {e}. This can sometimes be solved by providing a temporary directory, num_cpus, and num_gpus to Ray Tune via the CLI: --raytune-temp-dir --raytune-num-cpus --raytune-num-gpus ." 
+ ) + else: + raise e + else: + logger.info("Ray is already initialized.") + + format_kwargs = dict( + no_header_row=args.no_header_row, + smiles_cols=args.smiles_columns, + rxn_cols=args.reaction_columns, + target_cols=args.target_columns, + ignore_cols=args.ignore_columns, + splits_col=args.splits_column, + weight_col=args.weight_column, + bounded=args.loss_function is not None and "bounded" in args.loss_function, + ) + + featurization_kwargs = dict( + molecule_featurizers=args.molecule_featurizers, keep_h=args.keep_h, add_h=args.add_h + ) + + train_data, val_data, test_data = build_splits(args, format_kwargs, featurization_kwargs) + train_dset, val_dset, test_dset = build_datasets(args, train_data[0], val_data[0], test_data[0]) + + input_transforms = normalize_inputs(train_dset, val_dset, args) + + if "regression" in args.task_type: + output_scaler = train_dset.normalize_targets() + val_dset.normalize_targets(output_scaler) + logger.info(f"Train data: mean = {output_scaler.mean_} | std = {output_scaler.scale_}") + output_transform = UnscaleTransform.from_standard_scaler(output_scaler) + else: + output_transform = None + + train_loader = build_dataloader( + train_dset, args.batch_size, args.num_workers, seed=args.data_seed + ) + + model = build_model(args, train_loader.dataset, output_transform, input_transforms) + monitor_mode = "max" if model.metrics[0].higher_is_better else "min" + + results = tune_model( + args, train_dset, val_dset, logger, monitor_mode, output_transform, input_transforms + ) + + best_result = results.get_best_result() + best_config = best_result.config["train_loop_config"] + best_checkpoint_path = Path(best_result.checkpoint.path) / "checkpoint.ckpt" + + best_config_save_path = args.hpopt_save_dir / "best_config.toml" + best_checkpoint_save_path = args.hpopt_save_dir / "best_checkpoint.ckpt" + all_progress_save_path = args.hpopt_save_dir / "all_progress.csv" + + logger.info(f"Best hyperparameters saved to: '{best_config_save_path}'") + + args = update_args_with_config(args, best_config) + + args = TrainSubcommand.parser.parse_known_args(namespace=args)[0] + save_config(TrainSubcommand.parser, args, best_config_save_path) + + logger.info( + f"Best hyperparameter configuration checkpoint saved to '{best_checkpoint_save_path}'" + ) + + shutil.copyfile(best_checkpoint_path, best_checkpoint_save_path) + + logger.info(f"Hyperparameter optimization results saved to '{all_progress_save_path}'") + + result_df = results.get_dataframe() + + result_df.to_csv(all_progress_save_path, index=False) + + ray.shutdown() + + +if __name__ == "__main__": + parser = ArgumentParser() + parser = HpoptSubcommand.add_args(parser) + + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True) + args = parser.parse_args() + HpoptSubcommand.func(args) diff --git a/chemprop-updated/chemprop/cli/main.py b/chemprop-updated/chemprop/cli/main.py new file mode 100644 index 0000000000000000000000000000000000000000..56d4f5205a0b7a5a50003a1c3ddba971260784ef --- /dev/null +++ b/chemprop-updated/chemprop/cli/main.py @@ -0,0 +1,85 @@ +import logging +from pathlib import Path +import sys + +from configargparse import ArgumentParser + +from chemprop.cli.conf import LOG_DIR, LOG_LEVELS, NOW +from chemprop.cli.convert import ConvertSubcommand +from chemprop.cli.fingerprint import FingerprintSubcommand +from chemprop.cli.hpopt import HpoptSubcommand +from chemprop.cli.predict import PredictSubcommand +from chemprop.cli.train import TrainSubcommand +from chemprop.cli.utils import pop_attr + +logger 
= logging.getLogger(__name__) + +SUBCOMMANDS = [ + TrainSubcommand, + PredictSubcommand, + ConvertSubcommand, + FingerprintSubcommand, + HpoptSubcommand, +] + + +def construct_parser(): + parser = ArgumentParser() + subparsers = parser.add_subparsers(title="mode", dest="mode", required=True) + + parent = ArgumentParser(add_help=False) + parent.add_argument( + "--logfile", + "--log", + nargs="?", + const="default", + help=f"Path to which the log file should be written (specifying just the flag alone will automatically log to a file ``{LOG_DIR}/MODE/TIMESTAMP.log`` , where 'MODE' is the CLI mode chosen, e.g., ``{LOG_DIR}/MODE/{NOW}.log``)", + ) + parent.add_argument("-v", action="store_true", help="Increase verbosity level to DEBUG") + parent.add_argument( + "-q", + action="count", + default=0, + help="Decrease verbosity level to WARNING or ERROR if specified twice", + ) + + parents = [parent] + for subcommand in SUBCOMMANDS: + subcommand.add(subparsers, parents) + + return parser + + +def main(): + parser = construct_parser() + args = parser.parse_args() + logfile, v_flag, q_count, mode, func = ( + pop_attr(args, attr) for attr in ["logfile", "v", "q", "mode", "func"] + ) + + if v_flag and q_count: + parser.error("The -v and -q options cannot be used together.") + + match logfile: + case None: + handler = logging.StreamHandler(sys.stderr) + case "default": + (LOG_DIR / mode).mkdir(parents=True, exist_ok=True) + handler = logging.FileHandler(str(LOG_DIR / mode / f"{NOW}.log")) + case _: + Path(logfile).parent.mkdir(parents=True, exist_ok=True) + handler = logging.FileHandler(logfile) + + verbosity = q_count * -1 if q_count else (1 if v_flag else 0) + logging_level = LOG_LEVELS.get(verbosity, logging.ERROR) + logging.basicConfig( + handlers=[handler], + format="%(asctime)s - %(levelname)s:%(name)s - %(message)s", + level=logging_level, + datefmt="%Y-%m-%dT%H:%M:%S", + force=True, + ) + + logger.info(f"Running in mode '{mode}' with args: {vars(args)}") + + func(args) diff --git a/chemprop-updated/chemprop/cli/predict.py b/chemprop-updated/chemprop/cli/predict.py new file mode 100644 index 0000000000000000000000000000000000000000..377e9c94c4703f71b796773d143f547b16e6041f --- /dev/null +++ b/chemprop-updated/chemprop/cli/predict.py @@ -0,0 +1,444 @@ +from argparse import ArgumentError, ArgumentParser, Namespace +import logging +from pathlib import Path +import sys +from typing import Iterator + +from lightning import pytorch as pl +import numpy as np +import pandas as pd +import torch + +from chemprop import data +from chemprop.cli.common import ( + add_common_args, + find_models, + process_common_args, + validate_common_args, +) +from chemprop.cli.utils import LookupAction, Subcommand, build_data_from_files, make_dataset +from chemprop.models.utils import load_model, load_output_columns +from chemprop.nn.metrics import LossFunctionRegistry +from chemprop.nn.predictors import EvidentialFFN, MulticlassClassificationFFN, MveFFN +from chemprop.uncertainty import ( + MVEWeightingCalibrator, + NoUncertaintyEstimator, + RegressionCalibrator, + RegressionEvaluator, + UncertaintyCalibratorRegistry, + UncertaintyEstimatorRegistry, + UncertaintyEvaluatorRegistry, +) +from chemprop.utils import Factory + +logger = logging.getLogger(__name__) + + +class PredictSubcommand(Subcommand): + COMMAND = "predict" + HELP = "use a pretrained chemprop model for prediction" + + @classmethod + def add_args(cls, parser: ArgumentParser) -> ArgumentParser: + parser = add_common_args(parser) + return 
add_predict_args(parser) + + @classmethod + def func(cls, args: Namespace): + args = process_common_args(args) + validate_common_args(args) + args = process_predict_args(args) + main(args) + + +def add_predict_args(parser: ArgumentParser) -> ArgumentParser: + parser.add_argument( + "-i", + "--test-path", + required=True, + type=Path, + help="Path to an input CSV file containing SMILES", + ) + parser.add_argument( + "-o", + "--output", + "--preds-path", + type=Path, + help="Specify path to which predictions will be saved. If the file extension is .pkl, it will be saved as a pickle file. Otherwise, chemprop will save predictions as a CSV. If multiple models are used to make predictions, the average predictions will be saved in the file, and another file ending in '_individual' with the same file extension will save the predictions for each individual model, with the column names being the target names appended with the model index (e.g., '_model_').", + ) + parser.add_argument( + "--drop-extra-columns", + action="store_true", + help="Whether to drop all columns from the test data file besides the SMILES columns and the new prediction columns", + ) + parser.add_argument( + "--model-paths", + "--model-path", + required=True, + type=Path, + nargs="+", + help="Location of checkpoint(s) or model file(s) to use for prediction. It can be a path to either a single pretrained model checkpoint (.ckpt) or single pretrained model file (.pt), a directory that contains these files, or a list of path(s) and directory(s). If a directory, will recursively search and predict on all found (.pt) models.", + ) + + unc_args = parser.add_argument_group("Uncertainty and calibration args") + unc_args.add_argument( + "--cal-path", type=Path, help="Path to data file to be used for uncertainty calibration." + ) + unc_args.add_argument( + "--uncertainty-method", + default="none", + action=LookupAction(UncertaintyEstimatorRegistry), + help="The method of calculating uncertainty.", + ) + unc_args.add_argument( + "--calibration-method", + action=LookupAction(UncertaintyCalibratorRegistry), + help="The method used for calibrating the uncertainty calculated with uncertainty method.", + ) + unc_args.add_argument( + "--evaluation-methods", + "--evaluation-method", + nargs="+", + action=LookupAction(UncertaintyEvaluatorRegistry), + help="The methods used for evaluating the uncertainty performance if the test data provided includes targets. Available methods are [nll, miscalibration_area, ence, spearman] or any available classification or multiclass metric.", + ) + # unc_args.add_argument( + # "--evaluation-scores-path", help="Location to save the results of uncertainty evaluations." + # ) + unc_args.add_argument( + "--uncertainty-dropout-p", + type=float, + default=0.1, + help="The probability to use for Monte Carlo dropout uncertainty estimation.", + ) + unc_args.add_argument( + "--dropout-sampling-size", + type=int, + default=10, + help="The number of samples to use for Monte Carlo dropout uncertainty estimation. Distinct from the dropout used during training.", + ) + unc_args.add_argument( + "--calibration-interval-percentile", + type=float, + default=95, + help="Sets the percentile used in the calibration methods. Must be in the range (1, 100).", + ) + unc_args.add_argument( + "--conformal-alpha", + type=float, + default=0.1, + help="Target error rate for conformal prediction. 
Must be in the range (0, 1).", + ) + # TODO: Decide if we want to implment this in v2.1.x + # unc_args.add_argument( + # "--regression-calibrator-metric", + # choices=["stdev", "interval"], + # help="Regression calibrators can output either a stdev or an inverval.", + # ) + unc_args.add_argument( + "--cal-descriptors-path", + nargs="+", + action="append", + help="Path to extra descriptors to concatenate to learned representation in calibration dataset.", + ) + # TODO: Add in v2.1.x + # unc_args.add_argument( + # "--calibration-phase-features-path", + # help=" ", + # ) + unc_args.add_argument( + "--cal-atom-features-path", + nargs="+", + action="append", + help="Path to the extra atom features in calibration dataset.", + ) + unc_args.add_argument( + "--cal-atom-descriptors-path", + nargs="+", + action="append", + help="Path to the extra atom descriptors in calibration dataset.", + ) + unc_args.add_argument( + "--cal-bond-features-path", + nargs="+", + action="append", + help="Path to the extra bond descriptors in calibration dataset.", + ) + + return parser + + +def process_predict_args(args: Namespace) -> Namespace: + if args.test_path.suffix not in [".csv"]: + raise ArgumentError( + argument=None, message=f"Input data must be a CSV file. Got {args.test_path}" + ) + if args.output is None: + args.output = args.test_path.parent / (args.test_path.stem + "_preds.csv") + if args.output.suffix not in [".csv", ".pkl"]: + raise ArgumentError( + argument=None, message=f"Output must be a CSV or Pickle file. Got {args.output}" + ) + return args + + +def prepare_data_loader( + args: Namespace, multicomponent: bool, is_calibration: bool, format_kwargs: dict +): + data_path = args.cal_path if is_calibration else args.test_path + descriptors_path = args.cal_descriptors_path if is_calibration else args.descriptors_path + atom_feats_path = args.cal_atom_features_path if is_calibration else args.atom_features_path + bond_feats_path = args.cal_bond_features_path if is_calibration else args.bond_features_path + atom_descs_path = ( + args.cal_atom_descriptors_path if is_calibration else args.atom_descriptors_path + ) + + featurization_kwargs = dict( + molecule_featurizers=args.molecule_featurizers, keep_h=args.keep_h, add_h=args.add_h + ) + + datas = build_data_from_files( + data_path, + **format_kwargs, + p_descriptors=descriptors_path, + p_atom_feats=atom_feats_path, + p_bond_feats=bond_feats_path, + p_atom_descs=atom_descs_path, + **featurization_kwargs, + ) + + dsets = [make_dataset(d, args.rxn_mode, args.multi_hot_atom_featurizer_mode) for d in datas] + dset = data.MulticomponentDataset(dsets) if multicomponent else dsets[0] + + return data.build_dataloader(dset, args.batch_size, args.num_workers, shuffle=False) + + +def make_prediction_for_models( + args: Namespace, model_paths: Iterator[Path], multicomponent: bool, output_path: Path +): + model = load_model(model_paths[0], multicomponent) + output_columns = load_output_columns(model_paths[0]) + bounded = any( + isinstance(model.criterion, LossFunctionRegistry[loss_function]) + for loss_function in LossFunctionRegistry.keys() + if "bounded" in loss_function + ) + format_kwargs = dict( + no_header_row=args.no_header_row, + smiles_cols=args.smiles_columns, + rxn_cols=args.reaction_columns, + ignore_cols=None, + splits_col=None, + weight_col=None, + bounded=bounded, + ) + format_kwargs["target_cols"] = output_columns if args.evaluation_methods is not None else [] + test_loader = prepare_data_loader(args, multicomponent, False, format_kwargs) + 
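# Editorial sketch (not part of the original diff): a minimal, hypothetical shell
# invocation of this predict subcommand, using only flags defined in add_predict_args
# above; file names and the `chemprop` console-script name are assumptions:
#
#     chemprop predict -i test.csv -o preds.csv \
#         --model-paths model_0.pt model_1.pt \
#         --uncertainty-method ensemble --cal-path cal.csv
#
# When more than one model path is given, the per-model predictions are additionally
# written to a companion file with "_individual" appended to the output stem
# (see save_individual_predictions below).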
logger.info(f"test size: {len(test_loader.dataset)}") + if args.cal_path is not None: + format_kwargs["target_cols"] = output_columns + cal_loader = prepare_data_loader(args, multicomponent, True, format_kwargs) + logger.info(f"calibration size: {len(cal_loader.dataset)}") + + uncertainty_estimator = Factory.build( + UncertaintyEstimatorRegistry[args.uncertainty_method], + ensemble_size=args.dropout_sampling_size, + dropout=args.uncertainty_dropout_p, + ) + + models = [load_model(model_path, multicomponent) for model_path in model_paths] + trainer = pl.Trainer( + logger=False, enable_progress_bar=True, accelerator=args.accelerator, devices=args.devices + ) + test_individual_preds, test_individual_uncs = uncertainty_estimator( + test_loader, models, trainer + ) + test_preds = torch.mean(test_individual_preds, dim=0) + if not isinstance(uncertainty_estimator, NoUncertaintyEstimator): + test_uncs = torch.mean(test_individual_uncs, dim=0) + else: + test_uncs = None + + if args.calibration_method is not None: + uncertainty_calibrator = Factory.build( + UncertaintyCalibratorRegistry[args.calibration_method], + p=args.calibration_interval_percentile / 100, + alpha=args.conformal_alpha, + ) + cal_targets = cal_loader.dataset.Y + cal_mask = torch.from_numpy(np.isfinite(cal_targets)) + cal_targets = np.nan_to_num(cal_targets, nan=0.0) + cal_targets = torch.from_numpy(cal_targets) + cal_individual_preds, cal_individual_uncs = uncertainty_estimator( + cal_loader, models, trainer + ) + cal_preds = torch.mean(cal_individual_preds, dim=0) + cal_uncs = torch.mean(cal_individual_uncs, dim=0) + if isinstance(uncertainty_calibrator, MVEWeightingCalibrator): + uncertainty_calibrator.fit(cal_preds, cal_individual_uncs, cal_targets, cal_mask) + test_uncs = uncertainty_calibrator.apply(cal_individual_uncs) + else: + if isinstance(uncertainty_calibrator, RegressionCalibrator): + uncertainty_calibrator.fit(cal_preds, cal_uncs, cal_targets, cal_mask) + else: + uncertainty_calibrator.fit(cal_uncs, cal_targets, cal_mask) + test_uncs = uncertainty_calibrator.apply(test_uncs) + for i in range(test_individual_uncs.shape[0]): + test_individual_uncs[i] = uncertainty_calibrator.apply(test_individual_uncs[i]) + + if args.evaluation_methods is not None: + uncertainty_evaluators = [ + Factory.build(UncertaintyEvaluatorRegistry[method]) + for method in args.evaluation_methods + ] + logger.info("Uncertainty evaluation metric:") + for evaluator in uncertainty_evaluators: + test_targets = test_loader.dataset.Y + test_mask = torch.from_numpy(np.isfinite(test_targets)) + test_targets = np.nan_to_num(test_targets, nan=0.0) + test_targets = torch.from_numpy(test_targets) + if isinstance(evaluator, RegressionEvaluator): + metric_value = evaluator.evaluate(test_preds, test_uncs, test_targets, test_mask) + else: + metric_value = evaluator.evaluate(test_uncs, test_targets, test_mask) + logger.info(f"{evaluator.alias}: {metric_value.tolist()}") + + if args.uncertainty_method == "none" and ( + isinstance(model.predictor, MveFFN) or isinstance(model.predictor, EvidentialFFN) + ): + test_preds = test_preds[..., 0] + test_individual_preds = test_individual_preds[..., 0] + + if output_columns is None: + output_columns = [ + f"pred_{i}" for i in range(test_preds.shape[1]) + ] # TODO: need to improve this for cases like multi-task MVE and multi-task multiclass + + save_predictions(args, model, output_columns, test_preds, test_uncs, output_path) + + if len(model_paths) > 1: + save_individual_predictions( + args, + model, + model_paths, + 
output_columns, + test_individual_preds, + test_individual_uncs, + output_path, + ) + + +def save_predictions(args, model, output_columns, test_preds, test_uncs, output_path): + unc_columns = [f"{col}_unc" for col in output_columns] + + if isinstance(model.predictor, MulticlassClassificationFFN): + output_columns = output_columns + [f"{col}_prob" for col in output_columns] + predicted_class_labels = test_preds.argmax(axis=-1) + formatted_probability_strings = np.apply_along_axis( + lambda x: ",".join(map(str, x)), 2, test_preds.numpy() + ) + test_preds = np.concatenate( + (predicted_class_labels, formatted_probability_strings), axis=-1 + ) + + df_test = pd.read_csv( + args.test_path, header=None if args.no_header_row else "infer", index_col=False + ) + df_test[output_columns] = test_preds + + if args.uncertainty_method not in ["none", "classification"]: + df_test[unc_columns] = np.round(test_uncs, 6) + + if output_path.suffix == ".pkl": + df_test = df_test.reset_index(drop=True) + df_test.to_pickle(output_path) + else: + df_test.to_csv(output_path, index=False) + logger.info(f"Predictions saved to '{output_path}'") + + +def save_individual_predictions( + args, + model, + model_paths, + output_columns, + test_individual_preds, + test_individual_uncs, + output_path, +): + unc_columns = [ + f"{col}_unc_model_{i}" for i in range(len(model_paths)) for col in output_columns + ] + + if isinstance(model.predictor, MulticlassClassificationFFN): + output_columns = [ + item + for i in range(len(model_paths)) + for col in output_columns + for item in (f"{col}_model_{i}", f"{col}_prob_model_{i}") + ] + + predicted_class_labels = test_individual_preds.argmax(axis=-1) + formatted_probability_strings = np.apply_along_axis( + lambda x: ",".join(map(str, x)), 3, test_individual_preds.numpy() + ) + test_individual_preds = np.concatenate( + (predicted_class_labels, formatted_probability_strings), axis=-1 + ) + else: + output_columns = [ + f"{col}_model_{i}" for i in range(len(model_paths)) for col in output_columns + ] + + m, n, t = test_individual_preds.shape + test_individual_preds = np.transpose(test_individual_preds, (1, 0, 2)).reshape(n, m * t) + df_test = pd.read_csv( + args.test_path, header=None if args.no_header_row else "infer", index_col=False + ) + df_test[output_columns] = test_individual_preds + + if args.uncertainty_method not in ["none", "classification", "ensemble"]: + m, n, t = test_individual_uncs.shape + test_individual_uncs = np.transpose(test_individual_uncs, (1, 0, 2)).reshape(n, m * t) + df_test[unc_columns] = np.round(test_individual_uncs, 6) + + output_path = output_path.parent / Path( + str(args.output.stem) + "_individual" + str(output_path.suffix) + ) + if output_path.suffix == ".pkl": + df_test = df_test.reset_index(drop=True) + df_test.to_pickle(output_path) + else: + df_test.to_csv(output_path, index=False) + logger.info(f"Individual predictions saved to '{output_path}'") + for i, model_path in enumerate(model_paths): + logger.info( + f"Results from model path {model_path} are saved under the column name ending with 'model_{i}'" + ) + + +def main(args): + match (args.smiles_columns, args.reaction_columns): + case [None, None]: + n_components = 1 + case [_, None]: + n_components = len(args.smiles_columns) + case [None, _]: + n_components = len(args.reaction_columns) + case _: + n_components = len(args.smiles_columns) + len(args.reaction_columns) + + multicomponent = n_components > 1 + + model_paths = find_models(args.model_paths) + + make_prediction_for_models(args, model_paths, 
multicomponent, output_path=args.output) + + +if __name__ == "__main__": + parser = ArgumentParser() + parser = PredictSubcommand.add_args(parser) + + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True) + args = parser.parse_args() + args = PredictSubcommand.func(args) diff --git a/chemprop-updated/chemprop/cli/train.py b/chemprop-updated/chemprop/cli/train.py new file mode 100644 index 0000000000000000000000000000000000000000..ab0c3be288dba0047c03f9470e1d4c9ee280581b --- /dev/null +++ b/chemprop-updated/chemprop/cli/train.py @@ -0,0 +1,1340 @@ +from copy import deepcopy +from io import StringIO +import json +import logging +from pathlib import Path +import sys +from tempfile import TemporaryDirectory + +from configargparse import ArgumentError, ArgumentParser, Namespace +from lightning import pytorch as pl +from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint +from lightning.pytorch.loggers import CSVLogger, TensorBoardLogger +from lightning.pytorch.strategies import DDPStrategy +import numpy as np +import pandas as pd +from rich.console import Console +from rich.table import Column, Table +import torch +import torch.nn as nn + +from chemprop.cli.common import ( + add_common_args, + find_models, + process_common_args, + validate_common_args, +) +from chemprop.cli.conf import CHEMPROP_TRAIN_DIR, NOW +from chemprop.cli.utils import ( + LookupAction, + Subcommand, + build_data_from_files, + get_column_names, + make_dataset, + parse_indices, +) +from chemprop.cli.utils.args import uppercase +from chemprop.data import ( + MoleculeDataset, + MolGraphDataset, + MulticomponentDataset, + ReactionDatapoint, + SplitType, + build_dataloader, + make_split_indices, + split_data_by_indices, +) +from chemprop.data.datasets import _MolGraphDatasetMixin +from chemprop.models import MPNN, MulticomponentMPNN, save_model +from chemprop.nn import AggregationRegistry, LossFunctionRegistry, MetricRegistry, PredictorRegistry +from chemprop.nn.message_passing import ( + AtomMessagePassing, + BondMessagePassing, + MulticomponentMessagePassing, +) +from chemprop.nn.transforms import GraphTransform, ScaleTransform, UnscaleTransform +from chemprop.nn.utils import Activation +from chemprop.utils import Factory + +logger = logging.getLogger(__name__) + + +_CV_REMOVAL_ERROR = ( + "The -k/--num-folds argument was removed in v2.1.0 - use --num-replicates instead." +) + + +class TrainSubcommand(Subcommand): + COMMAND = "train" + HELP = "Train a chemprop model." 
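# Editorial sketch (not part of the original diff): a typical, hypothetical invocation
# of this train subcommand, using only flags defined in add_train_args below; the input
# file and the `chemprop` console-script name are assumptions, and the values shown for
# -t, --epochs, --split, and --split-sizes are the documented defaults:
#
#     chemprop train -i mols.csv -t regression --epochs 50 \
#         --split random --split-sizes 0.8 0.1 0.1 -o chemprop_training/mols
#
# Like the other subcommands, this class is registered with the top-level parser in
# main.py via subcommand.add(subparsers, parents), so `chemprop train ...` dispatches
# to the func classmethod defined just below after argument parsing.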
+ parser = None + + @classmethod + def add_args(cls, parser: ArgumentParser) -> ArgumentParser: + parser = add_common_args(parser) + parser = add_train_args(parser) + cls.parser = parser + return parser + + @classmethod + def func(cls, args: Namespace): + args = process_common_args(args) + validate_common_args(args) + args = process_train_args(args) + validate_train_args(args) + + args.output_dir.mkdir(exist_ok=True, parents=True) + config_path = args.output_dir / "config.toml" + save_config(cls.parser, args, config_path) + main(args) + + +def add_train_args(parser: ArgumentParser) -> ArgumentParser: + parser.add_argument( + "--config-path", + type=Path, + is_config_file=True, + help="Path to a configuration file (command line arguments override values in the configuration file)", + ) + parser.add_argument( + "-i", + "--data-path", + type=Path, + help="Path to an input CSV file containing SMILES and the associated target values", + ) + parser.add_argument( + "-o", + "--output-dir", + "--save-dir", + type=Path, + help="Directory where training outputs will be saved (defaults to ``CURRENT_DIRECTORY/chemprop_training/STEM_OF_INPUT/TIME_STAMP``)", + ) + parser.add_argument( + "--remove-checkpoints", + action="store_true", + help="Remove intermediate checkpoint files after training is complete.", + ) + + # TODO: Add in v2.1; see if we can tell lightning how often to log training loss + # parser.add_argument( + # "--log-frequency", + # type=int, + # default=10, + # help="The number of batches between each logging of the training loss.", + # ) + + transfer_args = parser.add_argument_group("transfer learning args") + transfer_args.add_argument( + "--checkpoint", + type=Path, + nargs="+", + help="Path to checkpoint(s) or model file(s) for loading and overwriting weights. Accepts a single pre-trained model checkpoint (.ckpt), a single model file (.pt), a directory containing such files, or a list of paths and directories. If a directory is provided, it will recursively search for and use all (.pt) files found for prediction.", + ) + transfer_args.add_argument( + "--freeze-encoder", + action="store_true", + help="Freeze the message passing layer from the checkpoint model (specified by ``--checkpoint``).", + ) + transfer_args.add_argument( + "--model-frzn", + help="Path to model checkpoint file to be loaded for overwriting and freezing weights. By default, all MPNN weights are frozen with this option.", + ) + transfer_args.add_argument( + "--frzn-ffn-layers", + type=int, + default=0, + help="Freeze the first ``n`` layers of the FFN from the checkpoint model (specified by ``--checkpoint``). The message passing layer should also be frozen with ``--freeze-encoder``.", + ) + # transfer_args.add_argument( + # "--freeze-first-only", + # action="store_true", + # help="Determines whether or not to use checkpoint_frzn for just the first encoder. Default (False) is to use the checkpoint to freeze all encoders. (only relevant for number_of_molecules > 1, where checkpoint model has number_of_molecules = 1)", + # ) + + # TODO: Add in v2.1 + # parser.add_argument( + # "--resume-experiment", + # action="store_true", + # help="Whether to resume the experiment. Loads test results from any folds that have already been completed and skips training those folds.", + # ) + # parser.add_argument( + # "--config-path", + # help="Path to a :code:`.json` file containing arguments. 
Any arguments present in the config file will override arguments specified via the command line or by the defaults.", + # ) + parser.add_argument( + "--ensemble-size", + type=int, + default=1, + help="Number of models in ensemble for each splitting of data", + ) + + # TODO: Add in v2.2 + # abt_args = parser.add_argument_group("atom/bond target args") + # abt_args.add_argument( + # "--is-atom-bond-targets", + # action="store_true", + # help="Whether this is atomic/bond properties prediction.", + # ) + # abt_args.add_argument( + # "--no-adding-bond-types", + # action="store_true", + # help="Whether the bond types determined by RDKit molecules added to the output of bond targets. This option is intended to be used with the :code:`is_atom_bond_targets`.", + # ) + # abt_args.add_argument( + # "--keeping-atom-map", + # action="store_true", + # help="Whether RDKit molecules keep the original atom mapping. This option is intended to be used when providing atom-mapped SMILES with the :code:`is_atom_bond_targets`.", + # ) + # abt_args.add_argument( + # "--no-shared-atom-bond-ffn", + # action="store_true", + # help="Whether the FFN weights for atom and bond targets should be independent between tasks.", + # ) + # abt_args.add_argument( + # "--weights-ffn-num-layers", + # type=int, + # default=2, + # help="Number of layers in FFN for determining weights used in constrained targets.", + # ) + + mp_args = parser.add_argument_group("message passing") + mp_args.add_argument( + "--message-hidden-dim", type=int, default=300, help="Hidden dimension of the messages" + ) + mp_args.add_argument( + "--message-bias", action="store_true", help="Add bias to the message passing layers" + ) + mp_args.add_argument("--depth", type=int, default=3, help="Number of message passing steps") + mp_args.add_argument( + "--undirected", + action="store_true", + help="Pass messages on undirected bonds/edges (always sum the two relevant bond vectors)", + ) + mp_args.add_argument( + "--dropout", + type=float, + default=0.0, + help="Dropout probability in message passing/FFN layers", + ) + mp_args.add_argument( + "--mpn-shared", + action="store_true", + help="Whether to use the same message passing neural network for all input molecules (only relevant if ``number_of_molecules`` > 1)", + ) + mp_args.add_argument( + "--activation", + type=uppercase, + default="RELU", + choices=list(Activation.keys()), + help="Activation function in message passing/FFN layers", + ) + mp_args.add_argument( + "--aggregation", + "--agg", + default="norm", + action=LookupAction(AggregationRegistry), + help="Aggregation mode to use during graph predictor", + ) + mp_args.add_argument( + "--aggregation-norm", + type=float, + default=100, + help="Normalization factor by which to divide summed up atomic features for ``norm`` aggregation", + ) + mp_args.add_argument( + "--atom-messages", action="store_true", help="Pass messages on atoms rather than bonds." 
+ ) + + # TODO: Add in v2.1 + # mpsolv_args = parser.add_argument_group("message passing with solvent") + # mpsolv_args.add_argument( + # "--reaction-solvent", + # action="store_true", + # help="Whether to adjust the MPNN layer to take as input a reaction and a molecule, and to encode them with separate MPNNs.", + # ) + # mpsolv_args.add_argument( + # "--bias-solvent", + # action="store_true", + # help="Whether to add bias to linear layers for solvent MPN if :code:`reaction_solvent` is True.", + # ) + # mpsolv_args.add_argument( + # "--hidden-size-solvent", + # type=int, + # default=300, + # help="Dimensionality of hidden layers in solvent MPN if :code:`reaction_solvent` is True.", + # ) + # mpsolv_args.add_argument( + # "--depth-solvent", + # type=int, + # default=3, + # help="Number of message passing steps for solvent if :code:`reaction_solvent` is True.", + # ) + + ffn_args = parser.add_argument_group("FFN args") + ffn_args.add_argument( + "--ffn-hidden-dim", type=int, default=300, help="Hidden dimension in the FFN top model" + ) + ffn_args.add_argument( # TODO: the default in v1 was 2. (see weights_ffn_num_layers option) Do we really want the default to now be 1? + "--ffn-num-layers", type=int, default=1, help="Number of layers in FFN top model" + ) + # TODO: Decide if we want to implment this in v2 + # ffn_args.add_argument( + # "--features-only", + # action="store_true", + # help="Use only the additional features in an FFN, no graph network.", + # ) + + extra_mpnn_args = parser.add_argument_group("extra MPNN args") + extra_mpnn_args.add_argument( + "--batch-norm", action="store_true", help="Turn on batch normalization after aggregation" + ) + extra_mpnn_args.add_argument( + "--multiclass-num-classes", + type=int, + default=3, + help="Number of classes when running multiclass classification", + ) + # TODO: Add in v2.1 + # extra_mpnn_args.add_argument( + # "--spectral-activation", + # default="exp", + # choices=["softplus", "exp"], + # help="Indicates which function to use in task_type spectra training to constrain outputs to be positive.", + # ) + + train_data_args = parser.add_argument_group("training input data args") + train_data_args.add_argument( + "-w", + "--weight-column", + help="Name of the column in the input CSV containing individual data weights", + ) + train_data_args.add_argument( + "--target-columns", + nargs="+", + help="Name of the columns containing target values (by default, uses all columns except the SMILES column and the ``ignore_columns``)", + ) + train_data_args.add_argument( + "--ignore-columns", + nargs="+", + help="Name of the columns to ignore when ``target_columns`` is not provided", + ) + train_data_args.add_argument( + "--no-cache", + action="store_true", + help="Turn off caching the featurized ``MolGraph`` s at the beginning of training", + ) + train_data_args.add_argument( + "--splits-column", + help="Name of the column in the input CSV file containing 'train', 'val', or 'test' for each row.", + ) + # TODO: Add in v2.1 + # train_data_args.add_argument( + # "--spectra-phase-mask-path", + # help="Path to a file containing a phase mask array, used for excluding particular regions in spectra predictions.", + # ) + + train_args = parser.add_argument_group("training args") + train_args.add_argument( + "-t", + "--task-type", + default="regression", + action=LookupAction(PredictorRegistry), + help="Type of dataset (determines the default loss function used during training, defaults to ``regression``)", + ) + train_args.add_argument( + "-l", + 
"--loss-function", + action=LookupAction(LossFunctionRegistry), + help="Loss function to use during training (will use the default loss function for the given task type if not specified)", + ) + train_args.add_argument( + "--v-kl", + "--evidential-regularization", + type=float, + default=0.0, + help="Specify the value used in regularization for evidential loss function. The default value recommended by Soleimany et al. (2021) is 0.2. However, the optimal value is dataset-dependent, so it is recommended that users test different values to find the best value for their model.", + ) + + train_args.add_argument( + "--eps", type=float, default=1e-8, help="Evidential regularization epsilon" + ) + train_args.add_argument( + "--alpha", type=float, default=0.1, help="Target error bounds for quantile interval loss" + ) + # TODO: Add in v2.1 + # train_args.add_argument( # TODO: Is threshold the same thing as the spectra target floor? I'm not sure but combined them. + # "-T", + # "--threshold", + # "--spectra-target-floor", + # type=float, + # default=1e-8, + # help="spectral threshold limit. v1 help string: Values in targets for dataset type spectra are replaced with this value, intended to be a small positive number used to enforce positive values.", + # ) + train_args.add_argument( + "--metrics", + "--metric", + nargs="+", + action=LookupAction(MetricRegistry), + help="Specify the evaluation metrics. If unspecified, chemprop will use the following metrics for given dataset types: regression -> ``rmse``, classification -> ``roc``, multiclass -> ``ce`` ('cross entropy'), spectral -> ``sid``. If multiple metrics are provided, the 0-th one will be used for early stopping and checkpointing.", + ) + train_args.add_argument( + "--tracking-metric", + default="val_loss", + help="The metric to track for early stopping and checkpointing. 
Defaults to the criterion used during training.", + ) + train_args.add_argument( + "--show-individual-scores", + action="store_true", + help="Show all scores for individual targets, not just average, at the end.", + ) + train_args.add_argument( + "--task-weights", + nargs="+", + type=float, + help="Weights to apply for whole tasks in the loss function", + ) + train_args.add_argument( + "--warmup-epochs", + type=int, + default=2, + help="Number of epochs during which learning rate increases linearly from ``init_lr`` to ``max_lr`` (afterwards, learning rate decreases exponentially from ``max_lr`` to ``final_lr``)", + ) + + train_args.add_argument("--init-lr", type=float, default=1e-4, help="Initial learning rate.") + train_args.add_argument("--max-lr", type=float, default=1e-3, help="Maximum learning rate.") + train_args.add_argument("--final-lr", type=float, default=1e-4, help="Final learning rate.") + train_args.add_argument("--epochs", type=int, default=50, help="Number of epochs to train over") + train_args.add_argument( + "--patience", + type=int, + default=None, + help="Number of epochs to wait for improvement before early stopping", + ) + train_args.add_argument( + "--grad-clip", + type=float, + help="Passed directly to the lightning trainer which controls grad clipping (see the ``Trainer()`` docstring for details)", + ) + train_args.add_argument( + "--class-balance", + action="store_true", + help="Ensures each training batch contains an equal number of positive and negative samples.", + ) + + split_args = parser.add_argument_group("split args") + split_args.add_argument( + "--split", + "--split-type", + type=uppercase, + default="RANDOM", + choices=list(SplitType.keys()), + help="Method of splitting the data into train/val/test (case insensitive)", + ) + split_args.add_argument( + "--split-sizes", + type=float, + nargs=3, + default=[0.8, 0.1, 0.1], + help="Split proportions for train/validation/test sets", + ) + split_args.add_argument( + "--split-key-molecule", + type=int, + default=0, + help="Specify the index of the key molecule used for splitting when multiple molecules are present and constrained split_type is used (e.g., ``scaffold_balanced`` or ``random_with_repeated_smiles``). Note that this index begins with zero for the first molecule.", + ) + split_args.add_argument("--num-replicates", type=int, default=1, help="Number of replicates.") + split_args.add_argument("-k", "--num-folds", help=_CV_REMOVAL_ERROR) + split_args.add_argument( + "--save-smiles-splits", + action="store_true", + help="Whether to store the SMILES in each train/val/test split", + ) + split_args.add_argument( + "--splits-file", + type=Path, + help="Path to a JSON file containing pre-defined splits for the input data, formatted as a list of dictionaries with keys ``train``, ``val``, and ``test`` and values as lists of indices or formatted strings (e.g. [0, 1, 2, 4] or '0-2,4')", + ) + split_args.add_argument( + "--data-seed", + type=int, + default=0, + help="Specify the random seed to use when splitting data into train/val/test sets. 
When ``--num-replicates`` > 1, the first replicate uses this seed and all subsequent replicates add 1 to the seed (also used for shuffling data in ``build_dataloader`` when ``shuffle`` is True).", + ) + + parser.add_argument( + "--pytorch-seed", + type=int, + default=None, + help="Seed for PyTorch randomness (e.g., random initial weights)", + ) + + return parser + + +def process_train_args(args: Namespace) -> Namespace: + if args.output_dir is None: + args.output_dir = CHEMPROP_TRAIN_DIR / args.data_path.stem / NOW + + return args + + +def validate_train_args(args): + if args.config_path is None and args.data_path is None: + raise ArgumentError(argument=None, message="Data path must be provided for training.") + + if args.num_folds is not None: # i.e. user-specified + raise ArgumentError(argument=None, message=_CV_REMOVAL_ERROR) + + if args.data_path.suffix not in [".csv"]: + raise ArgumentError( + argument=None, message=f"Input data must be a CSV file. Got {args.data_path}" + ) + + if args.epochs != -1 and args.epochs <= args.warmup_epochs: + raise ArgumentError( + argument=None, + message=f"The number of epochs should be higher than the number of epochs during warmup. Got {args.epochs} epochs and {args.warmup_epochs} warmup epochs", + ) + + # TODO: model_frzn is deprecated and then remove in v2.2 + if args.checkpoint is not None and args.model_frzn is not None: + raise ArgumentError( + argument=None, + message="`--checkpoint` and `--model-frzn` cannot be used at the same time.", + ) + + if "--model-frzn" in sys.argv: + logger.warning( + "`--model-frzn` is deprecated and will be removed in v2.2. " + "Please use `--checkpoint` with `--freeze-encoder` instead." + ) + + if args.freeze_encoder and args.checkpoint is None: + raise ArgumentError( + argument=None, + message="`--freeze-encoder` can only be used when `--checkpoint` is used.", + ) + + if args.frzn_ffn_layers > 0: + if args.checkpoint is None and args.model_frzn is None: + raise ArgumentError( + argument=None, + message="`--frzn-ffn-layers` can only be used when `--checkpoint` or `--model-frzn` (depreciated in v2.1) is used.", + ) + if args.checkpoint is not None and not args.freeze_encoder: + raise ArgumentError( + argument=None, + message="To freeze the first `n` layers of the FFN via `--frzn-ffn-layers`. The message passing layer should also be frozen with `--freeze-encoder`.", + ) + + if args.class_balance and args.task_type != "classification": + raise ArgumentError( + argument=None, message="Class balance is only applicable for classification tasks." + ) + + valid_tracking_metrics = ( + args.metrics or [PredictorRegistry[args.task_type]._T_default_metric.alias] + ) + ["val_loss"] + if args.tracking_metric not in valid_tracking_metrics: + raise ArgumentError( + argument=None, + message=f"Tracking metric must be one of {','.join(valid_tracking_metrics)}. " + f"Got {args.tracking_metric}. 
Additional tracking metric options can be specified with " + "the `--metrics` flag.", + ) + + input_cols, target_cols = get_column_names( + args.data_path, + args.smiles_columns, + args.reaction_columns, + args.target_columns, + args.ignore_columns, + args.splits_column, + args.weight_column, + args.no_header_row, + ) + + args.input_columns = input_cols + args.target_columns = target_cols + + return args + + +def normalize_inputs(train_dset, val_dset, args): + multicomponent = isinstance(train_dset, MulticomponentDataset) + num_components = train_dset.n_components if multicomponent else 1 + + X_d_transform = None + V_f_transforms = [nn.Identity()] * num_components + E_f_transforms = [nn.Identity()] * num_components + V_d_transforms = [None] * num_components + graph_transforms = [] + + d_xd = train_dset.d_xd + d_vf = train_dset.d_vf + d_ef = train_dset.d_ef + d_vd = train_dset.d_vd + + if d_xd > 0 and not args.no_descriptor_scaling: + scaler = train_dset.normalize_inputs("X_d") + val_dset.normalize_inputs("X_d", scaler) + + scaler = scaler if not isinstance(scaler, list) else scaler[0] + + if scaler is not None: + logger.info( + f"Descriptors: loc = {np.array2string(scaler.mean_, precision=3)}, scale = {np.array2string(scaler.scale_, precision=3)}" + ) + X_d_transform = ScaleTransform.from_standard_scaler(scaler) + + if d_vf > 0 and not args.no_atom_feature_scaling: + scaler = train_dset.normalize_inputs("V_f") + val_dset.normalize_inputs("V_f", scaler) + + scalers = [scaler] if not isinstance(scaler, list) else scaler + + for i, scaler in enumerate(scalers): + if scaler is None: + continue + + logger.info( + f"Atom features for mol {i}: loc = {np.array2string(scaler.mean_, precision=3)}, scale = {np.array2string(scaler.scale_, precision=3)}" + ) + featurizer = ( + train_dset.datasets[i].featurizer if multicomponent else train_dset.featurizer + ) + V_f_transforms[i] = ScaleTransform.from_standard_scaler( + scaler, pad=featurizer.atom_fdim - featurizer.extra_atom_fdim + ) + + if d_ef > 0 and not args.no_bond_feature_scaling: + scaler = train_dset.normalize_inputs("E_f") + val_dset.normalize_inputs("E_f", scaler) + + scalers = [scaler] if not isinstance(scaler, list) else scaler + + for i, scaler in enumerate(scalers): + if scaler is None: + continue + + logger.info( + f"Bond features for mol {i}: loc = {np.array2string(scaler.mean_, precision=3)}, scale = {np.array2string(scaler.scale_, precision=3)}" + ) + featurizer = ( + train_dset.datasets[i].featurizer if multicomponent else train_dset.featurizer + ) + E_f_transforms[i] = ScaleTransform.from_standard_scaler( + scaler, pad=featurizer.bond_fdim - featurizer.extra_bond_fdim + ) + + for V_f_transform, E_f_transform in zip(V_f_transforms, E_f_transforms): + graph_transforms.append(GraphTransform(V_f_transform, E_f_transform)) + + if d_vd > 0 and not args.no_atom_descriptor_scaling: + scaler = train_dset.normalize_inputs("V_d") + val_dset.normalize_inputs("V_d", scaler) + + scalers = [scaler] if not isinstance(scaler, list) else scaler + + for i, scaler in enumerate(scalers): + if scaler is None: + continue + + logger.info( + f"Atom descriptors for mol {i}: loc = {np.array2string(scaler.mean_, precision=3)}, scale = {np.array2string(scaler.scale_, precision=3)}" + ) + V_d_transforms[i] = ScaleTransform.from_standard_scaler(scaler) + + return X_d_transform, graph_transforms, V_d_transforms + + +def load_and_use_pretrained_model_scalers(model_path: Path, train_dset, val_dset) -> None: + if isinstance(train_dset, MulticomponentDataset): + _model 
= MulticomponentMPNN.load_from_file(model_path) + blocks = _model.message_passing.blocks + train_dsets = train_dset.datasets + val_dsets = val_dset.datasets + else: + _model = MPNN.load_from_file(model_path) + blocks = [_model.message_passing] + train_dsets = [train_dset] + val_dsets = [val_dset] + + for i in range(len(blocks)): + if isinstance(_model.X_d_transform, ScaleTransform): + scaler = _model.X_d_transform.to_standard_scaler() + train_dsets[i].normalize_inputs("X_d", scaler) + val_dsets[i].normalize_inputs("X_d", scaler) + + if isinstance(blocks[i].graph_transform, GraphTransform): + if isinstance(blocks[i].graph_transform.V_transform, ScaleTransform): + V_anti_pad = ( + train_dsets[i].featurizer.atom_fdim - train_dsets[i].featurizer.extra_atom_fdim + ) + scaler = blocks[i].graph_transform.V_transform.to_standard_scaler( + anti_pad=V_anti_pad + ) + train_dsets[i].normalize_inputs("V_f", scaler) + val_dsets[i].normalize_inputs("V_f", scaler) + if isinstance(blocks[i].graph_transform.E_transform, ScaleTransform): + E_anti_pad = ( + train_dsets[i].featurizer.bond_fdim - train_dsets[i].featurizer.extra_bond_fdim + ) + scaler = blocks[i].graph_transform.E_transform.to_standard_scaler( + anti_pad=E_anti_pad + ) + train_dsets[i].normalize_inputs("E_f", scaler) + val_dsets[i].normalize_inputs("E_f", scaler) + + if isinstance(blocks[i].V_d_transform, ScaleTransform): + scaler = blocks[i].V_d_transform.to_standard_scaler() + train_dsets[i].normalize_inputs("V_d", scaler) + val_dsets[i].normalize_inputs("V_d", scaler) + + if isinstance(_model.predictor.output_transform, UnscaleTransform): + scaler = _model.predictor.output_transform.to_standard_scaler() + train_dset.normalize_targets(scaler) + val_dset.normalize_targets(scaler) + + +def save_config(parser: ArgumentParser, args: Namespace, config_path: Path): + config_args = deepcopy(args) + for key, value in vars(config_args).items(): + if isinstance(value, Path): + setattr(config_args, key, str(value)) + + for key in ["atom_features_path", "atom_descriptors_path", "bond_features_path"]: + if getattr(config_args, key) is not None: + for index, path in getattr(config_args, key).items(): + getattr(config_args, key)[index] = str(path) + + parser.write_config_file(parsed_namespace=config_args, output_file_paths=[str(config_path)]) + + +def save_smiles_splits(args: Namespace, output_dir, train_dset, val_dset, test_dset): + match (args.smiles_columns, args.reaction_columns): + case [_, None]: + column_labels = deepcopy(args.smiles_columns) + case [None, _]: + column_labels = deepcopy(args.reaction_columns) + case _: + column_labels = deepcopy(args.smiles_columns) + column_labels.extend(args.reaction_columns) + + train_smis = train_dset.names + df_train = pd.DataFrame(train_smis, columns=column_labels) + df_train.to_csv(output_dir / "train_smiles.csv", index=False) + + val_smis = val_dset.names + df_val = pd.DataFrame(val_smis, columns=column_labels) + df_val.to_csv(output_dir / "val_smiles.csv", index=False) + + if test_dset is not None: + test_smis = test_dset.names + df_test = pd.DataFrame(test_smis, columns=column_labels) + df_test.to_csv(output_dir / "test_smiles.csv", index=False) + + +def build_splits(args, format_kwargs, featurization_kwargs): + """build the train/val/test splits""" + logger.info(f"Pulling data from file: {args.data_path}") + all_data = build_data_from_files( + args.data_path, + p_descriptors=args.descriptors_path, + p_atom_feats=args.atom_features_path, + p_bond_feats=args.bond_features_path, + 
p_atom_descs=args.atom_descriptors_path, + **format_kwargs, + **featurization_kwargs, + ) + + if args.splits_column is not None: + df = pd.read_csv( + args.data_path, header=None if args.no_header_row else "infer", index_col=False + ) + grouped = df.groupby(df[args.splits_column].str.lower()) + train_indices = grouped.groups.get("train", pd.Index([])).tolist() + val_indices = grouped.groups.get("val", pd.Index([])).tolist() + test_indices = grouped.groups.get("test", pd.Index([])).tolist() + train_indices, val_indices, test_indices = [train_indices], [val_indices], [test_indices] + + elif args.splits_file is not None: + with open(args.splits_file, "rb") as json_file: + split_idxss = json.load(json_file) + train_indices = [parse_indices(d["train"]) for d in split_idxss] + val_indices = [parse_indices(d["val"]) for d in split_idxss] + test_indices = [parse_indices(d["test"]) for d in split_idxss] + args.num_replicates = len(split_idxss) + + else: + splitting_data = all_data[args.split_key_molecule] + if isinstance(splitting_data[0], ReactionDatapoint): + splitting_mols = [datapoint.rct for datapoint in splitting_data] + else: + splitting_mols = [datapoint.mol for datapoint in splitting_data] + train_indices, val_indices, test_indices = make_split_indices( + splitting_mols, args.split, args.split_sizes, args.data_seed, args.num_replicates + ) + + train_data, val_data, test_data = split_data_by_indices( + all_data, train_indices, val_indices, test_indices + ) + for i_split in range(len(train_data)): + sizes = [len(train_data[i_split][0]), len(val_data[i_split][0]), len(test_data[i_split][0])] + logger.info(f"train/val/test split_{i_split} sizes: {sizes}") + + return train_data, val_data, test_data + + +def summarize( + target_cols: list[str], task_type: str, dataset: _MolGraphDatasetMixin +) -> tuple[list, list]: + if task_type in [ + "regression", + "regression-mve", + "regression-evidential", + "regression-quantile", + ]: + if isinstance(dataset, MulticomponentDataset): + y = dataset.datasets[0].Y + else: + y = dataset.Y + y_mean = np.nanmean(y, axis=0) + y_std = np.nanstd(y, axis=0) + y_median = np.nanmedian(y, axis=0) + mean_dev_abs = np.abs(y - y_mean) + num_targets = np.sum(~np.isnan(y), axis=0) + frac_1_sigma = np.sum((mean_dev_abs < y_std), axis=0) / num_targets + frac_2_sigma = np.sum((mean_dev_abs < 2 * y_std), axis=0) / num_targets + + column_headers = ["Statistic"] + [f"Value ({target_cols[i]})" for i in range(y.shape[1])] + table_rows = [ + ["Num. smiles"] + [f"{len(y)}" for i in range(y.shape[1])], + ["Num. targets"] + [f"{num_targets[i]}" for i in range(y.shape[1])], + ["Num. NaN"] + [f"{len(y) - num_targets[i]}" for i in range(y.shape[1])], + ["Mean"] + [f"{mean:0.3g}" for mean in y_mean], + ["Std. 
dev."] + [f"{std:0.3g}" for std in y_std], + ["Median"] + [f"{median:0.3g}" for median in y_median], + ["% within 1 s.d."] + [f"{sigma:0.0%}" for sigma in frac_1_sigma], + ["% within 2 s.d."] + [f"{sigma:0.0%}" for sigma in frac_2_sigma], + ] + return (column_headers, table_rows) + elif task_type in [ + "classification", + "classification-dirichlet", + "multiclass", + "multiclass-dirichlet", + ]: + if isinstance(dataset, MulticomponentDataset): + y = dataset.datasets[0].Y + else: + y = dataset.Y + + mask = np.isnan(y) + classes = np.sort(np.unique(y[~mask])) + + class_counts = np.stack([(classes[:, None] == y[:, i]).sum(1) for i in range(y.shape[1])]) + class_fracs = class_counts / y.shape[0] + nan_count = np.nansum(mask, axis=0) + nan_frac = nan_count / y.shape[0] + + column_headers = ["Class"] + [f"Count/Percent {target_cols[i]}" for i in range(y.shape[1])] + + table_rows = [ + [f"{k}"] + [f"{class_counts[j, i]}/{class_fracs[j, i]:0.0%}" for j in range(y.shape[1])] + for i, k in enumerate(classes) + ] + + nan_row = ["NaN"] + [f"{nan_count[i]}/{nan_frac[i]:0.0%}" for i in range(y.shape[1])] + table_rows.append(nan_row) + + total_row = ["Total"] + [f"{y.shape[0]}/{100.00}%" for i in range(y.shape[1])] + table_rows.append(total_row) + + return (column_headers, table_rows) + else: + raise ValueError(f"unsupported task type! Task type '{task_type}' was not recognized.") + + +def build_table(column_headers: list[str], table_rows: list[str], title: str | None = None) -> str: + right_justified_columns = [ + Column(header=column_header, justify="right") for column_header in column_headers + ] + table = Table(*right_justified_columns, title=title) + for row in table_rows: + table.add_row(*row) + + console = Console(record=True, file=StringIO(), width=200) + console.print(table) + return console.export_text() + + +def build_datasets(args, train_data, val_data, test_data): + """build the train/val/test datasets, where :attr:`test_data` may be None""" + multicomponent = len(train_data) > 1 + if multicomponent: + train_dsets = [ + make_dataset(data, args.rxn_mode, args.multi_hot_atom_featurizer_mode) + for data in train_data + ] + val_dsets = [ + make_dataset(data, args.rxn_mode, args.multi_hot_atom_featurizer_mode) + for data in val_data + ] + train_dset = MulticomponentDataset(train_dsets) + val_dset = MulticomponentDataset(val_dsets) + if len(test_data[0]) > 0: + test_dsets = [ + make_dataset(data, args.rxn_mode, args.multi_hot_atom_featurizer_mode) + for data in test_data + ] + test_dset = MulticomponentDataset(test_dsets) + else: + test_dset = None + else: + train_data = train_data[0] + val_data = val_data[0] + test_data = test_data[0] + train_dset = make_dataset(train_data, args.rxn_mode, args.multi_hot_atom_featurizer_mode) + val_dset = make_dataset(val_data, args.rxn_mode, args.multi_hot_atom_featurizer_mode) + if len(test_data) > 0: + test_dset = make_dataset(test_data, args.rxn_mode, args.multi_hot_atom_featurizer_mode) + else: + test_dset = None + if args.task_type != "spectral": + for dataset, label in zip( + [train_dset, val_dset, test_dset], ["Training", "Validation", "Test"] + ): + column_headers, table_rows = summarize(args.target_columns, args.task_type, dataset) + output = build_table(column_headers, table_rows, f"Summary of {label} Data") + logger.info("\n" + output) + + return train_dset, val_dset, test_dset + + +def build_model( + args, + train_dset: MolGraphDataset | MulticomponentDataset, + output_transform: UnscaleTransform, + input_transforms: tuple[ScaleTransform, 
list[GraphTransform], list[ScaleTransform]], +) -> MPNN: + mp_cls = AtomMessagePassing if args.atom_messages else BondMessagePassing + + X_d_transform, graph_transforms, V_d_transforms = input_transforms + if isinstance(train_dset, MulticomponentDataset): + mp_blocks = [ + mp_cls( + train_dset.datasets[i].featurizer.atom_fdim, + train_dset.datasets[i].featurizer.bond_fdim, + d_h=args.message_hidden_dim, + d_vd=( + train_dset.datasets[i].d_vd + if isinstance(train_dset.datasets[i], MoleculeDataset) + else 0 + ), + bias=args.message_bias, + depth=args.depth, + undirected=args.undirected, + dropout=args.dropout, + activation=args.activation, + V_d_transform=V_d_transforms[i], + graph_transform=graph_transforms[i], + ) + for i in range(train_dset.n_components) + ] + if args.mpn_shared: + if args.reaction_columns is not None and args.smiles_columns is not None: + raise ArgumentError( + argument=None, + message="Cannot use shared MPNN with both molecule and reaction data.", + ) + + mp_block = MulticomponentMessagePassing(mp_blocks, train_dset.n_components, args.mpn_shared) + # NOTE(degraff): this if/else block should be handled by the init of MulticomponentMessagePassing + # if args.mpn_shared: + # mp_block = MulticomponentMessagePassing(mp_blocks[0], n_components, args.mpn_shared) + # else: + d_xd = train_dset.datasets[0].d_xd + n_tasks = train_dset.datasets[0].Y.shape[1] + mpnn_cls = MulticomponentMPNN + else: + mp_block = mp_cls( + train_dset.featurizer.atom_fdim, + train_dset.featurizer.bond_fdim, + d_h=args.message_hidden_dim, + d_vd=train_dset.d_vd if isinstance(train_dset, MoleculeDataset) else 0, + bias=args.message_bias, + depth=args.depth, + undirected=args.undirected, + dropout=args.dropout, + activation=args.activation, + V_d_transform=V_d_transforms[0], + graph_transform=graph_transforms[0], + ) + d_xd = train_dset.d_xd + n_tasks = train_dset.Y.shape[1] + mpnn_cls = MPNN + + agg = Factory.build(AggregationRegistry[args.aggregation], norm=args.aggregation_norm) + predictor_cls = PredictorRegistry[args.task_type] + if args.loss_function is not None: + task_weights = torch.ones(n_tasks) if args.task_weights is None else args.task_weights + criterion = Factory.build( + LossFunctionRegistry[args.loss_function], + task_weights=task_weights, + v_kl=args.v_kl, + # threshold=args.threshold, TODO: Add in v2.1 + eps=args.eps, + alpha=args.alpha, + ) + else: + criterion = None + if args.metrics is not None: + metrics = [Factory.build(MetricRegistry[metric]) for metric in args.metrics] + else: + metrics = None + + predictor = Factory.build( + predictor_cls, + input_dim=mp_block.output_dim + d_xd, + n_tasks=n_tasks, + hidden_dim=args.ffn_hidden_dim, + n_layers=args.ffn_num_layers, + dropout=args.dropout, + activation=args.activation, + criterion=criterion, + task_weights=args.task_weights, + n_classes=args.multiclass_num_classes, + output_transform=output_transform, + # spectral_activation=args.spectral_activation, TODO: Add in v2.1 + ) + + if args.loss_function is None: + logger.info( + f"No loss function was specified! 
Using class default: {predictor_cls._T_default_criterion}" + ) + + return mpnn_cls( + mp_block, + agg, + predictor, + args.batch_norm, + metrics, + args.warmup_epochs, + args.init_lr, + args.max_lr, + args.final_lr, + X_d_transform=X_d_transform, + ) + + +def train_model( + args, train_loader, val_loader, test_loader, output_dir, output_transform, input_transforms +): + if args.checkpoint is not None: + model_paths = find_models(args.checkpoint) + if args.ensemble_size != len(model_paths): + logger.warning( + f"The number of models in ensemble for each splitting of data is set to {len(model_paths)}." + ) + args.ensemble_size = len(model_paths) + + for model_idx in range(args.ensemble_size): + model_output_dir = output_dir / f"model_{model_idx}" + model_output_dir.mkdir(exist_ok=True, parents=True) + + if args.pytorch_seed is None: + seed = torch.seed() + deterministic = False + else: + seed = args.pytorch_seed + model_idx + deterministic = True + + torch.manual_seed(seed) + + if args.checkpoint or args.model_frzn is not None: + mpnn_cls = ( + MulticomponentMPNN + if isinstance(train_loader.dataset, MulticomponentDataset) + else MPNN + ) + model_path = model_paths[model_idx] if args.checkpoint else args.model_frzn + model = mpnn_cls.load_from_file(model_path) + + if args.checkpoint: + model.apply( + lambda m: setattr(m, "p", args.dropout) + if isinstance(m, torch.nn.Dropout) + else None + ) + + # TODO: model_frzn is deprecated and then remove in v2.2 + if args.model_frzn or args.freeze_encoder: + model.message_passing.apply(lambda module: module.requires_grad_(False)) + model.message_passing.eval() + model.bn.apply(lambda module: module.requires_grad_(False)) + model.bn.eval() + for idx in range(args.frzn_ffn_layers): + model.predictor.ffn[idx].requires_grad_(False) + model.predictor.ffn[idx + 1].eval() + else: + model = build_model(args, train_loader.dataset, output_transform, input_transforms) + logger.info(model) + + try: + trainer_logger = TensorBoardLogger( + model_output_dir, "trainer_logs", default_hp_metric=False + ) + except ModuleNotFoundError as e: + logger.warning( + f"Unable to import TensorBoardLogger, reverting to CSVLogger (original error: {e})." 
+ ) + trainer_logger = CSVLogger(model_output_dir, "trainer_logs") + + if args.tracking_metric == "val_loss": + T_tracking_metric = model.criterion.__class__ + tracking_metric = args.tracking_metric + else: + T_tracking_metric = MetricRegistry[args.tracking_metric] + tracking_metric = "val/" + args.tracking_metric + + monitor_mode = "max" if T_tracking_metric.higher_is_better else "min" + logger.debug(f"Evaluation metric: '{T_tracking_metric.alias}', mode: '{monitor_mode}'") + + if args.remove_checkpoints: + temp_dir = TemporaryDirectory() + checkpoint_dir = Path(temp_dir.name) + else: + checkpoint_dir = model_output_dir + + checkpoint_filename = ( + f"best-epoch={{epoch}}-{tracking_metric.replace('/', '_')}=" + f"{{{tracking_metric}:.2f}}" + ) + checkpointing = ModelCheckpoint( + checkpoint_dir / "checkpoints", + checkpoint_filename, + tracking_metric, + mode=monitor_mode, + save_last=True, + auto_insert_metric_name=False, + ) + + if args.epochs != -1: + patience = args.patience if args.patience is not None else args.epochs + early_stopping = EarlyStopping(tracking_metric, patience=patience, mode=monitor_mode) + callbacks = [checkpointing, early_stopping] + else: + callbacks = [checkpointing] + + trainer = pl.Trainer( + logger=trainer_logger, + enable_progress_bar=True, + accelerator=args.accelerator, + devices=args.devices, + max_epochs=args.epochs, + callbacks=callbacks, + gradient_clip_val=args.grad_clip, + deterministic=deterministic, + ) + trainer.fit(model, train_loader, val_loader) + + if test_loader is not None: + if isinstance(trainer.strategy, DDPStrategy): + torch.distributed.destroy_process_group() + + best_ckpt_path = trainer.checkpoint_callback.best_model_path + trainer = pl.Trainer( + logger=trainer_logger, + enable_progress_bar=True, + accelerator=args.accelerator, + devices=1, + ) + model = model.load_from_checkpoint(best_ckpt_path) + predss = trainer.predict(model, dataloaders=test_loader) + else: + predss = trainer.predict(dataloaders=test_loader) + + preds = torch.concat(predss, 0) + if model.predictor.n_targets > 1: + preds = preds[..., 0] + preds = preds.numpy() + + evaluate_and_save_predictions( + preds, test_loader, model.metrics[:-1], model_output_dir, args + ) + + best_model_path = checkpointing.best_model_path + model = model.__class__.load_from_checkpoint(best_model_path) + p_model = model_output_dir / "best.pt" + save_model(p_model, model, args.target_columns) + logger.info(f"Best model saved to '{p_model}'") + + if args.remove_checkpoints: + temp_dir.cleanup() + + +def evaluate_and_save_predictions(preds, test_loader, metrics, model_output_dir, args): + if isinstance(test_loader.dataset, MulticomponentDataset): + test_dset = test_loader.dataset.datasets[0] + else: + test_dset = test_loader.dataset + targets = test_dset.Y + mask = torch.from_numpy(np.isfinite(targets)) + targets = np.nan_to_num(targets, nan=0.0) + weights = torch.ones(len(test_dset)) + lt_mask = torch.from_numpy(test_dset.lt_mask) if test_dset.lt_mask[0] is not None else None + gt_mask = torch.from_numpy(test_dset.gt_mask) if test_dset.gt_mask[0] is not None else None + + individual_scores = dict() + for metric in metrics: + individual_scores[metric.alias] = [] + for i, col in enumerate(args.target_columns): + if "multiclass" in args.task_type: + preds_slice = torch.from_numpy(preds[:, i : i + 1, :]) + targets_slice = torch.from_numpy(targets[:, i : i + 1]) + else: + preds_slice = torch.from_numpy(preds[:, i : i + 1]) + targets_slice = torch.from_numpy(targets[:, i : i + 1]) + preds_loss = 
metric( + preds_slice, + targets_slice, + mask[:, i : i + 1], + weights, + lt_mask[:, i] if lt_mask is not None else None, + gt_mask[:, i] if gt_mask is not None else None, + ) + individual_scores[metric.alias].append(preds_loss) + + logger.info("Test Set results:") + for metric in metrics: + avg_loss = sum(individual_scores[metric.alias]) / len(individual_scores[metric.alias]) + logger.info(f"test/{metric.alias}: {avg_loss}") + + if args.show_individual_scores: + logger.info("Entire Test Set individual results:") + for metric in metrics: + for i, col in enumerate(args.target_columns): + logger.info(f"test/{col}/{metric.alias}: {individual_scores[metric.alias][i]}") + + names = test_loader.dataset.names + if isinstance(test_loader.dataset, MulticomponentDataset): + namess = list(zip(*names)) + else: + namess = [names] + + columns = args.input_columns + args.target_columns + if "multiclass" in args.task_type: + columns = columns + [f"{col}_prob" for col in args.target_columns] + formatted_probability_strings = np.apply_along_axis( + lambda x: ",".join(map(str, x)), 2, preds + ) + predicted_class_labels = preds.argmax(axis=-1) + df_preds = pd.DataFrame( + list(zip(*namess, *predicted_class_labels.T, *formatted_probability_strings.T)), + columns=columns, + ) + else: + df_preds = pd.DataFrame(list(zip(*namess, *preds.T)), columns=columns) + df_preds.to_csv(model_output_dir / "test_predictions.csv", index=False) + + +def main(args): + format_kwargs = dict( + no_header_row=args.no_header_row, + smiles_cols=args.smiles_columns, + rxn_cols=args.reaction_columns, + target_cols=args.target_columns, + ignore_cols=args.ignore_columns, + splits_col=args.splits_column, + weight_col=args.weight_column, + bounded=args.loss_function is not None and "bounded" in args.loss_function, + ) + + featurization_kwargs = dict( + molecule_featurizers=args.molecule_featurizers, keep_h=args.keep_h, add_h=args.add_h + ) + + splits = build_splits(args, format_kwargs, featurization_kwargs) + + for replicate_idx, (train_data, val_data, test_data) in enumerate(zip(*splits)): + if args.num_replicates == 1: + output_dir = args.output_dir + else: + output_dir = args.output_dir / f"replicate_{replicate_idx}" + + output_dir.mkdir(exist_ok=True, parents=True) + + train_dset, val_dset, test_dset = build_datasets(args, train_data, val_data, test_data) + + if args.save_smiles_splits: + save_smiles_splits(args, output_dir, train_dset, val_dset, test_dset) + + if args.checkpoint or args.model_frzn is not None: + model_paths = find_models(args.checkpoint) + if len(model_paths) > 1: + logger.warning( + "Multiple checkpoint files were loaded, but only the scalers from " + f"{model_paths[0]} are used. It is assumed that all models provided have the " + "same data scalings, meaning they were trained on the same data." 
+ ) + model_path = model_paths[0] if args.checkpoint else args.model_frzn + load_and_use_pretrained_model_scalers(model_path, train_dset, val_dset) + input_transforms = (None, None, None) + output_transform = None + else: + input_transforms = normalize_inputs(train_dset, val_dset, args) + + if "regression" in args.task_type: + output_scaler = train_dset.normalize_targets() + val_dset.normalize_targets(output_scaler) + logger.info( + f"Train data: mean = {output_scaler.mean_} | std = {output_scaler.scale_}" + ) + output_transform = UnscaleTransform.from_standard_scaler(output_scaler) + else: + output_transform = None + + if not args.no_cache: + train_dset.cache = True + val_dset.cache = True + + train_loader = build_dataloader( + train_dset, + args.batch_size, + args.num_workers, + class_balance=args.class_balance, + seed=args.data_seed, + ) + if args.class_balance: + logger.debug( + f"With `--class-balance`, effective train size = {len(train_loader.sampler)}" + ) + val_loader = build_dataloader(val_dset, args.batch_size, args.num_workers, shuffle=False) + if test_dset is not None: + test_loader = build_dataloader( + test_dset, args.batch_size, args.num_workers, shuffle=False + ) + else: + test_loader = None + + train_model( + args, + train_loader, + val_loader, + test_loader, + output_dir, + output_transform, + input_transforms, + ) + + +if __name__ == "__main__": + # TODO: update this old code or remove it. + parser = ArgumentParser() + parser = TrainSubcommand.add_args(parser) + + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True) + args = parser.parse_args() + TrainSubcommand.func(args) diff --git a/chemprop-updated/chemprop/cli/utils/__init__.py b/chemprop-updated/chemprop/cli/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fdd239a2a06724abe893c0913cd079addab26ea6 --- /dev/null +++ b/chemprop-updated/chemprop/cli/utils/__init__.py @@ -0,0 +1,30 @@ +from .actions import LookupAction +from .args import bounded +from .command import Subcommand +from .parsing import ( + build_data_from_files, + get_column_names, + make_datapoints, + make_dataset, + parse_indices, +) +from .utils import _pop_attr, _pop_attr_d, pop_attr + +__all__ = [ + "bounded", + "LookupAction", + "Subcommand", + "build_data_from_files", + "make_datapoints", + "make_dataset", + "get_column_names", + "parse_indices", + "actions", + "args", + "command", + "parsing", + "utils", + "pop_attr", + "_pop_attr", + "_pop_attr_d", +] diff --git a/chemprop-updated/chemprop/cli/utils/actions.py b/chemprop-updated/chemprop/cli/utils/actions.py new file mode 100644 index 0000000000000000000000000000000000000000..23e870f37b638499235ddccba0f72355efc3b7c7 --- /dev/null +++ b/chemprop-updated/chemprop/cli/utils/actions.py @@ -0,0 +1,19 @@ +from argparse import _StoreAction +from typing import Any, Mapping + + +def LookupAction(obj: Mapping[str, Any]): + class LookupAction_(_StoreAction): + def __init__(self, option_strings, dest, default=None, choices=None, **kwargs): + if default not in obj.keys() and default is not None: + raise ValueError( + f"Invalid value for arg 'default': '{default}'. 
" + f"Expected one of {tuple(obj.keys())}" + ) + + kwargs["choices"] = choices if choices is not None else obj.keys() + kwargs["default"] = default + + super().__init__(option_strings, dest, **kwargs) + + return LookupAction_ diff --git a/chemprop-updated/chemprop/cli/utils/args.py b/chemprop-updated/chemprop/cli/utils/args.py new file mode 100644 index 0000000000000000000000000000000000000000..5c6f29e3cd48a39cda6555f6a35a133412df2dd2 --- /dev/null +++ b/chemprop-updated/chemprop/cli/utils/args.py @@ -0,0 +1,34 @@ +import functools + +__all__ = ["bounded"] + + +def bounded(lo: float | None = None, hi: float | None = None): + if lo is None and hi is None: + raise ValueError("No bounds provided!") + + def decorator(f): + @functools.wraps(f) + def wrapper(*args, **kwargs): + x = f(*args, **kwargs) + + if (lo is not None and hi is not None) and not lo <= x <= hi: + raise ValueError(f"Parsed value outside of range [{lo}, {hi}]! got: {x}") + if hi is not None and x > hi: + raise ValueError(f"Parsed value below {hi}! got: {x}") + if lo is not None and x < lo: + raise ValueError(f"Parsed value above {lo}]! got: {x}") + + return x + + return wrapper + + return decorator + + +def uppercase(x: str): + return x.upper() + + +def lowercase(x: str): + return x.lower() diff --git a/chemprop-updated/chemprop/cli/utils/command.py b/chemprop-updated/chemprop/cli/utils/command.py new file mode 100644 index 0000000000000000000000000000000000000000..d9edd0d91855240dade06b5d67ae929339d155fa --- /dev/null +++ b/chemprop-updated/chemprop/cli/utils/command.py @@ -0,0 +1,24 @@ +from abc import ABC, abstractmethod +from argparse import ArgumentParser, Namespace, _SubParsersAction + + +class Subcommand(ABC): + COMMAND: str + HELP: str | None = None + + @classmethod + def add(cls, subparsers: _SubParsersAction, parents) -> ArgumentParser: + parser = subparsers.add_parser(cls.COMMAND, help=cls.HELP, parents=parents) + cls.add_args(parser).set_defaults(func=cls.func) + + return parser + + @classmethod + @abstractmethod + def add_args(cls, parser: ArgumentParser) -> ArgumentParser: + pass + + @classmethod + @abstractmethod + def func(cls, args: Namespace): + pass diff --git a/chemprop-updated/chemprop/cli/utils/parsing.py b/chemprop-updated/chemprop/cli/utils/parsing.py new file mode 100644 index 0000000000000000000000000000000000000000..5b84475ebfbebbe40cad65b2becbbf0d617532c7 --- /dev/null +++ b/chemprop-updated/chemprop/cli/utils/parsing.py @@ -0,0 +1,446 @@ +import logging +from os import PathLike +from typing import Literal, Mapping, Sequence + +import numpy as np +import pandas as pd + +from chemprop.data.datapoints import MoleculeDatapoint, ReactionDatapoint +from chemprop.data.datasets import MoleculeDataset, ReactionDataset +from chemprop.featurizers.atom import get_multi_hot_atom_featurizer +from chemprop.featurizers.bond import MultiHotBondFeaturizer, RIGRBondFeaturizer +from chemprop.featurizers.molecule import MoleculeFeaturizerRegistry +from chemprop.featurizers.molgraph import ( + CondensedGraphOfReactionFeaturizer, + SimpleMoleculeMolGraphFeaturizer, +) +from chemprop.utils import make_mol + +logger = logging.getLogger(__name__) + + +def parse_csv( + path: PathLike, + smiles_cols: Sequence[str] | None, + rxn_cols: Sequence[str] | None, + target_cols: Sequence[str] | None, + ignore_cols: Sequence[str] | None, + splits_col: str | None, + weight_col: str | None, + bounded: bool = False, + no_header_row: bool = False, +): + df = pd.read_csv(path, header=None if no_header_row else "infer", index_col=False) + + 
if smiles_cols is not None and rxn_cols is not None: + smiss = df[smiles_cols].T.values.tolist() + rxnss = df[rxn_cols].T.values.tolist() + input_cols = [*smiles_cols, *rxn_cols] + elif smiles_cols is not None and rxn_cols is None: + smiss = df[smiles_cols].T.values.tolist() + rxnss = None + input_cols = smiles_cols + elif smiles_cols is None and rxn_cols is not None: + smiss = None + rxnss = df[rxn_cols].T.values.tolist() + input_cols = rxn_cols + else: + smiss = df.iloc[:, [0]].T.values.tolist() + rxnss = None + input_cols = [df.columns[0]] + + if target_cols is None: + target_cols = list( + column + for column in df.columns + if column + not in set( # if splits or weight is None, df.columns will never have None + input_cols + (ignore_cols or []) + [splits_col] + [weight_col] + ) + ) + + Y = df[target_cols] + weights = None if weight_col is None else df[weight_col].to_numpy(np.single) + + if bounded: + lt_mask = Y.applymap(lambda x: "<" in x).to_numpy() + gt_mask = Y.applymap(lambda x: ">" in x).to_numpy() + Y = Y.applymap(lambda x: x.strip("<").strip(">")).to_numpy(np.single) + else: + Y = Y.to_numpy(np.single) + lt_mask = None + gt_mask = None + + return smiss, rxnss, Y, weights, lt_mask, gt_mask + + +def get_column_names( + path: PathLike, + smiles_cols: Sequence[str] | None, + rxn_cols: Sequence[str] | None, + target_cols: Sequence[str] | None, + ignore_cols: Sequence[str] | None, + splits_col: str | None, + weight_col: str | None, + no_header_row: bool = False, +) -> tuple[list[str], list[str]]: + df_cols = pd.read_csv(path, index_col=False, nrows=0).columns.tolist() + + if no_header_row: + return ["SMILES"], ["pred_" + str(i) for i in range((len(df_cols) - 1))] + + input_cols = (smiles_cols or []) + (rxn_cols or []) + + if len(input_cols) == 0: + input_cols = [df_cols[0]] + + if target_cols is None: + target_cols = list( + column + for column in df_cols + if column + not in set( + input_cols + (ignore_cols or []) + ([splits_col] or []) + ([weight_col] or []) + ) + ) + + return input_cols, target_cols + + +def make_datapoints( + smiss: list[list[str]] | None, + rxnss: list[list[str]] | None, + Y: np.ndarray, + weights: np.ndarray | None, + lt_mask: np.ndarray | None, + gt_mask: np.ndarray | None, + X_d: np.ndarray | None, + V_fss: list[list[np.ndarray] | list[None]] | None, + E_fss: list[list[np.ndarray] | list[None]] | None, + V_dss: list[list[np.ndarray] | list[None]] | None, + molecule_featurizers: list[str] | None, + keep_h: bool, + add_h: bool, +) -> tuple[list[list[MoleculeDatapoint]], list[list[ReactionDatapoint]]]: + """Make the :class:`MoleculeDatapoint`s and :class:`ReactionDatapoint`s for a given + dataset. + + Parameters + ---------- + smiss : list[list[str]] | None + a list of ``j`` lists of ``n`` SMILES strings, where ``j`` is the number of molecules per + datapoint and ``n`` is the number of datapoints. If ``None``, the corresponding list of + :class:`MoleculeDatapoint`\s will be empty. + rxnss : list[list[str]] | None + a list of ``k`` lists of ``n`` reaction SMILES strings, where ``k`` is the number of + reactions per datapoint. If ``None``, the corresponding list of :class:`ReactionDatapoint`\s + will be empty. + Y : np.ndarray + the target values of shape ``n x m``, where ``m`` is the number of targets + weights : np.ndarray | None + the weights of the datapoints to use in the loss function of shape ``n x m``. If ``None``, + the weights all default to 1. 
+ lt_mask : np.ndarray | None + a boolean mask of shape ``n x m`` indicating whether the targets are less than inequality + targets. If ``None``, ``lt_mask`` for all datapoints will be ``None``. + gt_mask : np.ndarray | None + a boolean mask of shape ``n x m`` indicating whether the targets are greater than inequality + targets. If ``None``, ``gt_mask`` for all datapoints will be ``None``. + X_d : np.ndarray | None + the extra descriptors of shape ``n x p``, where ``p`` is the number of extra descriptors. If + ``None``, ``x_d`` for all datapoints will be ``None``. + V_fss : list[list[np.ndarray] | list[None]] | None + a list of ``j`` lists of ``n`` np.ndarrays each of shape ``v_jn x q_j``, where ``v_jn`` is + the number of atoms in the j-th molecule of the n-th datapoint and ``q_j`` is the number of + extra atom features used for the j-th molecules. Any of the ``j`` lists can be a list of + None values if the corresponding component does not use extra atom features. If ``None``, + ``V_f`` for all datapoints will be ``None``. + E_fss : list[list[np.ndarray] | list[None]] | None + a list of ``j`` lists of ``n`` np.ndarrays each of shape ``e_jn x r_j``, where ``e_jn`` is + the number of bonds in the j-th molecule of the n-th datapoint and ``r_j`` is the number of + extra bond features used for the j-th molecules. Any of the ``j`` lists can be a list of + None values if the corresponding component does not use extra bond features. If ``None``, + ``E_f`` for all datapoints will be ``None``. + V_dss : list[list[np.ndarray] | list[None]] | None + a list of ``j`` lists of ``n`` np.ndarrays each of shape ``v_jn x s_j``, where ``s_j`` is + the number of extra atom descriptors used for the j-th molecules. Any of the ``j`` lists can + be a list of None values if the corresponding component does not use extra atom features. If + ``None``, ``V_d`` for all datapoints will be ``None``. + molecule_featurizers : list[str] | None + a list of molecule featurizer names to generate additional molecule features to use as extra + descriptors. If there are multiple molecules per datapoint, the featurizers will be applied + to each molecule and concatenated. Note that a :code:`ReactionDatapoint` has two + RDKit :class:`~rdkit.Chem.Mol` objects, reactant(s) and product(s). Each + ``molecule_featurizer`` will be applied to both of these objects. + keep_h : bool + add_h : bool + + Returns + ------- + list[list[MoleculeDatapoint]] + a list of ``j`` lists of ``n`` :class:`MoleculeDatapoint`\s + list[list[ReactionDatapoint]] + a list of ``k`` lists of ``n`` :class:`ReactionDatapoint`\s + .. note:: + either ``j`` or ``k`` may be 0, in which case the corresponding list will be empty. + + Raises + ------ + ValueError + if both ``smiss`` and ``rxnss`` are ``None``. + if ``smiss`` and ``rxnss`` are both given and have different lengths. + """ + if smiss is None and rxnss is None: + raise ValueError("args 'smiss' and 'rnxss' were both `None`!") + elif rxnss is None: + N = len(smiss[0]) + rxnss = [] + elif smiss is None: + N = len(rxnss[0]) + smiss = [] + elif len(smiss[0]) != len(rxnss[0]): + raise ValueError( + f"args 'smiss' and 'rxnss' must have same length! 
got {len(smiss[0])} and {len(rxnss[0])}" + ) + else: + N = len(smiss[0]) + + if len(smiss) > 0: + molss = [[make_mol(smi, keep_h, add_h) for smi in smis] for smis in smiss] + if len(rxnss) > 0: + rctss = [ + [ + make_mol(f"{rct_smi}.{agt_smi}" if agt_smi else rct_smi, keep_h, add_h) + for rct_smi, agt_smi, _ in (rxn.split(">") for rxn in rxns) + ] + for rxns in rxnss + ] + pdtss = [ + [make_mol(pdt_smi, keep_h, add_h) for _, _, pdt_smi in (rxn.split(">") for rxn in rxns)] + for rxns in rxnss + ] + + weights = np.ones(N, dtype=np.single) if weights is None else weights + gt_mask = [None] * N if gt_mask is None else gt_mask + lt_mask = [None] * N if lt_mask is None else lt_mask + + n_mols = len(smiss) if smiss else 0 + V_fss = [[None] * N] * n_mols if V_fss is None else V_fss + E_fss = [[None] * N] * n_mols if E_fss is None else E_fss + V_dss = [[None] * N] * n_mols if V_dss is None else V_dss + + if X_d is None and molecule_featurizers is None: + X_d = [None] * N + elif molecule_featurizers is None: + pass + else: + molecule_featurizers = [MoleculeFeaturizerRegistry[mf]() for mf in molecule_featurizers] + + if len(smiss) > 0: + mol_descriptors = np.hstack( + [ + np.vstack([np.hstack([mf(mol) for mf in molecule_featurizers]) for mol in mols]) + for mols in molss + ] + ) + if X_d is None: + X_d = mol_descriptors + else: + X_d = np.hstack([X_d, mol_descriptors]) + + if len(rxnss) > 0: + rct_pdt_descriptors = np.hstack( + [ + np.vstack( + [ + np.hstack( + [mf(mol) for mf in molecule_featurizers for mol in (rct, pdt)] + ) + for rct, pdt in zip(rcts, pdts) + ] + ) + for rcts, pdts in zip(rctss, pdtss) + ] + ) + if X_d is None: + X_d = rct_pdt_descriptors + else: + X_d = np.hstack([X_d, rct_pdt_descriptors]) + + mol_data = [ + [ + MoleculeDatapoint( + mol=molss[mol_idx][i], + name=smis[i], + y=Y[i], + weight=weights[i], + gt_mask=gt_mask[i], + lt_mask=lt_mask[i], + x_d=X_d[i], + x_phase=None, + V_f=V_fss[mol_idx][i], + E_f=E_fss[mol_idx][i], + V_d=V_dss[mol_idx][i], + ) + for i in range(N) + ] + for mol_idx, smis in enumerate(smiss) + ] + rxn_data = [ + [ + ReactionDatapoint( + rct=rctss[rxn_idx][i], + pdt=pdtss[rxn_idx][i], + name=rxns[i], + y=Y[i], + weight=weights[i], + gt_mask=gt_mask[i], + lt_mask=lt_mask[i], + x_d=X_d[i], + x_phase=None, + ) + for i in range(N) + ] + for rxn_idx, rxns in enumerate(rxnss) + ] + + return mol_data, rxn_data + + +def build_data_from_files( + p_data: PathLike, + no_header_row: bool, + smiles_cols: Sequence[str] | None, + rxn_cols: Sequence[str] | None, + target_cols: Sequence[str] | None, + ignore_cols: Sequence[str] | None, + splits_col: str | None, + weight_col: str | None, + bounded: bool, + p_descriptors: PathLike, + p_atom_feats: dict[int, PathLike], + p_bond_feats: dict[int, PathLike], + p_atom_descs: dict[int, PathLike], + **featurization_kwargs: Mapping, +) -> list[list[MoleculeDatapoint] | list[ReactionDatapoint]]: + smiss, rxnss, Y, weights, lt_mask, gt_mask = parse_csv( + p_data, + smiles_cols, + rxn_cols, + target_cols, + ignore_cols, + splits_col, + weight_col, + bounded, + no_header_row, + ) + n_molecules = len(smiss) if smiss is not None else 0 + n_datapoints = len(Y) + + X_ds = load_input_feats_and_descs(p_descriptors, None, None, feat_desc="X_d") + V_fss = load_input_feats_and_descs(p_atom_feats, n_molecules, n_datapoints, feat_desc="V_f") + E_fss = load_input_feats_and_descs(p_bond_feats, n_molecules, n_datapoints, feat_desc="E_f") + V_dss = load_input_feats_and_descs(p_atom_descs, n_molecules, n_datapoints, feat_desc="V_d") + + mol_data, 
rxn_data = make_datapoints( + smiss, + rxnss, + Y, + weights, + lt_mask, + gt_mask, + X_ds, + V_fss, + E_fss, + V_dss, + **featurization_kwargs, + ) + + return mol_data + rxn_data + + +def load_input_feats_and_descs( + paths: dict[int, PathLike] | PathLike, + n_molecules: int | None, + n_datapoints: int | None, + feat_desc: str, +): + if paths is None: + return None + + match feat_desc: + case "X_d": + path = paths + loaded_feature = np.load(path) + features = loaded_feature["arr_0"] + + case _: + for index in paths: + if index >= n_molecules: + raise ValueError( + f"For {n_molecules} molecules, atom/bond features/descriptors can only be " + f"specified for indices 0-{n_molecules - 1}! Got index {index}." + ) + + features = [] + for idx in range(n_molecules): + path = paths.get(idx, None) + + if path is not None: + loaded_feature = np.load(path) + loaded_feature = [ + loaded_feature[f"arr_{i}"] for i in range(len(loaded_feature)) + ] + else: + loaded_feature = [None] * n_datapoints + + features.append(loaded_feature) + return features + + +def make_dataset( + data: Sequence[MoleculeDatapoint] | Sequence[ReactionDatapoint], + reaction_mode: str, + multi_hot_atom_featurizer_mode: Literal["V1", "V2", "ORGANIC", "RIGR"] = "V2", +) -> MoleculeDataset | ReactionDataset: + atom_featurizer = get_multi_hot_atom_featurizer(multi_hot_atom_featurizer_mode) + match multi_hot_atom_featurizer_mode: + case "RIGR": + bond_featurizer = RIGRBondFeaturizer() + case "V1" | "V2" | "ORGANIC": + bond_featurizer = MultiHotBondFeaturizer() + case _: + raise TypeError( + f"Unsupported atom featurizer mode '{multi_hot_atom_featurizer_mode=}'!" + ) + + if isinstance(data[0], MoleculeDatapoint): + extra_atom_fdim = data[0].V_f.shape[1] if data[0].V_f is not None else 0 + extra_bond_fdim = data[0].E_f.shape[1] if data[0].E_f is not None else 0 + featurizer = SimpleMoleculeMolGraphFeaturizer( + atom_featurizer=atom_featurizer, + bond_featurizer=bond_featurizer, + extra_atom_fdim=extra_atom_fdim, + extra_bond_fdim=extra_bond_fdim, + ) + return MoleculeDataset(data, featurizer) + + featurizer = CondensedGraphOfReactionFeaturizer( + mode_=reaction_mode, atom_featurizer=atom_featurizer + ) + + return ReactionDataset(data, featurizer) + + +def parse_indices(idxs): + """Parses a string of indices into a list of integers. e.g. '0,1,2-4' -> [0, 1, 2, 3, 4]""" + if isinstance(idxs, str): + indices = [] + for idx in idxs.split(","): + if "-" in idx: + start, end = map(int, idx.split("-")) + indices.extend(range(start, end + 1)) + else: + indices.append(int(idx)) + return indices + return idxs diff --git a/chemprop-updated/chemprop/cli/utils/utils.py b/chemprop-updated/chemprop/cli/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8f63d224a36065f6a3332b7a3450a5ceb6a05568 --- /dev/null +++ b/chemprop-updated/chemprop/cli/utils/utils.py @@ -0,0 +1,31 @@ +from typing import Any + +__all__ = ["pop_attr"] + + +def pop_attr(o: object, attr: str, *args) -> Any | None: + """like ``pop()`` but for attribute maps""" + match len(args): + case 0: + return _pop_attr(o, attr) + case 1: + return _pop_attr_d(o, attr, args[0]) + case _: + raise TypeError(f"Expected at most 2 arguments! 
got: {len(args)}") + + +def _pop_attr(o: object, attr: str) -> Any: + val = getattr(o, attr) + delattr(o, attr) + + return val + + +def _pop_attr_d(o: object, attr: str, default: Any | None = None) -> Any | None: + try: + val = getattr(o, attr) + delattr(o, attr) + except AttributeError: + val = default + + return val diff --git a/chemprop-updated/chemprop/conf.py b/chemprop-updated/chemprop/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..f8e3681442d4d4cb553b38d698c4b102bd7c088d --- /dev/null +++ b/chemprop-updated/chemprop/conf.py @@ -0,0 +1,6 @@ +"""Global configuration variables for chemprop""" + +from chemprop.featurizers.molgraph.molecule import SimpleMoleculeMolGraphFeaturizer + +DEFAULT_ATOM_FDIM, DEFAULT_BOND_FDIM = SimpleMoleculeMolGraphFeaturizer().shape +DEFAULT_HIDDEN_DIM = 300 diff --git a/chemprop-updated/chemprop/data/__init__.py b/chemprop-updated/chemprop/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..843b2a94583f12bf5ca08ac6052f721d67bb2b37 --- /dev/null +++ b/chemprop-updated/chemprop/data/__init__.py @@ -0,0 +1,41 @@ +from .collate import ( + BatchMolGraph, + MulticomponentTrainingBatch, + TrainingBatch, + collate_batch, + collate_multicomponent, +) +from .dataloader import build_dataloader +from .datapoints import MoleculeDatapoint, ReactionDatapoint +from .datasets import ( + Datum, + MoleculeDataset, + MolGraphDataset, + MulticomponentDataset, + ReactionDataset, +) +from .molgraph import MolGraph +from .samplers import ClassBalanceSampler, SeededSampler +from .splitting import SplitType, make_split_indices, split_data_by_indices + +__all__ = [ + "BatchMolGraph", + "TrainingBatch", + "collate_batch", + "MulticomponentTrainingBatch", + "collate_multicomponent", + "build_dataloader", + "MoleculeDatapoint", + "ReactionDatapoint", + "MoleculeDataset", + "ReactionDataset", + "Datum", + "MulticomponentDataset", + "MolGraphDataset", + "MolGraph", + "ClassBalanceSampler", + "SeededSampler", + "SplitType", + "make_split_indices", + "split_data_by_indices", +] diff --git a/chemprop-updated/chemprop/data/__pycache__/__init__.cpython-37.pyc b/chemprop-updated/chemprop/data/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6ba5b174fcc8ba84a8d88c7393bd31d834300826 Binary files /dev/null and b/chemprop-updated/chemprop/data/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/data/__pycache__/data.cpython-37.pyc b/chemprop-updated/chemprop/data/__pycache__/data.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be9b5658c2e6c9bd0459e266f1d96bfe53f7b58a Binary files /dev/null and b/chemprop-updated/chemprop/data/__pycache__/data.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/data/__pycache__/scaffold.cpython-37.pyc b/chemprop-updated/chemprop/data/__pycache__/scaffold.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8dcd3f273cf63eb42e4ea8edd23d1016e7912beb Binary files /dev/null and b/chemprop-updated/chemprop/data/__pycache__/scaffold.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/data/__pycache__/scaler.cpython-37.pyc b/chemprop-updated/chemprop/data/__pycache__/scaler.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4178af6742c18488b79dfcec26a804fa465c392 Binary files /dev/null and b/chemprop-updated/chemprop/data/__pycache__/scaler.cpython-37.pyc differ diff --git 
a/chemprop-updated/chemprop/data/__pycache__/utils.cpython-37.pyc b/chemprop-updated/chemprop/data/__pycache__/utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c70de06fd22b9a9e8d7894402c32121be965d7b5 Binary files /dev/null and b/chemprop-updated/chemprop/data/__pycache__/utils.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/data/collate.py b/chemprop-updated/chemprop/data/collate.py new file mode 100644 index 0000000000000000000000000000000000000000..1147b136a89286efdea12edafe7e17c6136e3a7c --- /dev/null +++ b/chemprop-updated/chemprop/data/collate.py @@ -0,0 +1,123 @@ +from dataclasses import InitVar, dataclass, field +from typing import Iterable, NamedTuple, Sequence + +import numpy as np +import torch +from torch import Tensor + +from chemprop.data.datasets import Datum +from chemprop.data.molgraph import MolGraph + + +@dataclass(repr=False, eq=False, slots=True) +class BatchMolGraph: + """A :class:`BatchMolGraph` represents a batch of individual :class:`MolGraph`\s. + + It has all the attributes of a ``MolGraph`` with the addition of the ``batch`` attribute. This + class is intended for use with data loading, so it uses :obj:`~torch.Tensor`\s to store data + """ + + mgs: InitVar[Sequence[MolGraph]] + """A list of individual :class:`MolGraph`\s to be batched together""" + V: Tensor = field(init=False) + """the atom feature matrix""" + E: Tensor = field(init=False) + """the bond feature matrix""" + edge_index: Tensor = field(init=False) + """an tensor of shape ``2 x E`` containing the edges of the graph in COO format""" + rev_edge_index: Tensor = field(init=False) + """A tensor of shape ``E`` that maps from an edge index to the index of the source of the + reverse edge in the ``edge_index`` attribute.""" + batch: Tensor = field(init=False) + """the index of the parent :class:`MolGraph` in the batched graph""" + names: list[str] = field(init=False) # Add SMILES strings for the batch + + __size: int = field(init=False) + + def __post_init__(self, mgs: Sequence[MolGraph]): + self.__size = len(mgs) + + Vs = [] + Es = [] + edge_indexes = [] + rev_edge_indexes = [] + batch_indexes = [] + self.names = [] + + num_nodes = 0 + num_edges = 0 + for i, mg in enumerate(mgs): + Vs.append(mg.V) + Es.append(mg.E) + edge_indexes.append(mg.edge_index + num_nodes) + rev_edge_indexes.append(mg.rev_edge_index + num_edges) + batch_indexes.append([i] * len(mg.V)) + self.names.append(mg.name) + + num_nodes += mg.V.shape[0] + num_edges += mg.edge_index.shape[1] + + self.V = torch.from_numpy(np.concatenate(Vs)).float() + self.E = torch.from_numpy(np.concatenate(Es)).float() + self.edge_index = torch.from_numpy(np.hstack(edge_indexes)).long() + self.rev_edge_index = torch.from_numpy(np.concatenate(rev_edge_indexes)).long() + self.batch = torch.tensor(np.concatenate(batch_indexes)).long() + + def __len__(self) -> int: + """the number of individual :class:`MolGraph`\s in this batch""" + return self.__size + + def to(self, device: str | torch.device): + self.V = self.V.to(device) + self.E = self.E.to(device) + self.edge_index = self.edge_index.to(device) + self.rev_edge_index = self.rev_edge_index.to(device) + self.batch = self.batch.to(device) + + +class TrainingBatch(NamedTuple): + bmg: BatchMolGraph + V_d: Tensor | None + X_d: Tensor | None + Y: Tensor | None + w: Tensor + lt_mask: Tensor | None + gt_mask: Tensor | None + + +def collate_batch(batch: Iterable[Datum]) -> TrainingBatch: + mgs, V_ds, x_ds, ys, weights, lt_masks, gt_masks = zip(*batch) + + return 
TrainingBatch( + BatchMolGraph(mgs), + None if V_ds[0] is None else torch.from_numpy(np.concatenate(V_ds)).float(), + None if x_ds[0] is None else torch.from_numpy(np.array(x_ds)).float(), + None if ys[0] is None else torch.from_numpy(np.array(ys)).float(), + torch.tensor(weights, dtype=torch.float).unsqueeze(1), + None if lt_masks[0] is None else torch.from_numpy(np.array(lt_masks)), + None if gt_masks[0] is None else torch.from_numpy(np.array(gt_masks)), + ) + + +class MulticomponentTrainingBatch(NamedTuple): + bmgs: list[BatchMolGraph] + V_ds: list[Tensor | None] + X_d: Tensor | None + Y: Tensor | None + w: Tensor + lt_mask: Tensor | None + gt_mask: Tensor | None + + +def collate_multicomponent(batches: Iterable[Iterable[Datum]]) -> MulticomponentTrainingBatch: + tbs = [collate_batch(batch) for batch in zip(*batches)] + + return MulticomponentTrainingBatch( + [tb.bmg for tb in tbs], + [tb.V_d for tb in tbs], + tbs[0].X_d, + tbs[0].Y, + tbs[0].w, + tbs[0].lt_mask, + tbs[0].gt_mask, + ) diff --git a/chemprop-updated/chemprop/data/dataloader.py b/chemprop-updated/chemprop/data/dataloader.py new file mode 100644 index 0000000000000000000000000000000000000000..4fc2b2ddee50c70794cb5b60403eecfa5241049f --- /dev/null +++ b/chemprop-updated/chemprop/data/dataloader.py @@ -0,0 +1,71 @@ +import logging + +from torch.utils.data import DataLoader + +from chemprop.data.collate import collate_batch, collate_multicomponent +from chemprop.data.datasets import MoleculeDataset, MulticomponentDataset, ReactionDataset +from chemprop.data.samplers import ClassBalanceSampler, SeededSampler + +logger = logging.getLogger(__name__) + + +def build_dataloader( + dataset: MoleculeDataset | ReactionDataset | MulticomponentDataset, + batch_size: int = 64, + num_workers: int = 0, + class_balance: bool = False, + seed: int | None = None, + shuffle: bool = True, + **kwargs, +): + """Return a :obj:`~torch.utils.data.DataLoader` for :class:`MolGraphDataset`\s + + Parameters + ---------- + dataset : MoleculeDataset | ReactionDataset | MulticomponentDataset + The dataset containing the molecules or reactions to load. + batch_size : int, default=64 + the batch size to load. + num_workers : int, default=0 + the number of workers used to build batches. + class_balance : bool, default=False + Whether to perform class balancing (i.e., use an equal number of positive and negative + molecules). Class balance is only available for single task classification datasets. Set + shuffle to True in order to get a random subset of the larger class. + seed : int, default=None + the random seed to use for shuffling (only used when `shuffle` is `True`). + shuffle : bool, default=False + whether to shuffle the data during sampling. 
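    Notes
    -----
    A minimal usage sketch (editorial illustration; ``train_dset`` is assumed to be an
    already-constructed ``MoleculeDataset``)::

        loader = build_dataloader(train_dset, batch_size=32, seed=0)
        for bmg, V_d, X_d, Y, w, lt_mask, gt_mask in loader:
            ...  # each batch is a ``TrainingBatch`` produced by ``collate_batch``

    For a ``MulticomponentDataset``, batches are instead assembled by
    ``collate_multicomponent``.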
+ """ + + if class_balance: + sampler = ClassBalanceSampler(dataset.Y, seed, shuffle) + elif shuffle and seed is not None: + sampler = SeededSampler(len(dataset), seed) + else: + sampler = None + + if isinstance(dataset, MulticomponentDataset): + collate_fn = collate_multicomponent + else: + collate_fn = collate_batch + + if len(dataset) % batch_size == 1: + logger.warning( + f"Dropping last batch of size 1 to avoid issues with batch normalization \ +(dataset size = {len(dataset)}, batch_size = {batch_size})" + ) + drop_last = True + else: + drop_last = False + + return DataLoader( + dataset, + batch_size, + sampler is None and shuffle, + sampler, + num_workers=num_workers, + collate_fn=collate_fn, + drop_last=drop_last, + **kwargs, + ) diff --git a/chemprop-updated/chemprop/data/datapoints.py b/chemprop-updated/chemprop/data/datapoints.py new file mode 100644 index 0000000000000000000000000000000000000000..164d7bf1c28ac0f434d8cf6ce46acc7341262217 --- /dev/null +++ b/chemprop-updated/chemprop/data/datapoints.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np +from rdkit.Chem import AllChem as Chem + +from chemprop.featurizers import Featurizer +from chemprop.utils import make_mol + +MoleculeFeaturizer = Featurizer[Chem.Mol, np.ndarray] + + +@dataclass(slots=True) +class _DatapointMixin: + """A mixin class for both molecule- and reaction- and multicomponent-type data""" + + y: np.ndarray | None = None + """the targets for the molecule with unknown targets indicated by `nan`s""" + weight: float = 1.0 + """the weight of this datapoint for the loss calculation.""" + gt_mask: np.ndarray | None = None + """Indicates whether the targets are an inequality regression target of the form `x`""" + x_d: np.ndarray | None = None + """A vector of length ``d_f`` containing additional features (e.g., Morgan fingerprint) that + will be concatenated to the global representation *after* aggregation""" + x_phase: list[float] = None + """A one-hot vector indicating the phase of the data, as used in spectra data.""" + name: str | None = None + """A string identifier for the datapoint.""" + + def __post_init__(self): + NAN_TOKEN = 0 + if self.x_d is not None: + self.x_d[np.isnan(self.x_d)] = NAN_TOKEN + + @property + def t(self) -> int | None: + return len(self.y) if self.y is not None else None + + +@dataclass +class _MoleculeDatapointMixin: + mol: Chem.Mol + """the molecule associated with this datapoint""" + + @classmethod + def from_smi( + cls, smi: str, *args, keep_h: bool = False, add_h: bool = False, **kwargs + ) -> _MoleculeDatapointMixin: + mol = make_mol(smi, keep_h, add_h) + + kwargs["name"] = smi if "name" not in kwargs else kwargs["name"] + + return cls(mol, *args, **kwargs) + + +@dataclass +class MoleculeDatapoint(_DatapointMixin, _MoleculeDatapointMixin): + """A :class:`MoleculeDatapoint` contains a single molecule and its associated features and targets.""" + + V_f: np.ndarray | None = None + """a numpy array of shape ``V x d_vf``, where ``V`` is the number of atoms in the molecule, and + ``d_vf`` is the number of additional features that will be concatenated to atom-level features + *before* message passing""" + E_f: np.ndarray | None = None + """A numpy array of shape ``E x d_ef``, where ``E`` is the number of bonds in the molecule, and + ``d_ef`` is the number of additional features containing additional features that will be + concatenated to bond-level features *before* message passing""" + V_d: np.ndarray | None = None + """A 
numpy array of shape ``V x d_vd``, where ``V`` is the number of atoms in the molecule, and + ``d_vd`` is the number of additional descriptors that will be concatenated to atom-level + descriptors *after* message passing""" + + def __post_init__(self): + NAN_TOKEN = 0 + if self.V_f is not None: + self.V_f[np.isnan(self.V_f)] = NAN_TOKEN + if self.E_f is not None: + self.E_f[np.isnan(self.E_f)] = NAN_TOKEN + if self.V_d is not None: + self.V_d[np.isnan(self.V_d)] = NAN_TOKEN + + super().__post_init__() + + def __len__(self) -> int: + return 1 + + +@dataclass +class _ReactionDatapointMixin: + rct: Chem.Mol + """the reactant associated with this datapoint""" + pdt: Chem.Mol + """the product associated with this datapoint""" + + @classmethod + def from_smi( + cls, + rxn_or_smis: str | tuple[str, str], + *args, + keep_h: bool = False, + add_h: bool = False, + **kwargs, + ) -> _ReactionDatapointMixin: + match rxn_or_smis: + case str(): + rct_smi, agt_smi, pdt_smi = rxn_or_smis.split(">") + rct_smi = f"{rct_smi}.{agt_smi}" if agt_smi else rct_smi + name = rxn_or_smis + case tuple(): + rct_smi, pdt_smi = rxn_or_smis + name = ">>".join(rxn_or_smis) + case _: + raise TypeError( + "Must provide either a reaction SMARTS string or a tuple of reactant and" + " a product SMILES strings!" + ) + + rct = make_mol(rct_smi, keep_h, add_h) + pdt = make_mol(pdt_smi, keep_h, add_h) + + kwargs["name"] = name if "name" not in kwargs else kwargs["name"] + + return cls(rct, pdt, *args, **kwargs) + + +@dataclass +class ReactionDatapoint(_DatapointMixin, _ReactionDatapointMixin): + """A :class:`ReactionDatapoint` contains a single reaction and its associated features and targets.""" + + def __post_init__(self): + if self.rct is None: + raise ValueError("Reactant cannot be `None`!") + if self.pdt is None: + raise ValueError("Product cannot be `None`!") + + return super().__post_init__() + + def __len__(self) -> int: + return 2 diff --git a/chemprop-updated/chemprop/data/datasets.py b/chemprop-updated/chemprop/data/datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..1bebad817eeb75c07f9cb01ce32b960a326a3e1a --- /dev/null +++ b/chemprop-updated/chemprop/data/datasets.py @@ -0,0 +1,475 @@ +from dataclasses import dataclass, field +from functools import cached_property +from typing import NamedTuple, TypeAlias + +import numpy as np +from numpy.typing import ArrayLike +from rdkit import Chem +from rdkit.Chem import Mol +from sklearn.preprocessing import StandardScaler +from torch.utils.data import Dataset + +from chemprop.data.datapoints import MoleculeDatapoint, ReactionDatapoint +from chemprop.data.molgraph import MolGraph +from chemprop.featurizers.base import Featurizer +from chemprop.featurizers.molgraph import CGRFeaturizer, SimpleMoleculeMolGraphFeaturizer +from chemprop.featurizers.molgraph.cache import MolGraphCache, MolGraphCacheOnTheFly +from chemprop.types import Rxn + + +class Datum(NamedTuple): + """a singular training data point""" + + mg: MolGraph + V_d: np.ndarray | None + x_d: np.ndarray | None + y: np.ndarray | None + weight: float + lt_mask: np.ndarray | None + gt_mask: np.ndarray | None + + +MolGraphDataset: TypeAlias = Dataset[Datum] + + +class _MolGraphDatasetMixin: + def __len__(self) -> int: + return len(self.data) + + @cached_property + def _Y(self) -> np.ndarray: + """the raw targets of the dataset""" + return np.array([d.y for d in self.data], float) + + @property + def Y(self) -> np.ndarray: + """the (scaled) targets of the dataset""" + return self.__Y + + @Y.setter + 
def Y(self, Y: ArrayLike): + self._validate_attribute(Y, "targets") + + self.__Y = np.array(Y, float) + + @cached_property + def _X_d(self) -> np.ndarray: + """the raw extra descriptors of the dataset""" + return np.array([d.x_d for d in self.data]) + + @property + def X_d(self) -> np.ndarray: + """the (scaled) extra descriptors of the dataset""" + return self.__X_d + + @X_d.setter + def X_d(self, X_d: ArrayLike): + self._validate_attribute(X_d, "extra descriptors") + + self.__X_d = np.array(X_d) + + @property + def weights(self) -> np.ndarray: + return np.array([d.weight for d in self.data]) + + @property + def gt_mask(self) -> np.ndarray: + return np.array([d.gt_mask for d in self.data]) + + @property + def lt_mask(self) -> np.ndarray: + return np.array([d.lt_mask for d in self.data]) + + @property + def t(self) -> int | None: + return self.data[0].t if len(self.data) > 0 else None + + @property + def d_xd(self) -> int: + """the extra molecule descriptor dimension, if any""" + return 0 if self.X_d[0] is None else self.X_d.shape[1] + + @property + def names(self) -> list[str]: + return [d.name for d in self.data] + + def normalize_targets(self, scaler: StandardScaler | None = None) -> StandardScaler: + """Normalizes the targets of this dataset using a :obj:`StandardScaler` + + The :obj:`StandardScaler` subtracts the mean and divides by the standard deviation for + each task independently. NOTE: This should only be used for regression datasets. + + Returns + ------- + StandardScaler + a scaler fit to the targets. + """ + + if scaler is None: + scaler = StandardScaler().fit(self._Y) + + self.Y = scaler.transform(self._Y) + + return scaler + + def normalize_inputs( + self, key: str = "X_d", scaler: StandardScaler | None = None + ) -> StandardScaler: + VALID_KEYS = {"X_d"} + if key not in VALID_KEYS: + raise ValueError(f"Invalid feature key! got: {key}. expected one of: {VALID_KEYS}") + + X = self.X_d if self.X_d[0] is not None else None + + if X is None: + return scaler + + if scaler is None: + scaler = StandardScaler().fit(X) + + self.X_d = scaler.transform(X) + + return scaler + + def reset(self): + """Reset the atom and bond features; atom and extra descriptors; and targets of each + datapoint to their initial, unnormalized values.""" + self.__Y = self._Y + self.__X_d = self._X_d + + def _validate_attribute(self, X: np.ndarray, label: str): + if not len(self.data) == len(X): + raise ValueError( + f"number of molecules ({len(self.data)}) and {label} ({len(X)}) " + "must have same length!" + ) + + +@dataclass +class MoleculeDataset(_MolGraphDatasetMixin, MolGraphDataset): + """A :class:`MoleculeDataset` composed of :class:`MoleculeDatapoint`\s + + A :class:`MoleculeDataset` produces featurized data for input to a + :class:`MPNN` model. Typically, data featurization is performed on-the-fly + and parallelized across multiple workers via the :class:`~torch.utils.data + DataLoader` class. However, for small datasets, it may be more efficient to + featurize the data in advance and cache the results. This can be done by + setting ``MoleculeDataset.cache=True``. 
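+
+    For example (an illustrative sketch, where ``datapoints`` stands in for a list of
+    :class:`MoleculeDatapoint`\s)::
+
+        dataset = MoleculeDataset(datapoints)
+        dataset.cache = True  # precompute all MolGraphs and hold them in memory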
+ + Parameters + ---------- + data : Iterable[MoleculeDatapoint] + the data from which to create a dataset + featurizer : MoleculeFeaturizer + the featurizer with which to generate MolGraphs of the molecules + """ + + data: list[MoleculeDatapoint] + featurizer: Featurizer[Mol, MolGraph] = field(default_factory=SimpleMoleculeMolGraphFeaturizer) + + def __post_init__(self): + if self.data is None: + raise ValueError("Data cannot be None!") + + self.reset() + self.cache = False + + def __getitem__(self, idx: int) -> Datum: + d = self.data[idx] + mg = self.mg_cache[idx] + + # Assign the SMILES string to the MolGraph + mg_with_name = MolGraph( + V=mg.V, + E=mg.E, + edge_index=mg.edge_index, + rev_edge_index=mg.rev_edge_index, + name=d.name # Assign the SMILES string + ) + + return Datum( + mg=mg_with_name, # Use the updated MolGraph + V_d=self.V_ds[idx], + x_d=self.X_d[idx], + y=self.Y[idx], + weight=d.weight, + lt_mask=d.lt_mask, + gt_mask=d.gt_mask, + ) + @property + def cache(self) -> bool: + return self.__cache + + @cache.setter + def cache(self, cache: bool = False): + self.__cache = cache + self._init_cache() + + def _init_cache(self): + """initialize the cache""" + self.mg_cache = (MolGraphCache if self.cache else MolGraphCacheOnTheFly)( + self.mols, self.V_fs, self.E_fs, self.featurizer + ) + + @property + def smiles(self) -> list[str]: + """the SMILES strings associated with the dataset""" + return [Chem.MolToSmiles(d.mol) for d in self.data] + + @property + def mols(self) -> list[Chem.Mol]: + """the molecules associated with the dataset""" + return [d.mol for d in self.data] + + @property + def _V_fs(self) -> list[np.ndarray]: + """the raw atom features of the dataset""" + return [d.V_f for d in self.data] + + @property + def V_fs(self) -> list[np.ndarray]: + """the (scaled) atom descriptors of the dataset""" + return self.__V_fs + + @V_fs.setter + def V_fs(self, V_fs: list[np.ndarray]): + """the (scaled) atom features of the dataset""" + self._validate_attribute(V_fs, "atom features") + + self.__V_fs = V_fs + self._init_cache() + + @property + def _E_fs(self) -> list[np.ndarray]: + """the raw bond features of the dataset""" + return [d.E_f for d in self.data] + + @property + def E_fs(self) -> list[np.ndarray]: + """the (scaled) bond features of the dataset""" + return self.__E_fs + + @E_fs.setter + def E_fs(self, E_fs: list[np.ndarray]): + self._validate_attribute(E_fs, "bond features") + + self.__E_fs = E_fs + self._init_cache() + + @property + def _V_ds(self) -> list[np.ndarray]: + """the raw atom descriptors of the dataset""" + return [d.V_d for d in self.data] + + @property + def V_ds(self) -> list[np.ndarray]: + """the (scaled) atom descriptors of the dataset""" + return self.__V_ds + + @V_ds.setter + def V_ds(self, V_ds: list[np.ndarray]): + self._validate_attribute(V_ds, "atom descriptors") + + self.__V_ds = V_ds + + @property + def d_vf(self) -> int: + """the extra atom feature dimension, if any""" + return 0 if self.V_fs[0] is None else self.V_fs[0].shape[1] + + @property + def d_ef(self) -> int: + """the extra bond feature dimension, if any""" + return 0 if self.E_fs[0] is None else self.E_fs[0].shape[1] + + @property + def d_vd(self) -> int: + """the extra atom descriptor dimension, if any""" + return 0 if self.V_ds[0] is None else self.V_ds[0].shape[1] + + def normalize_inputs( + self, key: str = "X_d", scaler: StandardScaler | None = None + ) -> StandardScaler: + VALID_KEYS = {"X_d", "V_f", "E_f", "V_d"} + + match key: + case "X_d": + X = None if self.d_xd == 0 else 
self.X_d + case "V_f": + X = None if self.d_vf == 0 else np.concatenate(self.V_fs, axis=0) + case "E_f": + X = None if self.d_ef == 0 else np.concatenate(self.E_fs, axis=0) + case "V_d": + X = None if self.d_vd == 0 else np.concatenate(self.V_ds, axis=0) + case _: + raise ValueError(f"Invalid feature key! got: {key}. expected one of: {VALID_KEYS}") + + if X is None: + return scaler + + if scaler is None: + scaler = StandardScaler().fit(X) + + match key: + case "X_d": + self.X_d = scaler.transform(X) + case "V_f": + self.V_fs = [scaler.transform(V_f) if V_f.size > 0 else V_f for V_f in self.V_fs] + case "E_f": + self.E_fs = [scaler.transform(E_f) if E_f.size > 0 else E_f for E_f in self.E_fs] + case "V_d": + self.V_ds = [scaler.transform(V_d) if V_d.size > 0 else V_d for V_d in self.V_ds] + case _: + raise RuntimeError("unreachable code reached!") + + return scaler + + def reset(self): + """Reset the atom and bond features; atom and extra descriptors; and targets of each + datapoint to their initial, unnormalized values.""" + super().reset() + self.__V_fs = self._V_fs + self.__E_fs = self._E_fs + self.__V_ds = self._V_ds + + +@dataclass +class ReactionDataset(_MolGraphDatasetMixin, MolGraphDataset): + """A :class:`ReactionDataset` composed of :class:`ReactionDatapoint`\s + + .. note:: + The featurized data provided by this class may be cached, simlar to a + :class:`MoleculeDataset`. To enable the cache, set ``ReactionDataset + cache=True``. + """ + + data: list[ReactionDatapoint] + """the dataset from which to load""" + featurizer: Featurizer[Rxn, MolGraph] = field(default_factory=CGRFeaturizer) + """the featurizer with which to generate MolGraphs of the input""" + + def __post_init__(self): + if self.data is None: + raise ValueError("Data cannot be None!") + + self.reset() + self.cache = False + + @property + def cache(self) -> bool: + return self.__cache + + @cache.setter + def cache(self, cache: bool = False): + self.__cache = cache + self.mg_cache = (MolGraphCache if cache else MolGraphCacheOnTheFly)( + self.mols, [None] * len(self), [None] * len(self), self.featurizer + ) + + def __getitem__(self, idx: int) -> Datum: + d = self.data[idx] + mg = self.mg_cache[idx] + + return Datum(mg, None, self.X_d[idx], self.Y[idx], d.weight, d.lt_mask, d.gt_mask) + + @property + def smiles(self) -> list[tuple]: + return [(Chem.MolToSmiles(d.rct), Chem.MolToSmiles(d.pdt)) for d in self.data] + + @property + def mols(self) -> list[Rxn]: + return [(d.rct, d.pdt) for d in self.data] + + @property + def d_vf(self) -> int: + return 0 + + @property + def d_ef(self) -> int: + return 0 + + @property + def d_vd(self) -> int: + return 0 + + +@dataclass(repr=False, eq=False) +class MulticomponentDataset(_MolGraphDatasetMixin, Dataset): + """A :class:`MulticomponentDataset` is a :class:`Dataset` composed of parallel + :class:`MoleculeDatasets` and :class:`ReactionDataset`\s""" + + datasets: list[MoleculeDataset | ReactionDataset] + """the parallel datasets""" + + def __post_init__(self): + sizes = [len(dset) for dset in self.datasets] + if not all(sizes[0] == size for size in sizes[1:]): + raise ValueError(f"Datasets must have all same length! 
got: {sizes}") + + def __len__(self) -> int: + return len(self.datasets[0]) + + @property + def n_components(self) -> int: + return len(self.datasets) + + def __getitem__(self, idx: int) -> list[Datum]: + return [dset[idx] for dset in self.datasets] + + @property + def smiles(self) -> list[list[str]]: + return list(zip(*[dset.smiles for dset in self.datasets])) + + @property + def names(self) -> list[list[str]]: + return list(zip(*[dset.names for dset in self.datasets])) + + @property + def mols(self) -> list[list[Chem.Mol]]: + return list(zip(*[dset.mols for dset in self.datasets])) + + def normalize_targets(self, scaler: StandardScaler | None = None) -> StandardScaler: + return self.datasets[0].normalize_targets(scaler) + + def normalize_inputs( + self, key: str = "X_d", scaler: list[StandardScaler] | None = None + ) -> list[StandardScaler]: + RXN_VALID_KEYS = {"X_d"} + match scaler: + case None: + return [ + dset.normalize_inputs(key) + if isinstance(dset, MoleculeDataset) or key in RXN_VALID_KEYS + else None + for dset in self.datasets + ] + case _: + assert len(scaler) == len( + self.datasets + ), "Number of scalers must match number of datasets!" + + return [ + dset.normalize_inputs(key, s) + if isinstance(dset, MoleculeDataset) or key in RXN_VALID_KEYS + else None + for dset, s in zip(self.datasets, scaler) + ] + + def reset(self): + return [dset.reset() for dset in self.datasets] + + @property + def d_xd(self) -> list[int]: + return self.datasets[0].d_xd + + @property + def d_vf(self) -> list[int]: + return sum(dset.d_vf for dset in self.datasets) + + @property + def d_ef(self) -> list[int]: + return sum(dset.d_ef for dset in self.datasets) + + @property + def d_vd(self) -> list[int]: + return sum(dset.d_vd for dset in self.datasets) diff --git a/chemprop-updated/chemprop/data/molgraph.py b/chemprop-updated/chemprop/data/molgraph.py new file mode 100644 index 0000000000000000000000000000000000000000..af7025ae72f3a002566ef801b0b8b41b3add8fb8 --- /dev/null +++ b/chemprop-updated/chemprop/data/molgraph.py @@ -0,0 +1,17 @@ +from typing import NamedTuple + +import numpy as np + + +class MolGraph(NamedTuple): + """A :class:`MolGraph` represents the graph featurization of a molecule.""" + + V: np.ndarray + """an array of shape ``V x d_v`` containing the atom features of the molecule""" + E: np.ndarray + """an array of shape ``E x d_e`` containing the bond features of the molecule""" + edge_index: np.ndarray + """an array of shape ``2 x E`` containing the edges of the graph in COO format""" + rev_edge_index: np.ndarray + """A array of shape ``E`` that maps from an edge index to the index of the source of the reverse edge in :attr:`edge_index` attribute.""" + name: str | None = None # Add SMILES string as an optional attribute \ No newline at end of file diff --git a/chemprop-updated/chemprop/data/samplers.py b/chemprop-updated/chemprop/data/samplers.py new file mode 100644 index 0000000000000000000000000000000000000000..8a24c9769ce73fa7c6a853f25899d6a95bc212cb --- /dev/null +++ b/chemprop-updated/chemprop/data/samplers.py @@ -0,0 +1,66 @@ +from itertools import chain +from typing import Iterator, Optional + +import numpy as np +from torch.utils.data import Sampler + + +class SeededSampler(Sampler): + """A :class`SeededSampler` is a class for iterating through a dataset in a randomly seeded + fashion""" + + def __init__(self, N: int, seed: int): + if seed is None: + raise ValueError("arg 'seed' was `None`! 
A SeededSampler must be seeded!") + + self.idxs = np.arange(N) + self.rg = np.random.default_rng(seed) + + def __iter__(self) -> Iterator[int]: + """an iterator over indices to sample.""" + self.rg.shuffle(self.idxs) + + return iter(self.idxs) + + def __len__(self) -> int: + """the number of indices that will be sampled.""" + return len(self.idxs) + + +class ClassBalanceSampler(Sampler): + """A :class:`ClassBalanceSampler` samples data from a :class:`MolGraphDataset` such that + positive and negative classes are equally sampled + + Parameters + ---------- + dataset : MolGraphDataset + the dataset from which to sample + seed : int + the random seed to use for shuffling (only used when `shuffle` is `True`) + shuffle : bool, default=False + whether to shuffle the data during sampling + """ + + def __init__(self, Y: np.ndarray, seed: Optional[int] = None, shuffle: bool = False): + self.shuffle = shuffle + self.rg = np.random.default_rng(seed) + + idxs = np.arange(len(Y)) + actives = Y.any(1) + + self.pos_idxs = idxs[actives] + self.neg_idxs = idxs[~actives] + + self.length = 2 * min(len(self.pos_idxs), len(self.neg_idxs)) + + def __iter__(self) -> Iterator[int]: + """an iterator over indices to sample.""" + if self.shuffle: + self.rg.shuffle(self.pos_idxs) + self.rg.shuffle(self.neg_idxs) + + return chain(*zip(self.pos_idxs, self.neg_idxs)) + + def __len__(self) -> int: + """the number of indices that will be sampled.""" + return self.length diff --git a/chemprop-updated/chemprop/data/splitting.py b/chemprop-updated/chemprop/data/splitting.py new file mode 100644 index 0000000000000000000000000000000000000000..f4bb1b6f91667634bb21ad9460d9ee6e87286df3 --- /dev/null +++ b/chemprop-updated/chemprop/data/splitting.py @@ -0,0 +1,225 @@ +from collections.abc import Iterable, Sequence +import copy +from enum import auto +import logging + +from astartes import train_test_split, train_val_test_split +from astartes.molecules import train_test_split_molecules, train_val_test_split_molecules +import numpy as np +from rdkit import Chem + +from chemprop.data.datapoints import MoleculeDatapoint, ReactionDatapoint +from chemprop.utils.utils import EnumMapping + +logger = logging.getLogger(__name__) + +Datapoints = Sequence[MoleculeDatapoint] | Sequence[ReactionDatapoint] +MulticomponentDatapoints = Sequence[Datapoints] + + +class SplitType(EnumMapping): + SCAFFOLD_BALANCED = auto() + RANDOM_WITH_REPEATED_SMILES = auto() + RANDOM = auto() + KENNARD_STONE = auto() + KMEANS = auto() + + +def make_split_indices( + mols: Sequence[Chem.Mol], + split: SplitType | str = "random", + sizes: tuple[float, float, float] = (0.8, 0.1, 0.1), + seed: int = 0, + num_replicates: int = 1, + num_folds: None = None, +) -> tuple[list[list[int]], ...]: + """Splits data into training, validation, and test splits. + + Parameters + ---------- + mols : Sequence[Chem.Mol] + Sequence of RDKit molecules to use for structure based splitting + split : SplitType | str, optional + Split type, one of ~chemprop.data.utils.SplitType, by default "random" + sizes : tuple[float, float, float], optional + 3-tuple with the proportions of data in the train, validation, and test sets, by default + (0.8, 0.1, 0.1). Set the middle value to 0 for a two way split. + seed : int, optional + The random seed passed to astartes, by default 0 + num_replicates : int, optional + Number of replicates, by default 1 + num_folds : None, optional + This argument was removed in v2.1 - use `num_replicates` instead. + + Returns + ------- + tuple[list[list[int]], ...] 
+ 2- or 3-member tuple containing num_replicates length lists of training, validation, and testing indexes. + + .. important:: + Validation may or may not be present + + Raises + ------ + ValueError + Requested split sizes tuple not of length 3 + ValueError + Unsupported split method requested + """ + if num_folds is not None: + raise RuntimeError("This argument was removed in v2.1 - use `num_replicates` instead.") + if num_replicates == 1: + logger.warning( + "The return type of make_split_indices has changed in v2.1 - see help(make_split_indices)" + ) + if (num_splits := len(sizes)) != 3: + raise ValueError( + f"Specify sizes for train, validation, and test (got {num_splits} values)." + ) + # typically include a validation set + include_val = True + split_fun = train_val_test_split + mol_split_fun = train_val_test_split_molecules + # default sampling arguments for astartes sampler + astartes_kwargs = dict( + train_size=sizes[0], test_size=sizes[2], return_indices=True, random_state=seed + ) + # if no validation set, reassign the splitting functions + if sizes[1] == 0.0: + include_val = False + split_fun = train_test_split + mol_split_fun = train_test_split_molecules + else: + astartes_kwargs["val_size"] = sizes[1] + + n_datapoints = len(mols) + train_replicates, val_replicates, test_replicates = [], [], [] + for _ in range(num_replicates): + train, val, test = None, None, None + match SplitType.get(split): + case SplitType.SCAFFOLD_BALANCED: + mols_without_atommaps = [] + for mol in mols: + copied_mol = copy.deepcopy(mol) + for atom in copied_mol.GetAtoms(): + atom.SetAtomMapNum(0) + mols_without_atommaps.append(copied_mol) + result = mol_split_fun( + np.array(mols_without_atommaps), sampler="scaffold", **astartes_kwargs + ) + train, val, test = _unpack_astartes_result(result, include_val) + + # Use to constrain data with the same smiles go in the same split. 
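+            # (split over the unique SMILES first, then map each unique SMILES back to every
+            # datapoint index that shares it, so duplicates never end up in different splits)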
+ case SplitType.RANDOM_WITH_REPEATED_SMILES: + # get two arrays: one of all the smiles strings, one of just the unique + all_smiles = np.array([Chem.MolToSmiles(mol) for mol in mols]) + unique_smiles = np.unique(all_smiles) + + # save a mapping of smiles -> all the indices that it appeared at + smiles_indices = {} + for smiles in unique_smiles: + smiles_indices[smiles] = np.where(all_smiles == smiles)[0].tolist() + + # randomly split the unique smiles + result = split_fun( + np.arange(len(unique_smiles)), sampler="random", **astartes_kwargs + ) + train_idxs, val_idxs, test_idxs = _unpack_astartes_result(result, include_val) + + # convert these to the 'actual' indices from the original list using the dict we made + train = sum((smiles_indices[unique_smiles[i]] for i in train_idxs), []) + val = sum((smiles_indices[unique_smiles[j]] for j in val_idxs), []) + test = sum((smiles_indices[unique_smiles[k]] for k in test_idxs), []) + + case SplitType.RANDOM: + result = split_fun(np.arange(n_datapoints), sampler="random", **astartes_kwargs) + train, val, test = _unpack_astartes_result(result, include_val) + + case SplitType.KENNARD_STONE: + result = mol_split_fun( + np.array(mols), + sampler="kennard_stone", + hopts=dict(metric="jaccard"), + fingerprint="morgan_fingerprint", + fprints_hopts=dict(n_bits=2048), + **astartes_kwargs, + ) + train, val, test = _unpack_astartes_result(result, include_val) + + case SplitType.KMEANS: + result = mol_split_fun( + np.array(mols), + sampler="kmeans", + hopts=dict(metric="jaccard"), + fingerprint="morgan_fingerprint", + fprints_hopts=dict(n_bits=2048), + **astartes_kwargs, + ) + train, val, test = _unpack_astartes_result(result, include_val) + + case _: + raise RuntimeError("Unreachable code reached!") + train_replicates.append(train) + val_replicates.append(val) + test_replicates.append(test) + astartes_kwargs["random_state"] += 1 + return train_replicates, val_replicates, test_replicates + + +def _unpack_astartes_result( + result: tuple, include_val: bool +) -> tuple[list[int], list[int], list[int]]: + """Helper function to partition input data based on output of astartes sampler + + Parameters + ----------- + result: tuple + Output from call to astartes containing the split indices + include_val: bool + True if a validation set is included, False otherwise. + + Returns + --------- + train: list[int] + val: list[int] + .. 
important:: + validation possibly empty + test: list[int] + """ + train_idxs, val_idxs, test_idxs = [], [], [] + # astartes returns a set of lists containing the data, clusters (if applicable) + # and indices (always last), so we pull out the indices + if include_val: + train_idxs, val_idxs, test_idxs = result[-3], result[-2], result[-1] + else: + train_idxs, test_idxs = result[-2], result[-1] + return list(train_idxs), list(val_idxs), list(test_idxs) + + +def split_data_by_indices( + data: Datapoints | MulticomponentDatapoints, + train_indices: Iterable[Iterable[int]] | None = None, + val_indices: Iterable[Iterable[int]] | None = None, + test_indices: Iterable[Iterable[int]] | None = None, +): + """Splits data into training, validation, and test groups based on split indices given.""" + + train_data = _splitter_helper(data, train_indices) + val_data = _splitter_helper(data, val_indices) + test_data = _splitter_helper(data, test_indices) + + return train_data, val_data, test_data + + +def _splitter_helper(data, indices): + if indices is None: + return None + + if isinstance(data[0], (MoleculeDatapoint, ReactionDatapoint)): + datapoints = data + idxss = indices + return [[datapoints[idx] for idx in idxs] for idxs in idxss] + else: + datapointss = data + idxss = indices + return [[[datapoints[idx] for idx in idxs] for datapoints in datapointss] for idxs in idxss] diff --git a/chemprop-updated/chemprop/exceptions.py b/chemprop-updated/chemprop/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..29229ca41753dbe886312ff91850e75e7d69a556 --- /dev/null +++ b/chemprop-updated/chemprop/exceptions.py @@ -0,0 +1,12 @@ +from typing import Iterable + +from chemprop.utils import pretty_shape + + +class InvalidShapeError(ValueError): + def __init__(self, var_name: str, received: Iterable[int], expected: Iterable[int]): + message = ( + f"arg '{var_name}' has incorrect shape! " + f"got: `{pretty_shape(received)}`. 
expected: `{pretty_shape(expected)}`" + ) + super().__init__(message) diff --git a/chemprop-updated/chemprop/features/__pycache__/__init__.cpython-37.pyc b/chemprop-updated/chemprop/features/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ba2b45b0e6e63a3a213f6d022537d2ad7497dc17 Binary files /dev/null and b/chemprop-updated/chemprop/features/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/features/__pycache__/features_generators.cpython-37.pyc b/chemprop-updated/chemprop/features/__pycache__/features_generators.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8c1bbf65c9224d7bbcf025ea68df7e932d49ece Binary files /dev/null and b/chemprop-updated/chemprop/features/__pycache__/features_generators.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/features/__pycache__/featurization.cpython-37.pyc b/chemprop-updated/chemprop/features/__pycache__/featurization.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2cb4f7da9e474aae8a68f5e1ef355d2843a3914b Binary files /dev/null and b/chemprop-updated/chemprop/features/__pycache__/featurization.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/features/__pycache__/utils.cpython-37.pyc b/chemprop-updated/chemprop/features/__pycache__/utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5759d70f4ca7a6203e1ac4eeddc98cc02306952e Binary files /dev/null and b/chemprop-updated/chemprop/features/__pycache__/utils.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/featurizers/__init__.py b/chemprop-updated/chemprop/featurizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a266fd820ac640d47a22b5a68a6afcb2ab7a2d9c --- /dev/null +++ b/chemprop-updated/chemprop/featurizers/__init__.py @@ -0,0 +1,52 @@ +from .atom import AtomFeatureMode, MultiHotAtomFeaturizer, get_multi_hot_atom_featurizer +from .base import Featurizer, GraphFeaturizer, S, T, VectorFeaturizer +from .bond import MultiHotBondFeaturizer +from .molecule import ( + BinaryFeaturizerMixin, + CountFeaturizerMixin, + MoleculeFeaturizerRegistry, + MorganBinaryFeaturizer, + MorganCountFeaturizer, + MorganFeaturizerMixin, + RDKit2DFeaturizer, + V1RDKit2DFeaturizer, + V1RDKit2DNormalizedFeaturizer, +) +from .molgraph import ( + CGRFeaturizer, + CondensedGraphOfReactionFeaturizer, + MolGraphCache, + MolGraphCacheFacade, + MolGraphCacheOnTheFly, + RxnMode, + SimpleMoleculeMolGraphFeaturizer, +) + +__all__ = [ + "Featurizer", + "S", + "T", + "VectorFeaturizer", + "GraphFeaturizer", + "MultiHotAtomFeaturizer", + "AtomFeatureMode", + "get_multi_hot_atom_featurizer", + "MultiHotBondFeaturizer", + "MolGraphCacheFacade", + "MolGraphCache", + "MolGraphCacheOnTheFly", + "SimpleMoleculeMolGraphFeaturizer", + "CondensedGraphOfReactionFeaturizer", + "CGRFeaturizer", + "RxnMode", + "MoleculeFeaturizer", + "MorganFeaturizerMixin", + "BinaryFeaturizerMixin", + "CountFeaturizerMixin", + "MorganBinaryFeaturizer", + "MorganCountFeaturizer", + "RDKit2DFeaturizer", + "MoleculeFeaturizerRegistry", + "V1RDKit2DFeaturizer", + "V1RDKit2DNormalizedFeaturizer", +] diff --git a/chemprop-updated/chemprop/featurizers/atom.py b/chemprop-updated/chemprop/featurizers/atom.py new file mode 100644 index 0000000000000000000000000000000000000000..c224423f1a4f311bd371f9a2e83666a138f0659d --- /dev/null +++ b/chemprop-updated/chemprop/featurizers/atom.py @@ -0,0 +1,281 @@ +from enum import 
auto +from typing import Sequence + +import numpy as np +from rdkit.Chem.rdchem import Atom, HybridizationType + +from chemprop.featurizers.base import VectorFeaturizer +from chemprop.utils.utils import EnumMapping + + +class MultiHotAtomFeaturizer(VectorFeaturizer[Atom]): + """A :class:`MultiHotAtomFeaturizer` uses a multi-hot encoding to featurize atoms. + + .. seealso:: + The class provides three default parameterization schemes: + + * :meth:`MultiHotAtomFeaturizer.v1` + * :meth:`MultiHotAtomFeaturizer.v2` + * :meth:`MultiHotAtomFeaturizer.organic` + + The generated atom features are ordered as follows: + * atomic number + * degree + * formal charge + * chiral tag + * number of hydrogens + * hybridization + * aromaticity + * mass + + .. important:: + Each feature, except for aromaticity and mass, includes a pad for unknown values. + + Parameters + ---------- + atomic_nums : Sequence[int] + the choices for atom type denoted by atomic number. Ex: ``[4, 5, 6]`` for C, N and O. + degrees : Sequence[int] + the choices for number of bonds an atom is engaged in. + formal_charges : Sequence[int] + the choices for integer electronic charge assigned to an atom. + chiral_tags : Sequence[int] + the choices for an atom's chiral tag. See :class:`rdkit.Chem.rdchem.ChiralType` for possible integer values. + num_Hs : Sequence[int] + the choices for number of bonded hydrogen atoms. + hybridizations : Sequence[int] + the choices for an atom’s hybridization type. See :class:`rdkit.Chem.rdchem.HybridizationType` for possible integer values. + """ + + def __init__( + self, + atomic_nums: Sequence[int], + degrees: Sequence[int], + formal_charges: Sequence[int], + chiral_tags: Sequence[int], + num_Hs: Sequence[int], + hybridizations: Sequence[int], + ): + self.atomic_nums = {j: i for i, j in enumerate(atomic_nums)} + self.degrees = {i: i for i in degrees} + self.formal_charges = {j: i for i, j in enumerate(formal_charges)} + self.chiral_tags = {i: i for i in chiral_tags} + self.num_Hs = {i: i for i in num_Hs} + self.hybridizations = {ht: i for i, ht in enumerate(hybridizations)} + + self._subfeats: list[dict] = [ + self.atomic_nums, + self.degrees, + self.formal_charges, + self.chiral_tags, + self.num_Hs, + self.hybridizations, + ] + subfeat_sizes = [ + 1 + len(self.atomic_nums), + 1 + len(self.degrees), + 1 + len(self.formal_charges), + 1 + len(self.chiral_tags), + 1 + len(self.num_Hs), + 1 + len(self.hybridizations), + 1, + 1, + ] + self.__size = sum(subfeat_sizes) + + def __len__(self) -> int: + return self.__size + + def __call__(self, a: Atom | None) -> np.ndarray: + x = np.zeros(self.__size) + + if a is None: + return x + + feats = [ + a.GetAtomicNum(), + a.GetTotalDegree(), + a.GetFormalCharge(), + int(a.GetChiralTag()), + int(a.GetTotalNumHs()), + a.GetHybridization(), + ] + i = 0 + for feat, choices in zip(feats, self._subfeats): + j = choices.get(feat, len(choices)) + x[i + j] = 1 + i += len(choices) + 1 + x[i] = int(a.GetIsAromatic()) + x[i + 1] = 0.01 * a.GetMass() + + return x + + def num_only(self, a: Atom) -> np.ndarray: + """featurize the atom by setting only the atomic number bit""" + x = np.zeros(len(self)) + + if a is None: + return x + + i = self.atomic_nums.get(a.GetAtomicNum(), len(self.atomic_nums)) + x[i] = 1 + + return x + + @classmethod + def v1(cls, max_atomic_num: int = 100): + """The original implementation used in Chemprop V1 [1]_, [2]_. 
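+
+        A rough usage sketch (``Chem`` here refers to ``rdkit.Chem``)::
+
+            featurizer = MultiHotAtomFeaturizer.v1()
+            atom = Chem.MolFromSmiles("CCO").GetAtomWithIdx(0)
+            x = featurizer(atom)  # a 1-D numpy array of length ``len(featurizer)``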
+ + Parameters + ---------- + max_atomic_num : int, default=100 + Include a bit for all atomic numbers in the interval :math:`[1, \mathtt{max\_atomic\_num}]` + + References + ----------- + .. [1] Yang, K.; Swanson, K.; Jin, W.; Coley, C.; Eiden, P.; Gao, H.; Guzman-Perez, A.; Hopper, T.; + Kelley, B.; Mathea, M.; Palmer, A. "Analyzing Learned Molecular Representations for Property Prediction." + J. Chem. Inf. Model. 2019, 59 (8), 3370–3388. https://doi.org/10.1021/acs.jcim.9b00237 + .. [2] Heid, E.; Greenman, K.P.; Chung, Y.; Li, S.C.; Graff, D.E.; Vermeire, F.H.; Wu, H.; Green, W.H.; McGill, + C.J. "Chemprop: A machine learning package for chemical property prediction." J. Chem. Inf. Model. 2024, + 64 (1), 9–17. https://doi.org/10.1021/acs.jcim.3c01250 + """ + + return cls( + atomic_nums=list(range(1, max_atomic_num + 1)), + degrees=list(range(6)), + formal_charges=[-1, -2, 1, 2, 0], + chiral_tags=list(range(4)), + num_Hs=list(range(5)), + hybridizations=[ + HybridizationType.SP, + HybridizationType.SP2, + HybridizationType.SP3, + HybridizationType.SP3D, + HybridizationType.SP3D2, + ], + ) + + @classmethod + def v2(cls): + """An implementation that includes an atom type bit for all elements in the first four rows of the periodic table plus iodine.""" + + return cls( + atomic_nums=list(range(1, 37)) + [53], + degrees=list(range(6)), + formal_charges=[-1, -2, 1, 2, 0], + chiral_tags=list(range(4)), + num_Hs=list(range(5)), + hybridizations=[ + HybridizationType.S, + HybridizationType.SP, + HybridizationType.SP2, + HybridizationType.SP2D, + HybridizationType.SP3, + HybridizationType.SP3D, + HybridizationType.SP3D2, + ], + ) + + @classmethod + def organic(cls): + r"""A specific parameterization intended for use with organic or drug-like molecules. + + This parameterization features: + 1. includes an atomic number bit only for H, B, C, N, O, F, Si, P, S, Cl, Br, and I atoms + 2. a hybridization bit for :math:`s, sp, sp^2` and :math:`sp^3` hybridizations. + """ + + return cls( + atomic_nums=[1, 5, 6, 7, 8, 9, 14, 15, 16, 17, 35, 53], + degrees=list(range(6)), + formal_charges=[-1, -2, 1, 2, 0], + chiral_tags=list(range(4)), + num_Hs=list(range(5)), + hybridizations=[ + HybridizationType.S, + HybridizationType.SP, + HybridizationType.SP2, + HybridizationType.SP3, + ], + ) + + +class RIGRAtomFeaturizer(VectorFeaturizer[Atom]): + """A :class:`RIGRAtomFeaturizer` uses a multi-hot encoding to featurize atoms using resonance-invariant features. 
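+
+    Compared to :class:`MultiHotAtomFeaturizer`, the formal charge, chirality, aromaticity, and
+    hybridization bits are omitted.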
+ + The generated atom features are ordered as follows: + * atomic number + * degree + * number of hydrogens + * mass + """ + + def __init__( + self, + atomic_nums: Sequence[int] | None = None, + degrees: Sequence[int] | None = None, + num_Hs: Sequence[int] | None = None, + ): + self.atomic_nums = {j: i for i, j in enumerate(atomic_nums or list(range(1, 37)) + [53])} + self.degrees = {i: i for i in (degrees or list(range(6)))} + self.num_Hs = {i: i for i in (num_Hs or list(range(5)))} + + self._subfeats: list[dict] = [self.atomic_nums, self.degrees, self.num_Hs] + subfeat_sizes = [1 + len(self.atomic_nums), 1 + len(self.degrees), 1 + len(self.num_Hs), 1] + self.__size = sum(subfeat_sizes) + + def __len__(self) -> int: + return self.__size + + def __call__(self, a: Atom | None) -> np.ndarray: + x = np.zeros(self.__size) + + if a is None: + return x + + feats = [a.GetAtomicNum(), a.GetTotalDegree(), int(a.GetTotalNumHs())] + i = 0 + for feat, choices in zip(feats, self._subfeats): + j = choices.get(feat, len(choices)) + x[i + j] = 1 + i += len(choices) + 1 + x[i] = 0.01 * a.GetMass() # scaled to about the same range as other features + + return x + + def num_only(self, a: Atom) -> np.ndarray: + """featurize the atom by setting only the atomic number bit""" + x = np.zeros(len(self)) + + if a is None: + return x + + i = self.atomic_nums.get(a.GetAtomicNum(), len(self.atomic_nums)) + x[i] = 1 + + return x + + +class AtomFeatureMode(EnumMapping): + """The mode of an atom is used for featurization into a `MolGraph`""" + + V1 = auto() + V2 = auto() + ORGANIC = auto() + RIGR = auto() + + +def get_multi_hot_atom_featurizer(mode: str | AtomFeatureMode) -> MultiHotAtomFeaturizer: + """Build the corresponding multi-hot atom featurizer.""" + match AtomFeatureMode.get(mode): + case AtomFeatureMode.V1: + return MultiHotAtomFeaturizer.v1() + case AtomFeatureMode.V2: + return MultiHotAtomFeaturizer.v2() + case AtomFeatureMode.ORGANIC: + return MultiHotAtomFeaturizer.organic() + case AtomFeatureMode.RIGR: + return RIGRAtomFeaturizer() + case _: + raise RuntimeError("unreachable code reached!") diff --git a/chemprop-updated/chemprop/featurizers/base.py b/chemprop-updated/chemprop/featurizers/base.py new file mode 100644 index 0000000000000000000000000000000000000000..29b876bd8751e13ac151c43f3a7d8b1d42d4a831 --- /dev/null +++ b/chemprop-updated/chemprop/featurizers/base.py @@ -0,0 +1,30 @@ +from abc import abstractmethod +from collections.abc import Sized +from typing import Generic, TypeVar + +import numpy as np + +from chemprop.data.molgraph import MolGraph + +S = TypeVar("S") +T = TypeVar("T") + + +class Featurizer(Generic[S, T]): + """An :class:`Featurizer` featurizes inputs type ``S`` into outputs of + type ``T``.""" + + @abstractmethod + def __call__(self, input: S, *args, **kwargs) -> T: + """featurize an input""" + + +class VectorFeaturizer(Featurizer[S, np.ndarray], Sized): + ... + + +class GraphFeaturizer(Featurizer[S, MolGraph]): + @property + @abstractmethod + def shape(self) -> tuple[int, int]: + ... 
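+
+
+# A minimal, illustrative sketch (kept as a comment; not part of the public API) of how these
+# base classes are meant to be subclassed -- a concrete VectorFeaturizer only needs to
+# implement __len__ and __call__ (``Atom`` here would come from ``rdkit.Chem.rdchem``):
+#
+#     class AtomicNumFeaturizer(VectorFeaturizer[Atom]):
+#         def __len__(self) -> int:
+#             return 1
+#
+#         def __call__(self, a: Atom) -> np.ndarray:
+#             return np.array([a.GetAtomicNum()], dtype=float)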
diff --git a/chemprop-updated/chemprop/featurizers/bond.py b/chemprop-updated/chemprop/featurizers/bond.py new file mode 100644 index 0000000000000000000000000000000000000000..c604b89d1c7b7d991fac2ebbce9f866cc1b1603c --- /dev/null +++ b/chemprop-updated/chemprop/featurizers/bond.py @@ -0,0 +1,122 @@ +from typing import Sequence + +import numpy as np +from rdkit.Chem.rdchem import Bond, BondType + +from chemprop.featurizers.base import VectorFeaturizer + + +class MultiHotBondFeaturizer(VectorFeaturizer[Bond]): + """A :class:`MultiHotBondFeaturizer` feauturizes bonds based on the following attributes: + + * ``null``-ity (i.e., is the bond ``None``?) + * bond type + * conjugated? + * in ring? + * stereochemistry + + The feature vectors produced by this featurizer have the following (general) signature: + + +---------------------+-----------------+--------------+ + | slice [start, stop) | subfeature | unknown pad? | + +=====================+=================+==============+ + | 0-1 | null? | N | + +---------------------+-----------------+--------------+ + | 1-5 | bond type | N | + +---------------------+-----------------+--------------+ + | 5-6 | conjugated? | N | + +---------------------+-----------------+--------------+ + | 6-8 | in ring? | N | + +---------------------+-----------------+--------------+ + | 7-14 | stereochemistry | Y | + +---------------------+-----------------+--------------+ + + **NOTE**: the above signature only applies for the default arguments, as the bond type and + sterochemistry slices can increase in size depending on the input arguments. + + Parameters + ---------- + bond_types : Sequence[BondType] | None, default=[SINGLE, DOUBLE, TRIPLE, AROMATIC] + the known bond types + stereos : Sequence[int] | None, default=[0, 1, 2, 3, 4, 5] + the known bond stereochemistries. See [1]_ for more details + + References + ---------- + .. [1] https://www.rdkit.org/docs/source/rdkit.Chem.rdchem.html#rdkit.Chem.rdchem.BondStereo.values + """ + + def __init__( + self, bond_types: Sequence[BondType] | None = None, stereos: Sequence[int] | None = None + ): + self.bond_types = bond_types or [ + BondType.SINGLE, + BondType.DOUBLE, + BondType.TRIPLE, + BondType.AROMATIC, + ] + self.stereo = stereos or range(6) + + def __len__(self): + return 1 + len(self.bond_types) + 2 + (len(self.stereo) + 1) + + def __call__(self, b: Bond) -> np.ndarray: + x = np.zeros(len(self), int) + + if b is None: + x[0] = 1 + return x + + i = 1 + bond_type = b.GetBondType() + bt_bit, size = self.one_hot_index(bond_type, self.bond_types) + if bt_bit != size: + x[i + bt_bit] = 1 + i += size - 1 + + x[i] = int(b.GetIsConjugated()) + x[i + 1] = int(b.IsInRing()) + i += 2 + + stereo_bit, _ = self.one_hot_index(int(b.GetStereo()), self.stereo) + x[i + stereo_bit] = 1 + + return x + + @classmethod + def one_hot_index(cls, x, xs: Sequence) -> tuple[int, int]: + """Returns a tuple of the index of ``x`` in ``xs`` and ``len(xs) + 1`` if ``x`` is in ``xs``. + Otherwise, returns a tuple with ``len(xs)`` and ``len(xs) + 1``.""" + n = len(xs) + + return xs.index(x) if x in xs else n, n + 1 + + +class RIGRBondFeaturizer(VectorFeaturizer[Bond]): + """A :class:`RIGRBondFeaturizer` feauturizes bonds based on only the resonance-invariant features: + + * ``null``-ity (i.e., is the bond ``None``?) + * in ring? 
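+
+    The resulting feature vector therefore has length 2: ``[null?, in_ring?]``.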
+ """ + + def __len__(self): + return 2 + + def __call__(self, b: Bond) -> np.ndarray: + x = np.zeros(len(self), int) + + if b is None: + x[0] = 1 + return x + + x[1] = int(b.IsInRing()) + + return x + + @classmethod + def one_hot_index(cls, x, xs: Sequence) -> tuple[int, int]: + """Returns a tuple of the index of ``x`` in ``xs`` and ``len(xs) + 1`` if ``x`` is in ``xs``. + Otherwise, returns a tuple with ``len(xs)`` and ``len(xs) + 1``.""" + n = len(xs) + + return xs.index(x) if x in xs else n, n + 1 diff --git a/chemprop-updated/chemprop/featurizers/molecule.py b/chemprop-updated/chemprop/featurizers/molecule.py new file mode 100644 index 0000000000000000000000000000000000000000..df35f066f27b64dc6524c54dbbc0c3e4d7233bdb --- /dev/null +++ b/chemprop-updated/chemprop/featurizers/molecule.py @@ -0,0 +1,104 @@ +import logging + +from descriptastorus.descriptors import rdDescriptors, rdNormalizedDescriptors +import numpy as np +from rdkit import Chem +from rdkit.Chem import Descriptors, Mol +from rdkit.Chem.rdFingerprintGenerator import GetMorganGenerator + +from chemprop.featurizers.base import VectorFeaturizer +from chemprop.utils import ClassRegistry + +logger = logging.getLogger(__name__) + +MoleculeFeaturizerRegistry = ClassRegistry[VectorFeaturizer[Mol]]() + + +class MorganFeaturizerMixin: + def __init__(self, radius: int = 2, length: int = 2048, include_chirality: bool = True): + if radius < 0: + raise ValueError(f"arg 'radius' must be >= 0! got: {radius}") + + self.length = length + self.F = GetMorganGenerator( + radius=radius, fpSize=length, includeChirality=include_chirality + ) + + def __len__(self) -> int: + return self.length + + +class BinaryFeaturizerMixin: + def __call__(self, mol: Chem.Mol) -> np.ndarray: + return self.F.GetFingerprintAsNumPy(mol) + + +class CountFeaturizerMixin: + def __call__(self, mol: Chem.Mol) -> np.ndarray: + return self.F.GetCountFingerprintAsNumPy(mol).astype(np.int32) + + +@MoleculeFeaturizerRegistry("morgan_binary") +class MorganBinaryFeaturizer(MorganFeaturizerMixin, BinaryFeaturizerMixin, VectorFeaturizer[Mol]): + pass + + +@MoleculeFeaturizerRegistry("morgan_count") +class MorganCountFeaturizer(MorganFeaturizerMixin, CountFeaturizerMixin, VectorFeaturizer[Mol]): + pass + + +@MoleculeFeaturizerRegistry("rdkit_2d") +class RDKit2DFeaturizer(VectorFeaturizer[Mol]): + def __init__(self): + logger.warning( + "The RDKit 2D features can deviate signifcantly from a normal distribution. Consider " + "manually scaling them using an appropriate scaler before creating datapoints, rather " + "than using the scikit-learn `StandardScaler` (the default in Chemprop)." 
+ ) + + def __len__(self) -> int: + return len(Descriptors.descList) + + def __call__(self, mol: Chem.Mol) -> np.ndarray: + features = np.array( + [ + 0.0 if name == "SPS" and mol.GetNumHeavyAtoms() == 0 else func(mol) + for name, func in Descriptors.descList + ], + dtype=float, + ) + + return features + + +class V1RDKit2DFeaturizerMixin(VectorFeaturizer[Mol]): + def __len__(self) -> int: + return 200 + + def __call__(self, mol: Mol) -> np.ndarray: + smiles = Chem.MolToSmiles(mol, isomericSmiles=True) + features = self.generator.process(smiles)[1:] + + return np.array(features) + + +@MoleculeFeaturizerRegistry("v1_rdkit_2d") +class V1RDKit2DFeaturizer(V1RDKit2DFeaturizerMixin): + def __init__(self): + self.generator = rdDescriptors.RDKit2D() + + +@MoleculeFeaturizerRegistry("v1_rdkit_2d_normalized") +class V1RDKit2DNormalizedFeaturizer(V1RDKit2DFeaturizerMixin): + def __init__(self): + self.generator = rdNormalizedDescriptors.RDKit2DNormalized() + + +@MoleculeFeaturizerRegistry("charge") +class ChargeFeaturizer(VectorFeaturizer[Mol]): + def __call__(self, mol: Chem.Mol) -> np.ndarray: + return np.array([Chem.GetFormalCharge(mol)]) + + def __len__(self) -> int: + return 1 diff --git a/chemprop-updated/chemprop/featurizers/molgraph/__init__.py b/chemprop-updated/chemprop/featurizers/molgraph/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9fb21580de633d627d3144c55fe809d33466d26e --- /dev/null +++ b/chemprop-updated/chemprop/featurizers/molgraph/__init__.py @@ -0,0 +1,13 @@ +from .cache import MolGraphCache, MolGraphCacheFacade, MolGraphCacheOnTheFly +from .molecule import SimpleMoleculeMolGraphFeaturizer +from .reaction import CGRFeaturizer, CondensedGraphOfReactionFeaturizer, RxnMode + +__all__ = [ + "MolGraphCacheFacade", + "MolGraphCache", + "MolGraphCacheOnTheFly", + "SimpleMoleculeMolGraphFeaturizer", + "CondensedGraphOfReactionFeaturizer", + "CGRFeaturizer", + "RxnMode", +] diff --git a/chemprop-updated/chemprop/featurizers/molgraph/cache.py b/chemprop-updated/chemprop/featurizers/molgraph/cache.py new file mode 100644 index 0000000000000000000000000000000000000000..171d2b26f21c19d42539843d29c765b773651e2c --- /dev/null +++ b/chemprop-updated/chemprop/featurizers/molgraph/cache.py @@ -0,0 +1,89 @@ +from abc import abstractmethod +from collections.abc import Sequence +from typing import Generic, Iterable + +import numpy as np + +from chemprop.data.molgraph import MolGraph +from chemprop.featurizers.base import Featurizer, S + + +class MolGraphCacheFacade(Sequence[MolGraph], Generic[S]): + """ + A :class:`MolGraphCacheFacade` provided an interface for caching + :class:`~chemprop.data.molgraph.MolGraph`\s. + + .. note:: + This class only provides a facade for a cached dataset, but it *does not guarantee* + whether the underlying data is truly cached. + + + Parameters + ---------- + inputs : Iterable[S] + The inputs to be featurized. + V_fs : Iterable[np.ndarray] + The node features for each input. + E_fs : Iterable[np.ndarray] + The edge features for each input. + featurizer : Featurizer[S, MolGraph] + The featurizer with which to generate the + :class:`~chemprop.data.molgraph.MolGraph`\s. + """ + + @abstractmethod + def __init__( + self, + inputs: Iterable[S], + V_fs: Iterable[np.ndarray], + E_fs: Iterable[np.ndarray], + featurizer: Featurizer[S, MolGraph], + ): + pass + + +class MolGraphCache(MolGraphCacheFacade): + """ + A :class:`MolGraphCache` precomputes the corresponding + :class:`~chemprop.data.molgraph.MolGraph`\s and caches them in memory. 
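+
+    .. note::
+        Precomputing is most useful for smaller datasets that are iterated over many epochs;
+        all of the :class:`~chemprop.data.molgraph.MolGraph`\s are held in memory up front.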
+ """ + + def __init__( + self, + inputs: Iterable[S], + V_fs: Iterable[np.ndarray | None], + E_fs: Iterable[np.ndarray | None], + featurizer: Featurizer[S, MolGraph], + ): + self._mgs = [featurizer(input, V_f, E_f) for input, V_f, E_f in zip(inputs, V_fs, E_fs)] + + def __len__(self) -> int: + return len(self._mgs) + + def __getitem__(self, index: int) -> MolGraph: + return self._mgs[index] + + +class MolGraphCacheOnTheFly(MolGraphCacheFacade): + """ + A :class:`MolGraphCacheOnTheFly` computes the corresponding + :class:`~chemprop.data.molgraph.MolGraph`\s as they are requested. + """ + + def __init__( + self, + inputs: Iterable[S], + V_fs: Iterable[np.ndarray | None], + E_fs: Iterable[np.ndarray | None], + featurizer: Featurizer[S, MolGraph], + ): + self._inputs = list(inputs) + self._V_fs = list(V_fs) + self._E_fs = list(E_fs) + self._featurizer = featurizer + + def __len__(self) -> int: + return len(self._inputs) + + def __getitem__(self, index: int) -> MolGraph: + return self._featurizer(self._inputs[index], self._V_fs[index], self._E_fs[index]) diff --git a/chemprop-updated/chemprop/featurizers/molgraph/mixins.py b/chemprop-updated/chemprop/featurizers/molgraph/mixins.py new file mode 100644 index 0000000000000000000000000000000000000000..afa461d481388d51f6e8434a21a5f5f99199616a --- /dev/null +++ b/chemprop-updated/chemprop/featurizers/molgraph/mixins.py @@ -0,0 +1,23 @@ +from dataclasses import dataclass, field + +from rdkit.Chem.rdchem import Atom, Bond + +from chemprop.featurizers.atom import MultiHotAtomFeaturizer +from chemprop.featurizers.base import VectorFeaturizer +from chemprop.featurizers.bond import MultiHotBondFeaturizer + + +@dataclass +class _MolGraphFeaturizerMixin: + atom_featurizer: VectorFeaturizer[Atom] = field(default_factory=MultiHotAtomFeaturizer.v2) + bond_featurizer: VectorFeaturizer[Bond] = field(default_factory=MultiHotBondFeaturizer) + + def __post_init__(self): + self.atom_fdim = len(self.atom_featurizer) + self.bond_fdim = len(self.bond_featurizer) + + @property + def shape(self) -> tuple[int, int]: + """the feature dimension of the atoms and bonds, respectively, of `MolGraph`s generated by + this featurizer""" + return self.atom_fdim, self.bond_fdim diff --git a/chemprop-updated/chemprop/featurizers/molgraph/molecule.py b/chemprop-updated/chemprop/featurizers/molgraph/molecule.py new file mode 100644 index 0000000000000000000000000000000000000000..7ac7fafd4e15c57e1823ff0904e0888126c8352c --- /dev/null +++ b/chemprop-updated/chemprop/featurizers/molgraph/molecule.py @@ -0,0 +1,91 @@ +from dataclasses import InitVar, dataclass + +import numpy as np +from rdkit import Chem +from rdkit.Chem import Mol + +from chemprop.data.molgraph import MolGraph +from chemprop.featurizers.base import GraphFeaturizer +from chemprop.featurizers.molgraph.mixins import _MolGraphFeaturizerMixin + + +@dataclass +class SimpleMoleculeMolGraphFeaturizer(_MolGraphFeaturizerMixin, GraphFeaturizer[Mol]): + """A :class:`SimpleMoleculeMolGraphFeaturizer` is the default implementation of a + :class:`MoleculeMolGraphFeaturizer` + + Parameters + ---------- + atom_featurizer : AtomFeaturizer, default=MultiHotAtomFeaturizer() + the featurizer with which to calculate feature representations of the atoms in a given + molecule + bond_featurizer : BondFeaturizer, default=MultiHotBondFeaturizer() + the featurizer with which to calculate feature representations of the bonds in a given + molecule + extra_atom_fdim : int, default=0 + the dimension of the additional features that will be 
concatenated onto the calculated + features of each atom + extra_bond_fdim : int, default=0 + the dimension of the additional features that will be concatenated onto the calculated + features of each bond + """ + + extra_atom_fdim: InitVar[int] = 0 + extra_bond_fdim: InitVar[int] = 0 + + def __post_init__(self, extra_atom_fdim: int = 0, extra_bond_fdim: int = 0): + super().__post_init__() + + self.extra_atom_fdim = extra_atom_fdim + self.extra_bond_fdim = extra_bond_fdim + self.atom_fdim += self.extra_atom_fdim + self.bond_fdim += self.extra_bond_fdim + + def __call__( + self, + mol: Chem.Mol, + atom_features_extra: np.ndarray | None = None, + bond_features_extra: np.ndarray | None = None, + ) -> MolGraph: + n_atoms = mol.GetNumAtoms() + n_bonds = mol.GetNumBonds() + + if atom_features_extra is not None and len(atom_features_extra) != n_atoms: + raise ValueError( + "Input molecule must have same number of atoms as `len(atom_features_extra)`!" + f"got: {n_atoms} and {len(atom_features_extra)}, respectively" + ) + if bond_features_extra is not None and len(bond_features_extra) != n_bonds: + raise ValueError( + "Input molecule must have same number of bonds as `len(bond_features_extra)`!" + f"got: {n_bonds} and {len(bond_features_extra)}, respectively" + ) + + if n_atoms == 0: + V = np.zeros((1, self.atom_fdim), dtype=np.single) + else: + V = np.array([self.atom_featurizer(a) for a in mol.GetAtoms()], dtype=np.single) + E = np.empty((2 * n_bonds, self.bond_fdim)) + edge_index = [[], []] + + if atom_features_extra is not None: + V = np.hstack((V, atom_features_extra)) + + i = 0 + for bond in mol.GetBonds(): + x_e = self.bond_featurizer(bond) + if bond_features_extra is not None: + x_e = np.concatenate((x_e, bond_features_extra[bond.GetIdx()]), dtype=np.single) + + E[i : i + 2] = x_e + + u, v = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx() + edge_index[0].extend([u, v]) + edge_index[1].extend([v, u]) + + i += 2 + + rev_edge_index = np.arange(len(E)).reshape(-1, 2)[:, ::-1].ravel() + edge_index = np.array(edge_index, int) + + return MolGraph(V, E, edge_index, rev_edge_index) diff --git a/chemprop-updated/chemprop/featurizers/molgraph/reaction.py b/chemprop-updated/chemprop/featurizers/molgraph/reaction.py new file mode 100644 index 0000000000000000000000000000000000000000..f35b03e037b45553743c0af53363a5f9d68585e9 --- /dev/null +++ b/chemprop-updated/chemprop/featurizers/molgraph/reaction.py @@ -0,0 +1,332 @@ +from dataclasses import InitVar, dataclass +from enum import auto +import logging +from typing import Iterable, Sequence, TypeAlias + +import numpy as np +from rdkit import Chem +from rdkit.Chem.rdchem import Bond, Mol + +from chemprop.data.molgraph import MolGraph +from chemprop.featurizers.base import GraphFeaturizer +from chemprop.featurizers.molgraph.mixins import _MolGraphFeaturizerMixin +from chemprop.types import Rxn +from chemprop.utils.utils import EnumMapping + +logger = logging.getLogger(__name__) + + +class RxnMode(EnumMapping): + """The mode by which a reaction should be featurized into a `MolGraph`""" + + REAC_PROD = auto() + """concatenate the reactant features with the product features.""" + REAC_PROD_BALANCE = auto() + """concatenate the reactant features with the products feature and balances imbalanced + reactions""" + REAC_DIFF = auto() + """concatenates the reactant features with the difference in features between reactants and + products""" + REAC_DIFF_BALANCE = auto() + """concatenates the reactant features with the difference in features between reactants and + 
product and balances imbalanced reactions""" + PROD_DIFF = auto() + """concatenates the product features with the difference in features between reactants and + products""" + PROD_DIFF_BALANCE = auto() + """concatenates the product features with the difference in features between reactants and + products and balances imbalanced reactions""" + + +@dataclass +class CondensedGraphOfReactionFeaturizer(_MolGraphFeaturizerMixin, GraphFeaturizer[Rxn]): + """A :class:`CondensedGraphOfReactionFeaturizer` featurizes reactions using the condensed + reaction graph method utilized in [1]_ + + **NOTE**: This class *does not* accept a :class:`AtomFeaturizer` instance. This is because + it requries the :meth:`num_only()` method, which is only implemented in the concrete + :class:`AtomFeaturizer` class + + Parameters + ---------- + atom_featurizer : AtomFeaturizer, default=AtomFeaturizer() + the featurizer with which to calculate feature representations of the atoms in a given + molecule + bond_featurizer : BondFeaturizerBase, default=BondFeaturizer() + the featurizer with which to calculate feature representations of the bonds in a given + molecule + mode_ : Union[str, ReactionMode], default=ReactionMode.REAC_DIFF + the mode by which to featurize the reaction as either the string code or enum value + + References + ---------- + .. [1] Heid, E.; Green, W.H. "Machine Learning of Reaction Properties via Learned + Representations of the Condensed Graph of Reaction." J. Chem. Inf. Model. 2022, 62, + 2101-2110. https://doi.org/10.1021/acs.jcim.1c00975 + """ + + mode_: InitVar[str | RxnMode] = RxnMode.REAC_DIFF + + def __post_init__(self, mode_: str | RxnMode): + super().__post_init__() + + self.mode = mode_ + self.atom_fdim += len(self.atom_featurizer) - len(self.atom_featurizer.atomic_nums) - 1 + self.bond_fdim *= 2 + + @property + def mode(self) -> RxnMode: + return self.__mode + + @mode.setter + def mode(self, m: str | RxnMode): + self.__mode = RxnMode.get(m) + + def __call__( + self, + rxn: tuple[Chem.Mol, Chem.Mol], + atom_features_extra: np.ndarray | None = None, + bond_features_extra: np.ndarray | None = None, + ) -> MolGraph: + """Featurize the input reaction into a molecular graph + + Parameters + ---------- + rxn : Rxn + a 2-tuple of atom-mapped rdkit molecules, where the 0th element is the reactant and the + 1st element is the product + atom_features_extra : np.ndarray | None, default=None + *UNSUPPORTED* maintained only to maintain parity with the method signature of the + `MoleculeFeaturizer` + bond_features_extra : np.ndarray | None, default=None + *UNSUPPORTED* maintained only to maintain parity with the method signature of the + `MoleculeFeaturizer` + + Returns + ------- + MolGraph + the molecular graph of the reaction + """ + + if atom_features_extra is not None: + logger.warning("'atom_features_extra' is currently unsupported for reactions") + if bond_features_extra is not None: + logger.warning("'bond_features_extra' is currently unsupported for reactions") + + reac, pdt = rxn + r2p_idx_map, pdt_idxs, reac_idxs = self.map_reac_to_prod(reac, pdt) + + V = self._calc_node_feature_matrix(reac, pdt, r2p_idx_map, pdt_idxs, reac_idxs) + E = [] + edge_index = [[], []] + + n_atoms_tot = len(V) + n_atoms_reac = reac.GetNumAtoms() + + for u in range(n_atoms_tot): + for v in range(u + 1, n_atoms_tot): + b_reac, b_prod = self._get_bonds( + reac, pdt, r2p_idx_map, pdt_idxs, n_atoms_reac, u, v + ) + if b_reac is None and b_prod is None: + continue + + x_e = self._calc_edge_feature(b_reac, b_prod) + 
E.extend([x_e, x_e]) + edge_index[0].extend([u, v]) + edge_index[1].extend([v, u]) + + E = np.array(E) if len(E) > 0 else np.empty((0, self.bond_fdim)) + rev_edge_index = np.arange(len(E)).reshape(-1, 2)[:, ::-1].ravel() + edge_index = np.array(edge_index, int) + + return MolGraph(V, E, edge_index, rev_edge_index) + + def _calc_node_feature_matrix( + self, + rct: Mol, + pdt: Mol, + r2p_idx_map: dict[int, int], + pdt_idxs: Iterable[int], + reac_idxs: Iterable[int], + ) -> np.ndarray: + """Calculate the node feature matrix for the reaction""" + X_v_r1 = np.array([self.atom_featurizer(a) for a in rct.GetAtoms()]) + X_v_p2 = np.array([self.atom_featurizer(pdt.GetAtomWithIdx(i)) for i in pdt_idxs]) + X_v_p2 = X_v_p2.reshape(-1, X_v_r1.shape[1]) + + if self.mode in [RxnMode.REAC_DIFF, RxnMode.PROD_DIFF, RxnMode.REAC_PROD]: + # Reactant: + # (1) regular features for each atom in the reactants + # (2) zero features for each atom that's only in the products + X_v_r2 = [self.atom_featurizer.num_only(pdt.GetAtomWithIdx(i)) for i in pdt_idxs] + X_v_r2 = np.array(X_v_r2).reshape(-1, X_v_r1.shape[1]) + + # Product: + # (1) either (a) product-side features for each atom in both + # or (b) zero features for each atom only in the reatants + # (2) regular features for each atom only in the products + X_v_p1 = np.array( + [ + ( + self.atom_featurizer(pdt.GetAtomWithIdx(r2p_idx_map[a.GetIdx()])) + if a.GetIdx() not in reac_idxs + else self.atom_featurizer.num_only(a) + ) + for a in rct.GetAtoms() + ] + ) + else: + # Reactant: + # (1) regular features for each atom in the reactants + # (2) regular features for each atom only in the products + X_v_r2 = [self.atom_featurizer(pdt.GetAtomWithIdx(i)) for i in pdt_idxs] + X_v_r2 = np.array(X_v_r2).reshape(-1, X_v_r1.shape[1]) + + # Product: + # (1) either (a) product-side features for each atom in both + # or (b) reactant-side features for each atom only in the reatants + # (2) regular features for each atom only in the products + X_v_p1 = np.array( + [ + ( + self.atom_featurizer(pdt.GetAtomWithIdx(r2p_idx_map[a.GetIdx()])) + if a.GetIdx() not in reac_idxs + else self.atom_featurizer(a) + ) + for a in rct.GetAtoms() + ] + ) + + X_v_r = np.concatenate((X_v_r1, X_v_r2)) + X_v_p = np.concatenate((X_v_p1, X_v_p2)) + + m = min(len(X_v_r), len(X_v_p)) + + if self.mode in [RxnMode.REAC_PROD, RxnMode.REAC_PROD_BALANCE]: + X_v = np.hstack((X_v_r[:m], X_v_p[:m, len(self.atom_featurizer.atomic_nums) + 1 :])) + else: + X_v_d = X_v_p[:m] - X_v_r[:m] + if self.mode in [RxnMode.REAC_DIFF, RxnMode.REAC_DIFF_BALANCE]: + X_v = np.hstack((X_v_r[:m], X_v_d[:m, len(self.atom_featurizer.atomic_nums) + 1 :])) + else: + X_v = np.hstack((X_v_p[:m], X_v_d[:m, len(self.atom_featurizer.atomic_nums) + 1 :])) + + return X_v + + def _get_bonds( + self, + rct: Bond, + pdt: Bond, + ri2pj: dict[int, int], + pids: Sequence[int], + n_atoms_r: int, + u: int, + v: int, + ) -> tuple[Bond, Bond]: + """get the corresponding reactant- and product-side bond, respectively, betweeen atoms `u` and `v`""" + if u >= n_atoms_r and v >= n_atoms_r: + b_prod = pdt.GetBondBetweenAtoms(pids[u - n_atoms_r], pids[v - n_atoms_r]) + + if self.mode in [ + RxnMode.REAC_PROD_BALANCE, + RxnMode.REAC_DIFF_BALANCE, + RxnMode.PROD_DIFF_BALANCE, + ]: + b_reac = b_prod + else: + b_reac = None + elif u < n_atoms_r and v >= n_atoms_r: # One atom only in product + b_reac = None + + if u in ri2pj: + b_prod = pdt.GetBondBetweenAtoms(ri2pj[u], pids[v - n_atoms_r]) + else: # Atom atom only in reactant, the other only in product + b_prod = 
None + else: + b_reac = rct.GetBondBetweenAtoms(u, v) + + if u in ri2pj and v in ri2pj: # Both atoms in both reactant and product + b_prod = pdt.GetBondBetweenAtoms(ri2pj[u], ri2pj[v]) + elif self.mode in [ + RxnMode.REAC_PROD_BALANCE, + RxnMode.REAC_DIFF_BALANCE, + RxnMode.PROD_DIFF_BALANCE, + ]: + b_prod = None if (u in ri2pj or v in ri2pj) else b_reac + else: # One or both atoms only in reactant + b_prod = None + + return b_reac, b_prod + + def _calc_edge_feature(self, b_reac: Bond, b_pdt: Bond): + """Calculate the global features of the two bonds""" + x_e_r = self.bond_featurizer(b_reac) + x_e_p = self.bond_featurizer(b_pdt) + x_e_d = x_e_p - x_e_r + + if self.mode in [RxnMode.REAC_PROD, RxnMode.REAC_PROD_BALANCE]: + x_e = np.hstack((x_e_r, x_e_p)) + elif self.mode in [RxnMode.REAC_DIFF, RxnMode.REAC_DIFF_BALANCE]: + x_e = np.hstack((x_e_r, x_e_d)) + else: + x_e = np.hstack((x_e_p, x_e_d)) + + return x_e + + @classmethod + def map_reac_to_prod( + cls, reacs: Chem.Mol, pdts: Chem.Mol + ) -> tuple[dict[int, int], list[int], list[int]]: + """Map atom indices between corresponding atoms in the reactant and product molecules + + Parameters + ---------- + reacs : Chem.Mol + An RDKit molecule of the reactants + pdts : Chem.Mol + An RDKit molecule of the products + + Returns + ------- + ri2pi : dict[int, int] + A dictionary of corresponding atom indices from reactant atoms to product atoms + pdt_idxs : list[int] + atom indices of poduct atoms + rct_idxs : list[int] + atom indices of reactant atoms + """ + pdt_idxs = [] + mapno2pj = {} + reac_atommap_nums = {a.GetAtomMapNum() for a in reacs.GetAtoms()} + + for a in pdts.GetAtoms(): + map_num = a.GetAtomMapNum() + j = a.GetIdx() + + if map_num > 0: + mapno2pj[map_num] = j + if map_num not in reac_atommap_nums: + pdt_idxs.append(j) + else: + pdt_idxs.append(j) + + rct_idxs = [] + r2p_idx_map = {} + + for a in reacs.GetAtoms(): + map_num = a.GetAtomMapNum() + i = a.GetIdx() + + if map_num > 0: + try: + r2p_idx_map[i] = mapno2pj[map_num] + except KeyError: + rct_idxs.append(i) + else: + rct_idxs.append(i) + + return r2p_idx_map, pdt_idxs, rct_idxs + + +CGRFeaturizer: TypeAlias = CondensedGraphOfReactionFeaturizer diff --git a/chemprop-updated/chemprop/models/__init__.py b/chemprop-updated/chemprop/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..76946d73b599668b8cdd7adc2a1c48b38b8d1108 --- /dev/null +++ b/chemprop-updated/chemprop/models/__init__.py @@ -0,0 +1,5 @@ +from .model import MPNN +from .multi import MulticomponentMPNN +from .utils import load_model, save_model + +__all__ = ["MPNN", "MulticomponentMPNN", "load_model", "save_model"] diff --git a/chemprop-updated/chemprop/models/__pycache__/__init__.cpython-37.pyc b/chemprop-updated/chemprop/models/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6503ed123f3d65114e035321f0cbf9882c566608 Binary files /dev/null and b/chemprop-updated/chemprop/models/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/models/__pycache__/ffn.cpython-37.pyc b/chemprop-updated/chemprop/models/__pycache__/ffn.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2b4f0b097a79f0ccabf82aae5f2e77c672a9eb5 Binary files /dev/null and b/chemprop-updated/chemprop/models/__pycache__/ffn.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/models/__pycache__/model.cpython-37.pyc b/chemprop-updated/chemprop/models/__pycache__/model.cpython-37.pyc new file mode 
100644 index 0000000000000000000000000000000000000000..ab53bf808264e8ab0121ba648c645b3852d114cd Binary files /dev/null and b/chemprop-updated/chemprop/models/__pycache__/model.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/models/__pycache__/mpn.cpython-37.pyc b/chemprop-updated/chemprop/models/__pycache__/mpn.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9e647541f4fb9aa7804d2c7393f5f55c7bff876a Binary files /dev/null and b/chemprop-updated/chemprop/models/__pycache__/mpn.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/models/model.py b/chemprop-updated/chemprop/models/model.py new file mode 100644 index 0000000000000000000000000000000000000000..c953638d55329f94402615f7ab359a240a755b0c --- /dev/null +++ b/chemprop-updated/chemprop/models/model.py @@ -0,0 +1,374 @@ +from __future__ import annotations + +import io +import logging +from typing import Iterable, TypeAlias + +from lightning import pytorch as pl +import torch +from torch import Tensor, nn, optim + +from chemprop.data import BatchMolGraph, MulticomponentTrainingBatch, TrainingBatch +from chemprop.nn import Aggregation, ChempropMetric, MessagePassing, Predictor +from chemprop.nn.transforms import ScaleTransform +from chemprop.schedulers import build_NoamLike_LRSched +from chemprop.utils.registry import Factory + +logger = logging.getLogger(__name__) + +BatchType: TypeAlias = TrainingBatch | MulticomponentTrainingBatch + +import pandas as pd +from transformers import RobertaTokenizer, RobertaModel +from torch.utils.data import DataLoader +import torch.nn.functional as F + +class ChemBERTaEncoder(nn.Module): + def __init__(self, model_name="DeepChem/ChemBERTa-77M-MLM", fine_tune_percent=10, unfreeze_pooler=True): + super().__init__() + self.tokenizer = RobertaTokenizer.from_pretrained(model_name) + self.encoder = RobertaModel.from_pretrained(model_name) + + # Step 1: Freeze all parameters + for param in self.encoder.parameters(): + param.requires_grad = False + + # Step 2: Unfreeze the top k layers based on fine_tune_percent + num_layers_total = len(self.encoder.encoder.layer) + k = max(1, int(num_layers_total * fine_tune_percent / 100)) + + for layer in self.encoder.encoder.layer[-k:]: # Unfreeze top k layers + for param in layer.parameters(): + param.requires_grad = True + + # Logging + total = sum(p.numel() for p in self.encoder.parameters()) + trainable = sum(p.numel() for p in self.encoder.parameters() if p.requires_grad) + print(f"ChemBERTa Total parameters: {total}") + print(f"Trainable parameters: {trainable} ({100 * trainable / total:.2f}%)") + + def encode(self, smiles_list: list[str], batch_size=64, max_length=128): + device = next(self.encoder.parameters()).device + all_hidden_states = [] + all_pooler_outputs = [] + + for i in range(0, len(smiles_list), batch_size): + batch = smiles_list[i:i+batch_size] + inputs = self.tokenizer(batch, padding=True, truncation=True, return_tensors="pt", max_length=max_length) + inputs = {k: v.to(device) for k, v in inputs.items()} + + with torch.set_grad_enabled(self.encoder.training): + outputs = self.encoder(**inputs) + last_hidden = outputs.last_hidden_state.detach().clone() # [B, L, d_model] + pooler = outputs.pooler_output.detach().clone() # [B, d_model] + all_hidden_states.append(last_hidden) + all_pooler_outputs.append(pooler) + + # Return both as tensors + return { + "last_hidden_state": torch.cat(all_hidden_states, dim=0), + "pooler_output": torch.cat(all_pooler_outputs, dim=0) + } + +class fusionGAT(nn.Module): + 
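    # Single-head additive attention, GAT-style, for fusing the pooled D-MPNN embedding
    # with per-token ChemBERTa embeddings: both inputs are projected to hidden_dim, the
    # graph vector is broadcast against every token, each token receives a
    # LeakyReLU-activated score, the scores are softmax-normalized over the token axis,
    # and the weighted sum of the projected token embeddings is returned. The D-MPNN
    # vector therefore influences the output only through the attention weights.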
def __init__(self, dmpnn_dim: int, bert_dim: int, hidden_dim: int): + super().__init__() + # Project descriptor and nodes to hidden_dim + self.W_dmpnn = nn.Linear(dmpnn_dim, hidden_dim) + self.W_bert = nn.Linear(bert_dim, hidden_dim) + self.attn_fc = nn.Linear(2 * hidden_dim, 1) + self.leaky_relu = nn.LeakyReLU(0.2) + + def forward(self, dmpnn_output: Tensor, encodings: Tensor) -> Tensor: + """ + desc: (B, dmpnn_dim) + nodes: (B, L, bert_dim) + + Returns: + updated_desc: (B, hidden_dim) + """ + B, L, _ = encodings.size() + + dmpnn_proj = self.W_dmpnn(dmpnn_output) # (B, hidden_dim) + bert_proj = self.W_bert(encodings) # (B, L, hidden_dim) + + # Expand dmpnn_proj to (B, L, hidden_dim) to concatenate with each node + dmpnn_expanded = dmpnn_proj.unsqueeze(1).expand(-1, L, -1) # (B, L, hidden_dim) + + # Concatenate dmpnn and bert features + cat = torch.cat([dmpnn_expanded, bert_proj], dim=-1) # (B, L, 2*hidden_dim) + + # Compute attention scores + e = self.leaky_relu(self.attn_fc(cat)).squeeze(-1) # (B, L) + + # Attention weights over L nodes + alpha = torch.softmax(e, dim=1).unsqueeze(-1) # (B, L, 1) + + # Weighted sum of node features + fusion = torch.sum(alpha * bert_proj, dim=1) # (B, hidden_dim) + + return fusion + + + +class MPNN(pl.LightningModule): + def __init__( + self, + message_passing: MessagePassing, + agg: Aggregation, + predictor: Predictor, + batch_norm: bool = False, + metrics: Iterable[ChempropMetric] | None = None, + warmup_epochs: int = 2, + init_lr: float = 1e-4, + max_lr: float = 1e-3, + final_lr: float = 1e-4, + X_d_transform: ScaleTransform | None = None, + fine_tune_bert: bool = True, + fine_tune_percent: int = 10 + ): + super().__init__() + self.save_hyperparameters(ignore=["X_d_transform", "message_passing", "agg", "predictor"]) + self.hparams["X_d_transform"] = X_d_transform + self.hparams.update({ + "message_passing": message_passing.hparams, + "agg": agg.hparams, + "predictor": predictor.hparams, + }) + + self.fusion_GAT = fusionGAT( + dmpnn_dim=message_passing.output_dim, + bert_dim=768, + hidden_dim=message_passing.output_dim + ) + + self.message_passing = message_passing + self.agg = agg + self.bn = nn.BatchNorm1d(self.message_passing.output_dim) if batch_norm else nn.Identity() + self.predictor = predictor + self.X_d_transform = X_d_transform if X_d_transform is not None else nn.Identity() + + self.metrics = ( + nn.ModuleList([*metrics, self.criterion.clone()]) + if metrics + else nn.ModuleList([self.predictor._T_default_metric(), self.criterion.clone()]) + ) + + self.warmup_epochs = warmup_epochs + self.init_lr = init_lr + self.max_lr = max_lr + self.final_lr = final_lr + + self.fine_tune_bert = fine_tune_bert + self.fine_tune_percent = fine_tune_percent + + + self.bert_encoder = ChemBERTaEncoder( + model_name="seyonec/ChemBERTa-zinc-base-v1", + fine_tune_percent=self.fine_tune_percent if self.fine_tune_bert else 0 + ) + + self.bert_encoder = self.bert_encoder.to(self.device) + + + @property + def output_dim(self) -> int: + return self.predictor.output_dim + + @property + def n_tasks(self) -> int: + return self.predictor.n_tasks + + @property + def n_targets(self) -> int: + return self.predictor.n_targets + + @property + def criterion(self) -> ChempropMetric: + return self.predictor.criterion + + def fingerprint(self, bmg: BatchMolGraph, V_d: Tensor | None = None, X_d: Tensor | None = None) -> Tensor: + H_v = self.message_passing(bmg, V_d) + H = self.agg(H_v, bmg.batch) + + smiles_list = bmg.names + outputs = self.bert_encoder.encode(smiles_list) + 
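        # encode() returns token-level and pooled outputs; despite the variable name on
        # the next line, the token-level last_hidden_state (B x L x 768) is selected so
        # that fusionGAT can attend over individual SMILES tokens. encode() detaches
        # these tensors, so the downstream loss does not backpropagate into ChemBERTa
        # through this path, and bmg.names is assumed to carry the batch's SMILES strings.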
output_pooler = outputs["last_hidden_state"] + + fingerprint = self.fusion_GAT(H, output_pooler) + fingerprint = self.bn(fingerprint) + + return fingerprint if X_d is None else torch.cat((fingerprint, self.X_d_transform(X_d)), 1) + + def encoding(self, bmg: BatchMolGraph, V_d: Tensor | None = None, X_d: Tensor | None = None, i: int = -1) -> Tensor: + return self.predictor.encode(self.fingerprint(bmg, V_d, X_d), i) + + def forward(self, bmg: BatchMolGraph, V_d: Tensor | None = None, X_d: Tensor | None = None) -> Tensor: + return self.predictor(self.fingerprint(bmg, V_d, X_d)) + + def training_step(self, batch: BatchType, batch_idx): + batch_size = self.get_batch_size(batch) + bmg, V_d, X_d, targets, weights, lt_mask, gt_mask = batch + + mask = targets.isfinite() + targets = targets.nan_to_num(nan=0.0) + + Z = self.fingerprint(bmg, V_d, X_d) + preds = self.predictor.train_step(Z) + l = self.criterion(preds, targets, mask, weights, lt_mask, gt_mask) + + self.log("train_loss", self.criterion, batch_size=batch_size, prog_bar=True, on_epoch=True) + return l + + def on_validation_model_eval(self) -> None: + self.eval() + self.message_passing.V_d_transform.train() + self.message_passing.graph_transform.train() + self.X_d_transform.train() + self.predictor.output_transform.train() + + if self.fine_tune_bert: + self.bert_encoder.encoder.train() + + def validation_step(self, batch: BatchType, batch_idx: int = 0): + self._evaluate_batch(batch, "val") + + batch_size = self.get_batch_size(batch) + bmg, V_d, X_d, targets, weights, lt_mask, gt_mask = batch + + mask = targets.isfinite() + targets = targets.nan_to_num(nan=0.0) + + Z = self.fingerprint(bmg, V_d, X_d) + preds = self.predictor.train_step(Z) + self.metrics[-1](preds, targets, mask, weights, lt_mask, gt_mask) + self.log("val_loss", self.metrics[-1], batch_size=batch_size, prog_bar=True) + + def test_step(self, batch: BatchType, batch_idx: int = 0): + self._evaluate_batch(batch, "test") + + def _evaluate_batch(self, batch: BatchType, label: str) -> None: + batch_size = self.get_batch_size(batch) + bmg, V_d, X_d, targets, weights, lt_mask, gt_mask = batch + + mask = targets.isfinite() + targets = targets.nan_to_num(nan=0.0) + preds = self(bmg, V_d, X_d) + weights = torch.ones_like(weights) + + if self.predictor.n_targets > 1: + preds = preds[..., 0] + + for m in self.metrics[:-1]: + m.update(preds, targets, mask, weights, lt_mask, gt_mask) + self.log(f"{label}/{m.alias}", m, batch_size=batch_size) + + def predict_step(self, batch: BatchType, batch_idx: int, dataloader_idx: int = 0) -> Tensor: + bmg, X_vd, X_d, *_ = batch + return self(bmg, X_vd, X_d) + + def configure_optimizers(self): + opt = optim.Adam(self.parameters(), self.init_lr) + if self.trainer.train_dataloader is None: + self.trainer.estimated_stepping_batches + steps_per_epoch = self.trainer.num_training_batches + warmup_steps = self.warmup_epochs * steps_per_epoch + if self.trainer.max_epochs == -1: + logger.warning( + "For infinite training, the number of cooldown epochs in learning rate scheduler is set to 100 times the number of warmup epochs." 
+ ) + cooldown_steps = 100 * warmup_steps + else: + cooldown_epochs = self.trainer.max_epochs - self.warmup_epochs + cooldown_steps = cooldown_epochs * steps_per_epoch + + lr_sched = build_NoamLike_LRSched( + opt, warmup_steps, cooldown_steps, self.init_lr, self.max_lr, self.final_lr + ) + + return {"optimizer": opt, "lr_scheduler": {"scheduler": lr_sched, "interval": "step"}} + + def get_batch_size(self, batch: TrainingBatch) -> int: + return len(batch[0]) + + @classmethod + def _load(cls, path, map_location, **submodules): + d = torch.load(path, map_location, weights_only=False) + + try: + hparams = d["hyper_parameters"] + state_dict = d["state_dict"] + except KeyError: + raise KeyError(f"Could not find hyper parameters and/or state dict in {path}.") + + if hparams["metrics"] is not None: + hparams["metrics"] = [ + cls._rebuild_metric(metric) + if not hasattr(metric, "_defaults") + or (not torch.cuda.is_available() and metric.device.type != "cpu") + else metric + for metric in hparams["metrics"] + ] + + if hparams["predictor"]["criterion"] is not None: + metric = hparams["predictor"]["criterion"] + if not hasattr(metric, "_defaults") or ( + not torch.cuda.is_available() and metric.device.type != "cpu" + ): + hparams["predictor"]["criterion"] = cls._rebuild_metric(metric) + + submodules |= { + key: hparams[key].pop("cls")(**hparams[key]) + for key in ("message_passing", "agg", "predictor") + if key not in submodules + } + + return submodules, state_dict, hparams + + @classmethod + def _add_metric_task_weights_to_state_dict(cls, state_dict, hparams): + if "metrics.0.task_weights" not in state_dict: + metrics = hparams["metrics"] + n_metrics = len(metrics) if metrics is not None else 1 + for i_metric in range(n_metrics): + state_dict[f"metrics.{i_metric}.task_weights"] = torch.tensor([[1.0]]) + state_dict[f"metrics.{i_metric + 1}.task_weights"] = state_dict[ + "predictor.criterion.task_weights" + ] + return state_dict + + @classmethod + def _rebuild_metric(cls, metric): + return Factory.build(metric.__class__, task_weights=metric.task_weights, **metric.__dict__) + + @classmethod + def load_from_checkpoint( + cls, checkpoint_path, map_location=None, hparams_file=None, strict=True, **kwargs + ) -> MPNN: + submodules = { + k: v for k, v in kwargs.items() if k in ["message_passing", "agg", "predictor"] + } + submodules, state_dict, hparams = cls._load(checkpoint_path, map_location, **submodules) + kwargs.update(submodules) + + state_dict = cls._add_metric_task_weights_to_state_dict(state_dict, hparams) + d = torch.load(checkpoint_path, map_location, weights_only=False) + d["state_dict"] = state_dict + d["hyper_parameters"] = hparams + buffer = io.BytesIO() + torch.save(d, buffer) + buffer.seek(0) + + return super().load_from_checkpoint(buffer, map_location, hparams_file, strict, **kwargs) + + @classmethod + def load_from_file(cls, model_path, map_location=None, strict=True, **submodules) -> MPNN: + submodules, state_dict, hparams = cls._load(model_path, map_location, **submodules) + hparams.update(submodules) + + state_dict = cls._add_metric_task_weights_to_state_dict(state_dict, hparams) + + model = cls(**hparams) + model.load_state_dict(state_dict, strict=strict) + + return model \ No newline at end of file diff --git a/chemprop-updated/chemprop/models/multi.py b/chemprop-updated/chemprop/models/multi.py new file mode 100644 index 0000000000000000000000000000000000000000..930b815b1e8f8688101ab8ce14697f54c41b3e0e --- /dev/null +++ b/chemprop-updated/chemprop/models/multi.py @@ -0,0 +1,101 @@ 
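For reference, a minimal shape-level sketch of the fusion step defined in model.py above, using random tensors in place of the real D-MPNN and ChemBERTa outputs; the import path, batch size, token count, and dimensions are assumptions chosen for illustration only.

import torch

from chemprop.models.model import fusionGAT  # module path as laid out in this diff

B, L = 4, 32                          # assumed batch size and SMILES tokens per molecule
dmpnn_dim, bert_dim, hidden_dim = 300, 768, 300

fusion = fusionGAT(dmpnn_dim, bert_dim, hidden_dim)
H = torch.randn(B, dmpnn_dim)         # stand-in for agg(message_passing(bmg, V_d), bmg.batch)
tokens = torch.randn(B, L, bert_dim)  # stand-in for ChemBERTa's last_hidden_state
Z = fusion(H, tokens)                 # fused fingerprint of shape (B, hidden_dim)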
+from typing import Iterable + +import torch +from torch import Tensor + +from chemprop.data import BatchMolGraph, MulticomponentTrainingBatch +from chemprop.models.model import MPNN +from chemprop.nn import Aggregation, MulticomponentMessagePassing, Predictor +from chemprop.nn.metrics import ChempropMetric +from chemprop.nn.transforms import ScaleTransform + + +class MulticomponentMPNN(MPNN): + def __init__( + self, + message_passing: MulticomponentMessagePassing, + agg: Aggregation, + predictor: Predictor, + batch_norm: bool = False, + metrics: Iterable[ChempropMetric] | None = None, + warmup_epochs: int = 2, + init_lr: float = 1e-4, + max_lr: float = 1e-3, + final_lr: float = 1e-4, + X_d_transform: ScaleTransform | None = None, + ): + super().__init__( + message_passing, + agg, + predictor, + batch_norm, + metrics, + warmup_epochs, + init_lr, + max_lr, + final_lr, + X_d_transform, + ) + self.message_passing: MulticomponentMessagePassing + + def fingerprint( + self, + bmgs: Iterable[BatchMolGraph], + V_ds: Iterable[Tensor | None], + X_d: Tensor | None = None, + ) -> Tensor: + H_vs: list[Tensor] = self.message_passing(bmgs, V_ds) + Hs = [self.agg(H_v, bmg.batch) for H_v, bmg in zip(H_vs, bmgs)] + H = torch.cat(Hs, 1) + H = self.bn(H) + + return H if X_d is None else torch.cat((H, self.X_d_transform(X_d)), 1) + + def on_validation_model_eval(self) -> None: + self.eval() + for block in self.message_passing.blocks: + block.V_d_transform.train() + block.graph_transform.train() + self.X_d_transform.train() + self.predictor.output_transform.train() + + def get_batch_size(self, batch: MulticomponentTrainingBatch) -> int: + return len(batch[0][0]) + + @classmethod + def _load(cls, path, map_location, **submodules): + d = torch.load(path, map_location, weights_only=False) + + try: + hparams = d["hyper_parameters"] + state_dict = d["state_dict"] + except KeyError: + raise KeyError(f"Could not find hyper parameters and/or state dict in {path}.") + + if hparams["metrics"] is not None: + hparams["metrics"] = [ + cls._rebuild_metric(metric) + if not hasattr(metric, "_defaults") + or (not torch.cuda.is_available() and metric.device.type != "cpu") + else metric + for metric in hparams["metrics"] + ] + + if hparams["predictor"]["criterion"] is not None: + metric = hparams["predictor"]["criterion"] + if not hasattr(metric, "_defaults") or ( + not torch.cuda.is_available() and metric.device.type != "cpu" + ): + hparams["predictor"]["criterion"] = cls._rebuild_metric(metric) + + hparams["message_passing"]["blocks"] = [ + block_hparams.pop("cls")(**block_hparams) + for block_hparams in hparams["message_passing"]["blocks"] + ] + submodules |= { + key: hparams[key].pop("cls")(**hparams[key]) + for key in ("message_passing", "agg", "predictor") + if key not in submodules + } + + return submodules, state_dict, hparams diff --git a/chemprop-updated/chemprop/models/utils.py b/chemprop-updated/chemprop/models/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cf0d06b5fb8e7841856f3a143e10e16701d62783 --- /dev/null +++ b/chemprop-updated/chemprop/models/utils.py @@ -0,0 +1,32 @@ +from os import PathLike + +import torch + +from chemprop.models.model import MPNN +from chemprop.models.multi import MulticomponentMPNN + + +def save_model(path: PathLike, model: MPNN, output_columns: list[str] = None) -> None: + torch.save( + { + "hyper_parameters": model.hparams, + "state_dict": model.state_dict(), + "output_columns": output_columns, + }, + path, + ) + + +def load_model(path: PathLike, 
multicomponent: bool) -> MPNN: + if multicomponent: + model = MulticomponentMPNN.load_from_file(path, map_location=torch.device("cpu")) + else: + model = MPNN.load_from_file(path, map_location=torch.device("cpu")) + + return model + + +def load_output_columns(path: PathLike) -> list[str] | None: + model_file = torch.load(path, map_location=torch.device("cpu"), weights_only=False) + + return model_file.get("output_columns") diff --git a/chemprop-updated/chemprop/nn/__init__.py b/chemprop-updated/chemprop/nn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3e8680ede568cfe936fefe16626f3bbdf39a5725 --- /dev/null +++ b/chemprop-updated/chemprop/nn/__init__.py @@ -0,0 +1,127 @@ +from .agg import ( + Aggregation, + AggregationRegistry, + AttentiveAggregation, + MeanAggregation, + NormAggregation, + SumAggregation, +) +from .message_passing import ( + AtomMessagePassing, + BondMessagePassing, + MessagePassing, + MulticomponentMessagePassing, +) +from .metrics import ( + MAE, + MSE, + RMSE, + SID, + BCELoss, + BinaryAccuracy, + BinaryAUPRC, + BinaryAUROC, + BinaryF1Score, + BinaryMCCLoss, + BinaryMCCMetric, + BoundedMAE, + BoundedMixin, + BoundedMSE, + BoundedRMSE, + ChempropMetric, + ClassificationMixin, + CrossEntropyLoss, + DirichletLoss, + EvidentialLoss, + LossFunctionRegistry, + MetricRegistry, + MulticlassMCCLoss, + MulticlassMCCMetric, + MVELoss, + QuantileLoss, + R2Score, + Wasserstein, +) +from .predictors import ( + BinaryClassificationFFN, + BinaryClassificationFFNBase, + BinaryDirichletFFN, + EvidentialFFN, + MulticlassClassificationFFN, + MulticlassDirichletFFN, + MveFFN, + Predictor, + PredictorRegistry, + QuantileFFN, + RegressionFFN, + SpectralFFN, +) +from .transforms import GraphTransform, ScaleTransform, UnscaleTransform +from .utils import Activation + +__all__ = [ + "Aggregation", + "AggregationRegistry", + "MeanAggregation", + "SumAggregation", + "NormAggregation", + "AttentiveAggregation", + "ChempropMetric", + "ClassificationMixin", + "LossFunctionRegistry", + "MetricRegistry", + "MSE", + "MAE", + "RMSE", + "BoundedMixin", + "BoundedMSE", + "BoundedMAE", + "BoundedRMSE", + "BinaryAccuracy", + "BinaryAUPRC", + "BinaryAUROC", + "BinaryF1Score", + "BinaryMCCMetric", + "BoundedMAE", + "BoundedMSE", + "BoundedRMSE", + "MetricRegistry", + "MulticlassMCCMetric", + "R2Score", + "MVELoss", + "EvidentialLoss", + "BCELoss", + "CrossEntropyLoss", + "BinaryMCCLoss", + "BinaryMCCMetric", + "MulticlassMCCLoss", + "MulticlassMCCMetric", + "BinaryAUROC", + "BinaryAUPRC", + "BinaryAccuracy", + "BinaryF1Score", + "MulticlassDirichletLoss", + "SID", + "Wasserstein", + "QuantileLoss", + "MessagePassing", + "AtomMessagePassing", + "BondMessagePassing", + "MulticomponentMessagePassing", + "Predictor", + "PredictorRegistry", + "QuantileFFN", + "RegressionFFN", + "MveFFN", + "DirichletLoss", + "EvidentialFFN", + "BinaryClassificationFFNBase", + "BinaryClassificationFFN", + "BinaryDirichletFFN", + "MulticlassClassificationFFN", + "SpectralFFN", + "Activation", + "GraphTransform", + "ScaleTransform", + "UnscaleTransform", +] diff --git a/chemprop-updated/chemprop/nn/agg.py b/chemprop-updated/chemprop/nn/agg.py new file mode 100644 index 0000000000000000000000000000000000000000..ed921b41d41f68534931a93c56552f31bd792d34 --- /dev/null +++ b/chemprop-updated/chemprop/nn/agg.py @@ -0,0 +1,133 @@ +from abc import abstractmethod + +import torch +from torch import Tensor, nn + +from chemprop.nn.hparams import HasHParams +from chemprop.utils import ClassRegistry + +__all__ = [ 
+ "Aggregation", + "AggregationRegistry", + "MeanAggregation", + "SumAggregation", + "NormAggregation", + "AttentiveAggregation", +] + + +class Aggregation(nn.Module, HasHParams): + """An :class:`Aggregation` aggregates the node-level representations of a batch of graphs into + a batch of graph-level representations + + .. note:: + this class is abstract and cannot be instantiated. + + See also + -------- + :class:`~chemprop.v2.models.modules.agg.MeanAggregation` + :class:`~chemprop.v2.models.modules.agg.SumAggregation` + :class:`~chemprop.v2.models.modules.agg.NormAggregation` + """ + + def __init__(self, dim: int = 0, *args, **kwargs): + super().__init__() + + self.dim = dim + self.hparams = {"dim": dim, "cls": self.__class__} + + @abstractmethod + def forward(self, H: Tensor, batch: Tensor) -> Tensor: + """Aggregate the graph-level representations of a batch of graphs into their respective + global representations + + NOTE: it is possible for a graph to have 0 nodes. In this case, the representation will be + a zero vector of length `d` in the final output. + + Parameters + ---------- + H : Tensor + a tensor of shape ``V x d`` containing the batched node-level representations of ``b`` + graphs + batch : Tensor + a tensor of shape ``V`` containing the index of the graph a given vertex corresponds to + + Returns + ------- + Tensor + a tensor of shape ``b x d`` containing the graph-level representations + """ + + +AggregationRegistry = ClassRegistry[Aggregation]() + + +@AggregationRegistry.register("mean") +class MeanAggregation(Aggregation): + r"""Average the graph-level representation: + + .. math:: + \mathbf h = \frac{1}{|V|} \sum_{v \in V} \mathbf h_v + """ + + def forward(self, H: Tensor, batch: Tensor) -> Tensor: + index_torch = batch.unsqueeze(1).repeat(1, H.shape[1]) + dim_size = batch.max().int() + 1 + return torch.zeros(dim_size, H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_( + self.dim, index_torch, H, reduce="mean", include_self=False + ) + + +@AggregationRegistry.register("sum") +class SumAggregation(Aggregation): + r"""Sum the graph-level representation: + + .. math:: + \mathbf h = \sum_{v \in V} \mathbf h_v + + """ + + def forward(self, H: Tensor, batch: Tensor) -> Tensor: + index_torch = batch.unsqueeze(1).repeat(1, H.shape[1]) + dim_size = batch.max().int() + 1 + return torch.zeros(dim_size, H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_( + self.dim, index_torch, H, reduce="sum", include_self=False + ) + + +@AggregationRegistry.register("norm") +class NormAggregation(SumAggregation): + r"""Sum the graph-level representation and divide by a normalization constant: + + .. 
math:: + \mathbf h = \frac{1}{c} \sum_{v \in V} \mathbf h_v + """ + + def __init__(self, dim: int = 0, *args, norm: float = 100.0, **kwargs): + super().__init__(dim, **kwargs) + + self.norm = norm + self.hparams["norm"] = norm + + def forward(self, H: Tensor, batch: Tensor) -> Tensor: + return super().forward(H, batch) / self.norm + + +class AttentiveAggregation(Aggregation): + def __init__(self, dim: int = 0, *args, output_size: int, **kwargs): + super().__init__(dim, *args, **kwargs) + + self.hparams["output_size"] = output_size + self.W = nn.Linear(output_size, 1) + + def forward(self, H: Tensor, batch: Tensor) -> Tensor: + dim_size = batch.max().int() + 1 + attention_logits = self.W(H).exp() + Z = torch.zeros(dim_size, 1, dtype=H.dtype, device=H.device).scatter_reduce_( + self.dim, batch.unsqueeze(1), attention_logits, reduce="sum", include_self=False + ) + alphas = attention_logits / Z[batch] + index_torch = batch.unsqueeze(1).repeat(1, H.shape[1]) + return torch.zeros(dim_size, H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_( + self.dim, index_torch, alphas * H, reduce="sum", include_self=False + ) diff --git a/chemprop-updated/chemprop/nn/ffn.py b/chemprop-updated/chemprop/nn/ffn.py new file mode 100644 index 0000000000000000000000000000000000000000..f5a02fe92391adeca9c88ec371296951a0132928 --- /dev/null +++ b/chemprop-updated/chemprop/nn/ffn.py @@ -0,0 +1,63 @@ +from abc import abstractmethod + +from torch import Tensor, nn + +from chemprop.nn.utils import get_activation_function + + +class FFN(nn.Module): + r"""A :class:`FFN` is a differentiable function + :math:`f_\theta : \mathbb R^i \mapsto \mathbb R^o`""" + + input_dim: int + output_dim: int + + @abstractmethod + def forward(self, X: Tensor) -> Tensor: + pass + + +class MLP(nn.Sequential, FFN): + r"""An :class:`MLP` is an FFN that implements the following function: + + .. math:: + \mathbf h_0 &= \mathbf W_0 \mathbf x \,+ \mathbf b_{0} \\ + \mathbf h_l &= \mathbf W_l \left( \mathtt{dropout} \left( \sigma ( \,\mathbf h_{l-1}\, ) \right) \right) + \mathbf b_l\\ + + where :math:`\mathbf x` is the input tensor, :math:`\mathbf W_l` and :math:`\mathbf b_l` + are the learned weight matrix and bias, respectively, of the :math:`l`-th layer, + :math:`\mathbf h_l` is the hidden representation after layer :math:`l`, and :math:`\sigma` + is the activation function. 
+ """ + + @classmethod + def build( + cls, + input_dim: int, + output_dim: int, + hidden_dim: int = 300, + n_layers: int = 1, + dropout: float = 0.0, + activation: str = "relu", + ): + dropout = nn.Dropout(dropout) + act = get_activation_function(activation) + dims = [input_dim] + [hidden_dim] * n_layers + [output_dim] + blocks = [nn.Sequential(nn.Linear(dims[0], dims[1]))] + if len(dims) > 2: + blocks.extend( + [ + nn.Sequential(act, dropout, nn.Linear(d1, d2)) + for d1, d2 in zip(dims[1:-1], dims[2:]) + ] + ) + + return cls(*blocks) + + @property + def input_dim(self) -> int: + return self[0][-1].in_features + + @property + def output_dim(self) -> int: + return self[-1][-1].out_features diff --git a/chemprop-updated/chemprop/nn/hparams.py b/chemprop-updated/chemprop/nn/hparams.py new file mode 100644 index 0000000000000000000000000000000000000000..ffa17ab80c16bbae47b35a18d8fe9f3eb66ee590 --- /dev/null +++ b/chemprop-updated/chemprop/nn/hparams.py @@ -0,0 +1,38 @@ +from typing import Protocol, Type, TypedDict + + +class HParamsDict(TypedDict): + """A dictionary containing a module's class and it's hyperparameters + + Using this type should essentially allow for initializing a module via:: + + module = hparams.pop('cls')(**hparams) + """ + + cls: Type + + +class HasHParams(Protocol): + """:class:`HasHParams` is a protocol for clases which possess an :attr:`hparams` attribute which is a dictionary containing the object's class and arguments required to initialize it. + + That is, any object which implements :class:`HasHParams` should be able to be initialized via:: + + class Foo(HasHParams): + def __init__(self, *args, **kwargs): + ... + + foo1 = Foo(...) + foo1_cls = foo1.hparams['cls'] + foo1_kwargs = {k: v for k, v in foo1.hparams.items() if k != "cls"} + foo2 = foo1_cls(**foo1_kwargs) + # code to compare foo1 and foo2 goes here and they should be equal + """ + + hparams: HParamsDict + + +def from_hparams(hparams: HParamsDict): + cls = hparams["cls"] + kwargs = {k: v for k, v in hparams.items() if k != "cls"} + + return cls(**kwargs) diff --git a/chemprop-updated/chemprop/nn/message_passing/__init__.py b/chemprop-updated/chemprop/nn/message_passing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..97078653c6524d5645d4862a8ef683a2d8eb457e --- /dev/null +++ b/chemprop-updated/chemprop/nn/message_passing/__init__.py @@ -0,0 +1,10 @@ +from .base import AtomMessagePassing, BondMessagePassing +from .multi import MulticomponentMessagePassing +from .proto import MessagePassing + +__all__ = [ + "MessagePassing", + "AtomMessagePassing", + "BondMessagePassing", + "MulticomponentMessagePassing", +] diff --git a/chemprop-updated/chemprop/nn/message_passing/base.py b/chemprop-updated/chemprop/nn/message_passing/base.py new file mode 100644 index 0000000000000000000000000000000000000000..8bb14b0f51c97ba083e26b402db7a0c77023a8db --- /dev/null +++ b/chemprop-updated/chemprop/nn/message_passing/base.py @@ -0,0 +1,319 @@ +from abc import abstractmethod + +from lightning.pytorch.core.mixins import HyperparametersMixin +import torch +from torch import Tensor, nn + +from chemprop.conf import DEFAULT_ATOM_FDIM, DEFAULT_BOND_FDIM, DEFAULT_HIDDEN_DIM +from chemprop.data import BatchMolGraph +from chemprop.exceptions import InvalidShapeError +from chemprop.nn.message_passing.proto import MessagePassing +from chemprop.nn.transforms import GraphTransform, ScaleTransform +from chemprop.nn.utils import Activation, get_activation_function + + +class _MessagePassingBase(MessagePassing, 
HyperparametersMixin): + """The base message-passing block for atom- and bond-based message-passing schemes + + NOTE: this class is an abstract base class and cannot be instantiated + + Parameters + ---------- + d_v : int, default=DEFAULT_ATOM_FDIM + the feature dimension of the vertices + d_e : int, default=DEFAULT_BOND_FDIM + the feature dimension of the edges + d_h : int, default=DEFAULT_HIDDEN_DIM + the hidden dimension during message passing + bias : bool, defuault=False + if `True`, add a bias term to the learned weight matrices + depth : int, default=3 + the number of message passing iterations + undirected : bool, default=False + if `True`, pass messages on undirected edges + dropout : float, default=0.0 + the dropout probability + activation : str, default="relu" + the activation function to use + d_vd : int | None, default=None + the dimension of additional vertex descriptors that will be concatenated to the hidden features before readout + + See also + -------- + * :class:`AtomMessagePassing` + + * :class:`BondMessagePassing` + """ + + def __init__( + self, + d_v: int = DEFAULT_ATOM_FDIM, + d_e: int = DEFAULT_BOND_FDIM, + d_h: int = DEFAULT_HIDDEN_DIM, + bias: bool = False, + depth: int = 3, + dropout: float = 0.0, + activation: str | Activation = Activation.RELU, + undirected: bool = False, + d_vd: int | None = None, + V_d_transform: ScaleTransform | None = None, + graph_transform: GraphTransform | None = None, + # layers_per_message: int = 1, + ): + super().__init__() + # manually add V_d_transform and graph_transform to hparams to suppress lightning's warning + # about double saving their state_dict values. + self.save_hyperparameters(ignore=["V_d_transform", "graph_transform"]) + self.hparams["V_d_transform"] = V_d_transform + self.hparams["graph_transform"] = graph_transform + self.hparams["cls"] = self.__class__ + + self.W_i, self.W_h, self.W_o, self.W_d = self.setup(d_v, d_e, d_h, d_vd, bias) + self.depth = depth + self.undirected = undirected + self.dropout = nn.Dropout(dropout) + self.tau = get_activation_function(activation) + self.V_d_transform = V_d_transform if V_d_transform is not None else nn.Identity() + self.graph_transform = graph_transform if graph_transform is not None else nn.Identity() + + @property + def output_dim(self) -> int: + return self.W_d.out_features if self.W_d is not None else self.W_o.out_features + + @abstractmethod + def setup( + self, + d_v: int = DEFAULT_ATOM_FDIM, + d_e: int = DEFAULT_BOND_FDIM, + d_h: int = DEFAULT_HIDDEN_DIM, + d_vd: int | None = None, + bias: bool = False, + ) -> tuple[nn.Module, nn.Module, nn.Module, nn.Module | None]: + """setup the weight matrices used in the message passing update functions + + Parameters + ---------- + d_v : int + the vertex feature dimension + d_e : int + the edge feature dimension + d_h : int, default=300 + the hidden dimension during message passing + d_vd : int | None, default=None + the dimension of additional vertex descriptors that will be concatenated to the hidden + features before readout, if any + bias: bool, default=False + whether to add a learned bias to the matrices + + Returns + ------- + W_i, W_h, W_o, W_d : tuple[nn.Module, nn.Module, nn.Module, nn.Module | None] + the input, hidden, output, and descriptor weight matrices, respectively, used in the + message passing update functions. 
The descriptor weight matrix is `None` if no vertex + dimension is supplied + """ + + @abstractmethod + def initialize(self, bmg: BatchMolGraph) -> Tensor: + """initialize the message passing scheme by calculating initial matrix of hidden features""" + + @abstractmethod + def message(self, H_t: Tensor, bmg: BatchMolGraph): + """Calculate the message matrix""" + + def update(self, M_t, H_0): + """Calcualte the updated hidden for each edge""" + H_t = self.W_h(M_t) + H_t = self.tau(H_0 + H_t) + H_t = self.dropout(H_t) + + return H_t + + def finalize(self, M: Tensor, V: Tensor, V_d: Tensor | None) -> Tensor: + r"""Finalize message passing by (1) concatenating the final message ``M`` and the original + vertex features ``V`` and (2) if provided, further concatenating additional vertex + descriptors ``V_d``. + + This function implements the following operation: + + .. math:: + H &= \mathtt{dropout} \left( \tau(\mathbf{W}_o(V \mathbin\Vert M)) \right) \\ + H &= \mathtt{dropout} \left( \tau(\mathbf{W}_d(H \mathbin\Vert V_d)) \right), + + where :math:`\tau` is the activation function, :math:`\Vert` is the concatenation operator, + :math:`\mathbf{W}_o` and :math:`\mathbf{W}_d` are learned weight matrices, :math:`M` is + the message matrix, :math:`V` is the original vertex feature matrix, and :math:`V_d` is an + optional vertex descriptor matrix. + + Parameters + ---------- + M : Tensor + a tensor of shape ``V x d_h`` containing the message vector of each vertex + V : Tensor + a tensor of shape ``V x d_v`` containing the original vertex features + V_d : Tensor | None + an optional tensor of shape ``V x d_vd`` containing additional vertex descriptors + + Returns + ------- + Tensor + a tensor of shape ``V x (d_h + d_v [+ d_vd])`` containing the final hidden + representations + + Raises + ------ + InvalidShapeError + if ``V_d`` is not of shape ``b x d_vd``, where ``b`` is the batch size and ``d_vd`` is + the vertex descriptor dimension + """ + H = self.W_o(torch.cat((V, M), dim=1)) # V x d_o + H = self.tau(H) + H = self.dropout(H) + + if V_d is not None: + V_d = self.V_d_transform(V_d) + try: + H = self.W_d(torch.cat((H, V_d), dim=1)) # V x (d_o + d_vd) + H = self.dropout(H) + except RuntimeError: + raise InvalidShapeError( + "V_d", V_d.shape, [len(H), self.W_d.in_features - self.W_o.out_features] + ) + + return H + + def forward(self, bmg: BatchMolGraph, V_d: Tensor | None = None) -> Tensor: + """Encode a batch of molecular graphs. + + Parameters + ---------- + bmg: BatchMolGraph + a batch of :class:`BatchMolGraph`s to encode + V_d : Tensor | None, default=None + an optional tensor of shape ``V x d_vd`` containing additional descriptors for each atom + in the batch. These will be concatenated to the learned atomic descriptors and + transformed before the readout phase. 
+ + Returns + ------- + Tensor + a tensor of shape ``V x d_h`` or ``V x (d_h + d_vd)`` containing the encoding of each + molecule in the batch, depending on whether additional atom descriptors were provided + """ + bmg = self.graph_transform(bmg) + H_0 = self.initialize(bmg) + + H = self.tau(H_0) + for _ in range(1, self.depth): + if self.undirected: + H = (H + H[bmg.rev_edge_index]) / 2 + + M = self.message(H, bmg) + H = self.update(M, H_0) + + index_torch = bmg.edge_index[1].unsqueeze(1).repeat(1, H.shape[1]) + M = torch.zeros(len(bmg.V), H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_( + 0, index_torch, H, reduce="sum", include_self=False + ) + return self.finalize(M, bmg.V, V_d) + + +class BondMessagePassing(_MessagePassingBase): + r"""A :class:`BondMessagePassing` encodes a batch of molecular graphs by passing messages along + directed bonds. + + It implements the following operation: + + .. math:: + + h_{vw}^{(0)} &= \tau \left( \mathbf W_i(e_{vw}) \right) \\ + m_{vw}^{(t)} &= \sum_{u \in \mathcal N(v)\setminus w} h_{uv}^{(t-1)} \\ + h_{vw}^{(t)} &= \tau \left(h_v^{(0)} + \mathbf W_h m_{vw}^{(t-1)} \right) \\ + m_v^{(T)} &= \sum_{w \in \mathcal N(v)} h_w^{(T-1)} \\ + h_v^{(T)} &= \tau \left (\mathbf W_o \left( x_v \mathbin\Vert m_{v}^{(T)} \right) \right), + + where :math:`\tau` is the activation function; :math:`\mathbf W_i`, :math:`\mathbf W_h`, and + :math:`\mathbf W_o` are learned weight matrices; :math:`e_{vw}` is the feature vector of the + bond between atoms :math:`v` and :math:`w`; :math:`x_v` is the feature vector of atom :math:`v`; + :math:`h_{vw}^{(t)}` is the hidden representation of the bond :math:`v \rightarrow w` at + iteration :math:`t`; :math:`m_{vw}^{(t)}` is the message received by the bond :math:`v + \to w` at iteration :math:`t`; and :math:`t \in \{1, \dots, T-1\}` is the number of + message passing iterations. + """ + + def setup( + self, + d_v: int = DEFAULT_ATOM_FDIM, + d_e: int = DEFAULT_BOND_FDIM, + d_h: int = DEFAULT_HIDDEN_DIM, + d_vd: int | None = None, + bias: bool = False, + ): + W_i = nn.Linear(d_v + d_e, d_h, bias) + W_h = nn.Linear(d_h, d_h, bias) + W_o = nn.Linear(d_v + d_h, d_h) + # initialize W_d only when d_vd is neither 0 nor None + W_d = nn.Linear(d_h + d_vd, d_h + d_vd) if d_vd else None + + return W_i, W_h, W_o, W_d + + def initialize(self, bmg: BatchMolGraph) -> Tensor: + return self.W_i(torch.cat([bmg.V[bmg.edge_index[0]], bmg.E], dim=1)) + + def message(self, H: Tensor, bmg: BatchMolGraph) -> Tensor: + index_torch = bmg.edge_index[1].unsqueeze(1).repeat(1, H.shape[1]) + M_all = torch.zeros(len(bmg.V), H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_( + 0, index_torch, H, reduce="sum", include_self=False + )[bmg.edge_index[0]] + M_rev = H[bmg.rev_edge_index] + + return M_all - M_rev + + +class AtomMessagePassing(_MessagePassingBase): + r"""A :class:`AtomMessagePassing` encodes a batch of molecular graphs by passing messages along + atoms. + + It implements the following operation: + + .. 
math:: + + h_v^{(0)} &= \tau \left( \mathbf{W}_i(x_v) \right) \\ + m_v^{(t)} &= \sum_{u \in \mathcal{N}(v)} h_u^{(t-1)} \mathbin\Vert e_{uv} \\ + h_v^{(t)} &= \tau\left(h_v^{(0)} + \mathbf{W}_h m_v^{(t-1)}\right) \\ + m_v^{(T)} &= \sum_{w \in \mathcal{N}(v)} h_w^{(T-1)} \\ + h_v^{(T)} &= \tau \left (\mathbf{W}_o \left( x_v \mathbin\Vert m_{v}^{(T)} \right) \right), + + where :math:`\tau` is the activation function; :math:`\mathbf{W}_i`, :math:`\mathbf{W}_h`, and + :math:`\mathbf{W}_o` are learned weight matrices; :math:`e_{vw}` is the feature vector of the + bond between atoms :math:`v` and :math:`w`; :math:`x_v` is the feature vector of atom :math:`v`; + :math:`h_v^{(t)}` is the hidden representation of atom :math:`v` at iteration :math:`t`; + :math:`m_v^{(t)}` is the message received by atom :math:`v` at iteration :math:`t`; and + :math:`t \in \{1, \dots, T\}` is the number of message passing iterations. + """ + + def setup( + self, + d_v: int = DEFAULT_ATOM_FDIM, + d_e: int = DEFAULT_BOND_FDIM, + d_h: int = DEFAULT_HIDDEN_DIM, + d_vd: int | None = None, + bias: bool = False, + ): + W_i = nn.Linear(d_v, d_h, bias) + W_h = nn.Linear(d_e + d_h, d_h, bias) + W_o = nn.Linear(d_v + d_h, d_h) + # initialize W_d only when d_vd is neither 0 nor None + W_d = nn.Linear(d_h + d_vd, d_h + d_vd) if d_vd else None + + return W_i, W_h, W_o, W_d + + def initialize(self, bmg: BatchMolGraph) -> Tensor: + return self.W_i(bmg.V[bmg.edge_index[0]]) + + def message(self, H: Tensor, bmg: BatchMolGraph): + H = torch.cat((H, bmg.E), dim=1) + index_torch = bmg.edge_index[1].unsqueeze(1).repeat(1, H.shape[1]) + return torch.zeros(len(bmg.V), H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_( + 0, index_torch, H, reduce="sum", include_self=False + )[bmg.edge_index[0]] diff --git a/chemprop-updated/chemprop/nn/message_passing/multi.py b/chemprop-updated/chemprop/nn/message_passing/multi.py new file mode 100644 index 0000000000000000000000000000000000000000..98a9cb84c55dbba5c8fa50ac56cac0c30a2171b8 --- /dev/null +++ b/chemprop-updated/chemprop/nn/message_passing/multi.py @@ -0,0 +1,80 @@ +import logging +from typing import Iterable, Sequence + +from torch import Tensor, nn + +from chemprop.data import BatchMolGraph +from chemprop.nn.hparams import HasHParams +from chemprop.nn.message_passing.proto import MessagePassing + +logger = logging.getLogger(__name__) + + +class MulticomponentMessagePassing(nn.Module, HasHParams): + """A `MulticomponentMessagePassing` performs message-passing on each individual input in a + multicomponent input then concatenates the representation of each input to construct a + global representation + + Parameters + ---------- + blocks : Sequence[MessagePassing] + the invidual message-passing blocks for each input + n_components : int + the number of components in each input + shared : bool, default=False + whether one block will be shared among all components in an input. If not, a separate + block will be learned for each component. + """ + + def __init__(self, blocks: Sequence[MessagePassing], n_components: int, shared: bool = False): + super().__init__() + self.hparams = { + "cls": self.__class__, + "blocks": [block.hparams for block in blocks], + "n_components": n_components, + "shared": shared, + } + + if len(blocks) == 0: + raise ValueError("arg 'blocks' was empty!") + if shared and len(blocks) > 1: + logger.warning( + "More than 1 block was supplied but 'shared' was True! Using only the 0th block..." 
+ ) + elif not shared and len(blocks) != n_components: + raise ValueError( + "arg 'n_components' must be equal to `len(blocks)` if 'shared' is False! " + f"got: {n_components} and {len(blocks)}, respectively." + ) + + self.n_components = n_components + self.shared = shared + self.blocks = nn.ModuleList([blocks[0]] * self.n_components if shared else blocks) + + def __len__(self) -> int: + return len(self.blocks) + + @property + def output_dim(self) -> int: + d_o = sum(block.output_dim for block in self.blocks) + + return d_o + + def forward(self, bmgs: Iterable[BatchMolGraph], V_ds: Iterable[Tensor | None]) -> list[Tensor]: + """Encode the multicomponent inputs + + Parameters + ---------- + bmgs : Iterable[BatchMolGraph] + V_ds : Iterable[Tensor | None] + + Returns + ------- + list[Tensor] + a list of tensors of shape `V x d_i` containing the respective encodings of the `i`\th + component, where `d_i` is the output dimension of the `i`\th encoder + """ + if V_ds is None: + return [block(bmg) for block, bmg in zip(self.blocks, bmgs)] + else: + return [block(bmg, V_d) for block, bmg, V_d in zip(self.blocks, bmgs, V_ds)] diff --git a/chemprop-updated/chemprop/nn/message_passing/proto.py b/chemprop-updated/chemprop/nn/message_passing/proto.py new file mode 100644 index 0000000000000000000000000000000000000000..f00c8a36002c36485da6fbdb08d6137c6d954765 --- /dev/null +++ b/chemprop-updated/chemprop/nn/message_passing/proto.py @@ -0,0 +1,35 @@ +from abc import abstractmethod + +from torch import Tensor, nn + +from chemprop.data import BatchMolGraph +from chemprop.nn.hparams import HasHParams + + +class MessagePassing(nn.Module, HasHParams): + """A :class:`MessagePassing` module encodes a batch of molecular graphs + using message passing to learn vertex-level hidden representations.""" + + input_dim: int + output_dim: int + + @abstractmethod + def forward(self, bmg: BatchMolGraph, V_d: Tensor | None = None) -> Tensor: + """Encode a batch of molecular graphs. + + Parameters + ---------- + bmg: BatchMolGraph + the batch of :class:`~chemprop.featurizers.molgraph.MolGraph`\s to encode + V_d : Tensor | None, default=None + an optional tensor of shape `V x d_vd` containing additional descriptors for each atom + in the batch. These will be concatenated to the learned atomic descriptors and + transformed before the readout phase. + + Returns + ------- + Tensor + a tensor of shape `V x d_h` or `V x (d_h + d_vd)` containing the hidden representation + of each vertex in the batch of graphs. 
The feature dimension depends on whether + additional atom descriptors were provided + """ diff --git a/chemprop-updated/chemprop/nn/metrics.py b/chemprop-updated/chemprop/nn/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..cbbb346f4f9e61a11d97e857aee8c75de28ed44f --- /dev/null +++ b/chemprop-updated/chemprop/nn/metrics.py @@ -0,0 +1,567 @@ +from abc import abstractmethod + +from numpy.typing import ArrayLike +import torch +from torch import Tensor +from torch.nn import functional as F +import torchmetrics +from torchmetrics.utilities.compute import auc +from torchmetrics.utilities.data import dim_zero_cat + +from chemprop.utils.registry import ClassRegistry + +__all__ = [ + "ChempropMetric", + "LossFunctionRegistry", + "MetricRegistry", + "MSE", + "MAE", + "RMSE", + "BoundedMixin", + "BoundedMSE", + "BoundedMAE", + "BoundedRMSE", + "BinaryAccuracy", + "BinaryAUPRC", + "BinaryAUROC", + "BinaryF1Score", + "BinaryMCCMetric", + "BoundedMAE", + "BoundedMSE", + "BoundedRMSE", + "MetricRegistry", + "MulticlassMCCMetric", + "R2Score", + "MVELoss", + "EvidentialLoss", + "BCELoss", + "CrossEntropyLoss", + "BinaryMCCLoss", + "BinaryMCCMetric", + "MulticlassMCCLoss", + "MulticlassMCCMetric", + "ClassificationMixin", + "BinaryAUROC", + "BinaryAUPRC", + "BinaryAccuracy", + "BinaryF1Score", + "DirichletLoss", + "SID", + "Wasserstein", + "QuantileLoss", +] + + +class ChempropMetric(torchmetrics.Metric): + is_differentiable = True + higher_is_better = False + full_state_update = False + + def __init__(self, task_weights: ArrayLike = 1.0): + """ + Parameters + ---------- + task_weights : ArrayLike, default=1.0 + the per-task weights of shape `t` or `1 x t`. Defaults to all tasks having a weight of 1. + """ + super().__init__() + task_weights = torch.as_tensor(task_weights, dtype=torch.float).view(1, -1) + self.register_buffer("task_weights", task_weights) + + self.add_state("total_loss", default=torch.tensor(0.0), dist_reduce_fx="sum") + self.add_state("num_samples", default=torch.tensor(0), dist_reduce_fx="sum") + + def update( + self, + preds: Tensor, + targets: Tensor, + mask: Tensor | None = None, + weights: Tensor | None = None, + lt_mask: Tensor | None = None, + gt_mask: Tensor | None = None, + ) -> None: + """Calculate the mean loss function value given predicted and target values + + Parameters + ---------- + preds : Tensor + a tensor of shape `b x t x u` (regression), `b x t` (binary classification), or + `b x t x c` (multiclass classification) containing the predictions, where `b` is the + batch size, `t` is the number of tasks to predict, `u` is the number of + targets to predict for each task, and `c` is the number of classes. 
+ targets : Tensor + a float tensor of shape `b x t` containing the target values + mask : Tensor + a boolean tensor of shape `b x t` indicating whether the given prediction should be + included in the loss calculation + weights : Tensor + a tensor of shape `b` or `b x 1` containing the per-sample weight + lt_mask: Tensor + gt_mask: Tensor + """ + mask = torch.ones_like(targets, dtype=torch.bool) if mask is None else mask + weights = torch.ones_like(targets, dtype=torch.float) if weights is None else weights + lt_mask = torch.zeros_like(targets, dtype=torch.bool) if lt_mask is None else lt_mask + gt_mask = torch.zeros_like(targets, dtype=torch.bool) if gt_mask is None else gt_mask + + L = self._calc_unreduced_loss(preds, targets, mask, weights, lt_mask, gt_mask) + L = L * weights.view(-1, 1) * self.task_weights * mask + + self.total_loss += L.sum() + self.num_samples += mask.sum() + + def compute(self): + return self.total_loss / self.num_samples + + @abstractmethod + def _calc_unreduced_loss(self, preds, targets, mask, weights, lt_mask, gt_mask) -> Tensor: + """Calculate a tensor of shape `b x t` containing the unreduced loss values.""" + + def extra_repr(self) -> str: + return f"task_weights={self.task_weights.tolist()}" + + +LossFunctionRegistry = ClassRegistry[ChempropMetric]() +MetricRegistry = ClassRegistry[ChempropMetric]() + + +@LossFunctionRegistry.register("mse") +@MetricRegistry.register("mse") +class MSE(ChempropMetric): + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, *args) -> Tensor: + return F.mse_loss(preds, targets, reduction="none") + + +@MetricRegistry.register("mae") +@LossFunctionRegistry.register("mae") +class MAE(ChempropMetric): + def _calc_unreduced_loss(self, preds, targets, *args) -> Tensor: + return (preds - targets).abs() + + +@LossFunctionRegistry.register("rmse") +@MetricRegistry.register("rmse") +class RMSE(MSE): + def compute(self): + return (self.total_loss / self.num_samples).sqrt() + + +class BoundedMixin: + def _calc_unreduced_loss(self, preds, targets, mask, weights, lt_mask, gt_mask) -> Tensor: + preds = torch.where((preds < targets) & lt_mask, targets, preds) + preds = torch.where((preds > targets) & gt_mask, targets, preds) + + return super()._calc_unreduced_loss(preds, targets, mask, weights) + + +@LossFunctionRegistry.register("bounded-mse") +@MetricRegistry.register("bounded-mse") +class BoundedMSE(BoundedMixin, MSE): + pass + + +@LossFunctionRegistry.register("bounded-mae") +@MetricRegistry.register("bounded-mae") +class BoundedMAE(BoundedMixin, MAE): + pass + + +@LossFunctionRegistry.register("bounded-rmse") +@MetricRegistry.register("bounded-rmse") +class BoundedRMSE(BoundedMixin, RMSE): + pass + + +@MetricRegistry.register("r2") +class R2Score(torchmetrics.R2Score): + def __init__(self, task_weights: ArrayLike = 1.0, **kwargs): + """ + Parameters + ---------- + task_weights : ArrayLike = 1.0 + .. important:: + Ignored. Maintained for compatibility with :class:`ChempropMetric` + """ + super().__init__() + task_weights = torch.as_tensor(task_weights, dtype=torch.float).view(1, -1) + self.register_buffer("task_weights", task_weights) + + def update(self, preds: Tensor, targets: Tensor, mask: Tensor, *args, **kwargs): + super().update(preds[mask], targets[mask]) + + +@LossFunctionRegistry.register("mve") +class MVELoss(ChempropMetric): + """Calculate the loss using Eq. 9 from [nix1994]_ + + References + ---------- + .. [nix1994] Nix, D. A.; Weigend, A. S. 
"Estimating the mean and variance of the target + probability distribution." Proceedings of 1994 IEEE International Conference on Neural + Networks, 1994 https://doi.org/10.1109/icnn.1994.374138 + """ + + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, *args) -> Tensor: + mean, var = torch.unbind(preds, dim=-1) + + L_sos = (mean - targets) ** 2 / (2 * var) + L_kl = (2 * torch.pi * var).log() / 2 + + return L_sos + L_kl + + +@LossFunctionRegistry.register("evidential") +class EvidentialLoss(ChempropMetric): + """Calculate the loss using Eqs. 8, 9, and 10 from [amini2020]_. See also [soleimany2021]_. + + References + ---------- + .. [amini2020] Amini, A; Schwarting, W.; Soleimany, A.; Rus, D.; + "Deep Evidential Regression" Advances in Neural Information Processing Systems; 2020; Vol.33. + https://proceedings.neurips.cc/paper_files/paper/2020/file/aab085461de182608ee9f607f3f7d18f-Paper.pdf + .. [soleimany2021] Soleimany, A.P.; Amini, A.; Goldman, S.; Rus, D.; Bhatia, S.N.; Coley, C.W.; + "Evidential Deep Learning for Guided Molecular Property Prediction and Discovery." ACS + Cent. Sci. 2021, 7, 8, 1356-1367. https://doi.org/10.1021/acscentsci.1c00546 + """ + + def __init__(self, task_weights: ArrayLike = 1.0, v_kl: float = 0.2, eps: float = 1e-8): + super().__init__(task_weights) + self.v_kl = v_kl + self.eps = eps + + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, *args) -> Tensor: + mean, v, alpha, beta = torch.unbind(preds, dim=-1) + + residuals = targets - mean + twoBlambda = 2 * beta * (1 + v) + + L_nll = ( + 0.5 * (torch.pi / v).log() + - alpha * twoBlambda.log() + + (alpha + 0.5) * torch.log(v * residuals**2 + twoBlambda) + + torch.lgamma(alpha) + - torch.lgamma(alpha + 0.5) + ) + + L_reg = (2 * v + alpha) * residuals.abs() + + return L_nll + self.v_kl * (L_reg - self.eps) + + def extra_repr(self) -> str: + parent_repr = super().extra_repr() + return parent_repr + f", v_kl={self.v_kl}, eps={self.eps}" + + +@LossFunctionRegistry.register("bce") +class BCELoss(ChempropMetric): + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, *args) -> Tensor: + return F.binary_cross_entropy_with_logits(preds, targets, reduction="none") + + +@LossFunctionRegistry.register("ce") +class CrossEntropyLoss(ChempropMetric): + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, *args) -> Tensor: + preds = preds.transpose(1, 2) + targets = targets.long() + + return F.cross_entropy(preds, targets, reduction="none") + + +@LossFunctionRegistry.register("binary-mcc") +class BinaryMCCLoss(ChempropMetric): + def __init__(self, task_weights: ArrayLike = 1.0): + """ + Parameters + ---------- + task_weights : ArrayLike, default=1.0 + the per-task weights of shape `t` or `1 x t`. Defaults to all tasks having a weight of 1. 
+ """ + super().__init__(task_weights) + + self.add_state("TP", default=[], dist_reduce_fx="cat") + self.add_state("FP", default=[], dist_reduce_fx="cat") + self.add_state("TN", default=[], dist_reduce_fx="cat") + self.add_state("FN", default=[], dist_reduce_fx="cat") + + def update( + self, + preds: Tensor, + targets: Tensor, + mask: Tensor | None = None, + weights: Tensor | None = None, + *args, + ): + mask = torch.ones_like(targets, dtype=torch.bool) if mask is None else mask + weights = torch.ones_like(targets, dtype=torch.float) if weights is None else weights + + if not (0 <= preds.min() and preds.max() <= 1): # assume logits + preds = preds.sigmoid() + + TP, FP, TN, FN = self._calc_unreduced_loss(preds, targets.long(), mask, weights, *args) + + self.TP += [TP] + self.FP += [FP] + self.TN += [TN] + self.FN += [FN] + + def _calc_unreduced_loss(self, preds, targets, mask, weights, *args) -> Tensor: + TP = (targets * preds * weights * mask).sum(0, keepdim=True) + FP = ((1 - targets) * preds * weights * mask).sum(0, keepdim=True) + TN = ((1 - targets) * (1 - preds) * weights * mask).sum(0, keepdim=True) + FN = (targets * (1 - preds) * weights * mask).sum(0, keepdim=True) + + return TP, FP, TN, FN + + def compute(self): + TP = dim_zero_cat(self.TP).sum(0) + FP = dim_zero_cat(self.FP).sum(0) + TN = dim_zero_cat(self.TN).sum(0) + FN = dim_zero_cat(self.FN).sum(0) + + MCC = (TP * TN - FP * FN) / ((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN) + 1e-8).sqrt() + MCC = MCC * self.task_weights + return 1 - MCC.mean() + + +@MetricRegistry.register("binary-mcc") +class BinaryMCCMetric(BinaryMCCLoss): + def compute(self): + return 1 - super().compute() + + +@LossFunctionRegistry.register("multiclass-mcc") +class MulticlassMCCLoss(ChempropMetric): + """Calculate a soft Matthews correlation coefficient ([mccWiki]_) loss for multiclass + classification based on the implementataion of [mccSklearn]_ + References + ---------- + .. [mccWiki] https://en.wikipedia.org/wiki/Phi_coefficient#Multiclass_case + .. [mccSklearn] https://scikit-learn.org/stable/modules/generated/sklearn.metrics.matthews_corrcoef.html + """ + + def __init__(self, task_weights: ArrayLike = 1.0): + """ + Parameters + ---------- + task_weights : ArrayLike, default=1.0 + the per-task weights of shape `t` or `1 x t`. Defaults to all tasks having a weight of 1. 
+ """ + super().__init__(task_weights) + + self.add_state("p", default=[], dist_reduce_fx="cat") + self.add_state("t", default=[], dist_reduce_fx="cat") + self.add_state("c", default=[], dist_reduce_fx="cat") + self.add_state("s", default=[], dist_reduce_fx="cat") + + def update( + self, + preds: Tensor, + targets: Tensor, + mask: Tensor | None = None, + weights: Tensor | None = None, + *args, + ): + mask = torch.ones_like(targets, dtype=torch.bool) if mask is None else mask + weights = ( + torch.ones_like(targets, dtype=torch.float) if weights is None else weights.view(-1, 1) + ) + + if not (0 <= preds.min() and preds.max() <= 1): # assume logits + preds = preds.softmax(2) + + p, t, c, s = self._calc_unreduced_loss(preds, targets.long(), mask, weights, *args) + + self.p += [p] + self.t += [t] + self.c += [c] + self.s += [s] + + def _calc_unreduced_loss(self, preds, targets, mask, weights, *args) -> Tensor: + device = preds.device + C = preds.shape[2] + bin_targets = torch.eye(C, device=device)[targets] + bin_preds = torch.eye(C, device=device)[preds.argmax(-1)] + masked_data_weights = weights.unsqueeze(2) * mask.unsqueeze(2) + p = (bin_preds * masked_data_weights).sum(0, keepdims=True) + t = (bin_targets * masked_data_weights).sum(0, keepdims=True) + c = (bin_preds * bin_targets * masked_data_weights).sum(2).sum(0, keepdims=True) + s = (preds * masked_data_weights).sum(2).sum(0, keepdims=True) + + return p, t, c, s + + def compute(self): + p = dim_zero_cat(self.p).sum(0) + t = dim_zero_cat(self.t).sum(0) + c = dim_zero_cat(self.c).sum(0) + s = dim_zero_cat(self.s).sum(0) + s2 = s.square() + + # the `einsum` calls amount to calculating the batched dot product + cov_ytyp = c * s - torch.einsum("ij,ij->i", p, t) + cov_ypyp = s2 - torch.einsum("ij,ij->i", p, p) + cov_ytyt = s2 - torch.einsum("ij,ij->i", t, t) + + x = cov_ypyp * cov_ytyt + MCC = torch.where(x == 0, torch.tensor(0.0), cov_ytyp / x.sqrt()) + MCC = MCC * self.task_weights + + return 1 - MCC.mean() + + +@MetricRegistry.register("multiclass-mcc") +class MulticlassMCCMetric(MulticlassMCCLoss): + def compute(self): + return 1 - super().compute() + + +class ClassificationMixin: + def __init__(self, task_weights: ArrayLike = 1.0, **kwargs): + """ + Parameters + ---------- + task_weights : ArrayLike = 1.0 + .. important:: + Ignored. Maintained for compatibility with :class:`ChempropMetric` + """ + super().__init__() + task_weights = torch.as_tensor(task_weights, dtype=torch.float).view(1, -1) + self.register_buffer("task_weights", task_weights) + + def update(self, preds: Tensor, targets: Tensor, mask: Tensor, *args, **kwargs): + super().update(preds[mask], targets[mask].long()) + + +@MetricRegistry.register("roc") +class BinaryAUROC(ClassificationMixin, torchmetrics.classification.BinaryAUROC): + pass + + +@MetricRegistry.register("prc") +class BinaryAUPRC(ClassificationMixin, torchmetrics.classification.BinaryPrecisionRecallCurve): + def compute(self) -> Tensor: + p, r, _ = super().compute() + return auc(r, p) + + +@MetricRegistry.register("accuracy") +class BinaryAccuracy(ClassificationMixin, torchmetrics.classification.BinaryAccuracy): + pass + + +@MetricRegistry.register("f1") +class BinaryF1Score(ClassificationMixin, torchmetrics.classification.BinaryF1Score): + pass + + +@LossFunctionRegistry.register("dirichlet") +class DirichletLoss(ChempropMetric): + """Uses the loss function from [sensoy2018]_ based on the implementation at [sensoyGithub]_ + + References + ---------- + .. [sensoy2018] Sensoy, M.; Kaplan, L.; Kandemir, M. 
"Evidential deep learning to quantify + classification uncertainty." NeurIPS, 2018, 31. https://doi.org/10.48550/arXiv.1806.01768 + .. [sensoyGithub] https://muratsensoy.github.io/uncertainty.html#Define-the-loss-function + """ + + def __init__(self, task_weights: ArrayLike = 1.0, v_kl: float = 0.2): + super().__init__(task_weights) + self.v_kl = v_kl + + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, *args) -> Tensor: + targets = torch.eye(preds.shape[2], device=preds.device)[targets.long()] + + S = preds.sum(-1, keepdim=True) + p = preds / S + + A = (targets - p).square().sum(-1, keepdim=True) + B = ((p * (1 - p)) / (S + 1)).sum(-1, keepdim=True) + + L_mse = A + B + + alpha = targets + (1 - targets) * preds + beta = torch.ones_like(alpha) + S_alpha = alpha.sum(-1, keepdim=True) + S_beta = beta.sum(-1, keepdim=True) + + ln_alpha = S_alpha.lgamma() - alpha.lgamma().sum(-1, keepdim=True) + ln_beta = beta.lgamma().sum(-1, keepdim=True) - S_beta.lgamma() + + dg0 = torch.digamma(alpha) + dg1 = torch.digamma(S_alpha) + + L_kl = ln_alpha + ln_beta + torch.sum((alpha - beta) * (dg0 - dg1), -1, keepdim=True) + + return (L_mse + self.v_kl * L_kl).mean(-1) + + def extra_repr(self) -> str: + return f"v_kl={self.v_kl}" + + +@LossFunctionRegistry.register("sid") +class SID(ChempropMetric): + def __init__(self, task_weights: ArrayLike = 1.0, threshold: float | None = None, **kwargs): + super().__init__(task_weights, **kwargs) + + self.threshold = threshold + + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, mask: Tensor, *args) -> Tensor: + if self.threshold is not None: + preds = preds.clamp(min=self.threshold) + + preds_norm = preds / (preds * mask).sum(1, keepdim=True) + + targets = targets.masked_fill(~mask, 1) + preds_norm = preds_norm.masked_fill(~mask, 1) + + return (preds_norm / targets).log() * preds_norm + (targets / preds_norm).log() * targets + + def extra_repr(self) -> str: + return f"threshold={self.threshold}" + + +@LossFunctionRegistry.register(["earthmovers", "wasserstein"]) +class Wasserstein(ChempropMetric): + def __init__(self, task_weights: ArrayLike = 1.0, threshold: float | None = None): + super().__init__(task_weights) + + self.threshold = threshold + + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, mask: Tensor, *args) -> Tensor: + if self.threshold is not None: + preds = preds.clamp(min=self.threshold) + + preds_norm = preds / (preds * mask).sum(1, keepdim=True) + + return (targets.cumsum(1) - preds_norm.cumsum(1)).abs() + + def extra_repr(self) -> str: + return f"threshold={self.threshold}" + + +@LossFunctionRegistry.register(["quantile", "pinball"]) +class QuantileLoss(ChempropMetric): + def __init__(self, task_weights: ArrayLike = 1.0, alpha: float = 0.1): + super().__init__(task_weights) + self.alpha = alpha + + bounds = torch.tensor([-1 / 2, 1 / 2]).view(-1, 1, 1) + tau = torch.tensor([[alpha / 2, 1 - alpha / 2], [alpha / 2 - 1, -alpha / 2]]).view( + 2, 2, 1, 1 + ) + + self.register_buffer("bounds", bounds) + self.register_buffer("tau", tau) + + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, mask: Tensor, *args) -> Tensor: + mean, interval = torch.unbind(preds, dim=-1) + + interval_bounds = self.bounds * interval + pred_bounds = mean + interval_bounds + error_bounds = targets - pred_bounds + loss_bounds = (self.tau * error_bounds).amax(0) + + return loss_bounds.sum(0) + + def extra_repr(self) -> str: + return f"alpha={self.alpha}" diff --git a/chemprop-updated/chemprop/nn/predictors.py 
b/chemprop-updated/chemprop/nn/predictors.py new file mode 100644 index 0000000000000000000000000000000000000000..45d6ed415a7f8b599d1d213f768590c8ee3a8112 --- /dev/null +++ b/chemprop-updated/chemprop/nn/predictors.py @@ -0,0 +1,369 @@ +from abc import abstractmethod + +from lightning.pytorch.core.mixins import HyperparametersMixin +import torch +from torch import Tensor, nn +from torch.nn import functional as F + +from chemprop.conf import DEFAULT_HIDDEN_DIM +from chemprop.nn.ffn import MLP +from chemprop.nn.hparams import HasHParams +from chemprop.nn.metrics import ( + MSE, + SID, + BCELoss, + BinaryAUROC, + ChempropMetric, + CrossEntropyLoss, + DirichletLoss, + EvidentialLoss, + MulticlassMCCMetric, + MVELoss, + QuantileLoss, +) +from chemprop.nn.transforms import UnscaleTransform +from chemprop.utils import ClassRegistry, Factory + +__all__ = [ + "Predictor", + "PredictorRegistry", + "RegressionFFN", + "MveFFN", + "EvidentialFFN", + "BinaryClassificationFFNBase", + "BinaryClassificationFFN", + "BinaryDirichletFFN", + "MulticlassClassificationFFN", + "MulticlassDirichletFFN", + "SpectralFFN", +] + + +class Predictor(nn.Module, HasHParams): + r"""A :class:`Predictor` is a protocol that defines a differentiable function + :math:`f` : \mathbb R^d \mapsto \mathbb R^o""" + + input_dim: int + """the input dimension""" + output_dim: int + """the output dimension""" + n_tasks: int + """the number of tasks `t` to predict for each input""" + n_targets: int + """the number of targets `s` to predict for each task `t`""" + criterion: ChempropMetric + """the loss function to use for training""" + task_weights: Tensor + """the weights to apply to each task when calculating the loss""" + output_transform: UnscaleTransform + """the transform to apply to the output of the predictor""" + + @abstractmethod + def forward(self, Z: Tensor) -> Tensor: + pass + + @abstractmethod + def train_step(self, Z: Tensor) -> Tensor: + pass + + @abstractmethod + def encode(self, Z: Tensor, i: int) -> Tensor: + """Calculate the :attr:`i`-th hidden representation + + Parameters + ---------- + Z : Tensor + a tensor of shape ``n x d`` containing the input data to encode, where ``d`` is the + input dimensionality. + i : int + The stop index of slice of the MLP used to encode the input. That is, use all + layers in the MLP *up to* :attr:`i` (i.e., ``MLP[:i]``). This can be any integer + value, and the behavior of this function is dependent on the underlying list + slicing behavior. For example: + + * ``i=0``: use a 0-layer MLP (i.e., a no-op) + * ``i=1``: use only the first block + * ``i=-1``: use *up to* the final block + + Returns + ------- + Tensor + a tensor of shape ``n x h`` containing the :attr:`i`-th hidden representation, where + ``h`` is the number of neurons in the :attr:`i`-th hidden layer. + """ + pass + + +PredictorRegistry = ClassRegistry[Predictor]() + + +class _FFNPredictorBase(Predictor, HyperparametersMixin): + """A :class:`_FFNPredictorBase` is the base class for all :class:`Predictor`\s that use an + underlying :class:`SimpleFFN` to map the learned fingerprint to the desired output. 
+ """ + + _T_default_criterion: ChempropMetric + _T_default_metric: ChempropMetric + + def __init__( + self, + n_tasks: int = 1, + input_dim: int = DEFAULT_HIDDEN_DIM, + hidden_dim: int = 300, + n_layers: int = 1, + dropout: float = 0.0, + activation: str = "relu", + criterion: ChempropMetric | None = None, + task_weights: Tensor | None = None, + threshold: float | None = None, + output_transform: UnscaleTransform | None = None, + ): + super().__init__() + # manually add criterion and output_transform to hparams to suppress lightning's warning + # about double saving their state_dict values. + self.save_hyperparameters(ignore=["criterion", "output_transform"]) + self.hparams["criterion"] = criterion + self.hparams["output_transform"] = output_transform + self.hparams["cls"] = self.__class__ + + self.ffn = MLP.build( + input_dim, n_tasks * self.n_targets, hidden_dim, n_layers, dropout, activation + ) + task_weights = torch.ones(n_tasks) if task_weights is None else task_weights + self.criterion = criterion or Factory.build( + self._T_default_criterion, task_weights=task_weights, threshold=threshold + ) + self.output_transform = output_transform if output_transform is not None else nn.Identity() + + @property + def input_dim(self) -> int: + return self.ffn.input_dim + + @property + def output_dim(self) -> int: + return self.ffn.output_dim + + @property + def n_tasks(self) -> int: + return self.output_dim // self.n_targets + + def forward(self, Z: Tensor) -> Tensor: + return self.ffn(Z) + + def encode(self, Z: Tensor, i: int) -> Tensor: + return self.ffn[:i](Z) + + +@PredictorRegistry.register("regression") +class RegressionFFN(_FFNPredictorBase): + n_targets = 1 + _T_default_criterion = MSE + _T_default_metric = MSE + + def forward(self, Z: Tensor) -> Tensor: + return self.output_transform(self.ffn(Z)) + + train_step = forward + + +@PredictorRegistry.register("regression-mve") +class MveFFN(RegressionFFN): + n_targets = 2 + _T_default_criterion = MVELoss + + def forward(self, Z: Tensor) -> Tensor: + Y = self.ffn(Z) + mean, var = torch.chunk(Y, self.n_targets, 1) + var = F.softplus(var) + + mean = self.output_transform(mean) + if not isinstance(self.output_transform, nn.Identity): + var = self.output_transform.transform_variance(var) + + return torch.stack((mean, var), dim=2) + + train_step = forward + + +@PredictorRegistry.register("regression-evidential") +class EvidentialFFN(RegressionFFN): + n_targets = 4 + _T_default_criterion = EvidentialLoss + + def forward(self, Z: Tensor) -> Tensor: + Y = self.ffn(Z) + mean, v, alpha, beta = torch.chunk(Y, self.n_targets, 1) + v = F.softplus(v) + alpha = F.softplus(alpha) + 1 + beta = F.softplus(beta) + + mean = self.output_transform(mean) + if not isinstance(self.output_transform, nn.Identity): + beta = self.output_transform.transform_variance(beta) + + return torch.stack((mean, v, alpha, beta), dim=2) + + train_step = forward + + +@PredictorRegistry.register("regression-quantile") +class QuantileFFN(RegressionFFN): + n_targets = 2 + _T_default_criterion = QuantileLoss + + def forward(self, Z: Tensor) -> Tensor: + Y = super().forward(Z) + lower_bound, upper_bound = torch.chunk(Y, self.n_targets, 1) + + lower_bound = self.output_transform(lower_bound) + upper_bound = self.output_transform(upper_bound) + + mean = (lower_bound + upper_bound) / 2 + interval = upper_bound - lower_bound + + return torch.stack((mean, interval), dim=2) + + train_step = forward + + +class BinaryClassificationFFNBase(_FFNPredictorBase): + pass + + 
+@PredictorRegistry.register("classification") +class BinaryClassificationFFN(BinaryClassificationFFNBase): + n_targets = 1 + _T_default_criterion = BCELoss + _T_default_metric = BinaryAUROC + + def forward(self, Z: Tensor) -> Tensor: + Y = super().forward(Z) + + return Y.sigmoid() + + def train_step(self, Z: Tensor) -> Tensor: + return super().forward(Z) + + +@PredictorRegistry.register("classification-dirichlet") +class BinaryDirichletFFN(BinaryClassificationFFNBase): + n_targets = 2 + _T_default_criterion = DirichletLoss + _T_default_metric = BinaryAUROC + + def forward(self, Z: Tensor) -> Tensor: + Y = super().forward(Z).reshape(len(Z), -1, 2) + + alpha = F.softplus(Y) + 1 + + u = 2 / alpha.sum(-1) + Y = alpha / alpha.sum(-1, keepdim=True) + + return torch.stack((Y[..., 1], u), dim=2) + + def train_step(self, Z: Tensor) -> Tensor: + Y = super().forward(Z).reshape(len(Z), -1, 2) + + return F.softplus(Y) + 1 + + +@PredictorRegistry.register("multiclass") +class MulticlassClassificationFFN(_FFNPredictorBase): + n_targets = 1 + _T_default_criterion = CrossEntropyLoss + _T_default_metric = MulticlassMCCMetric + + def __init__( + self, + n_classes: int, + n_tasks: int = 1, + input_dim: int = DEFAULT_HIDDEN_DIM, + hidden_dim: int = 300, + n_layers: int = 1, + dropout: float = 0.0, + activation: str = "relu", + criterion: ChempropMetric | None = None, + task_weights: Tensor | None = None, + threshold: float | None = None, + output_transform: UnscaleTransform | None = None, + ): + task_weights = torch.ones(n_tasks) if task_weights is None else task_weights + super().__init__( + n_tasks * n_classes, + input_dim, + hidden_dim, + n_layers, + dropout, + activation, + criterion, + task_weights, + threshold, + output_transform, + ) + + self.n_classes = n_classes + + @property + def n_tasks(self) -> int: + return self.output_dim // (self.n_targets * self.n_classes) + + def forward(self, Z: Tensor) -> Tensor: + return self.train_step(Z).softmax(-1) + + def train_step(self, Z: Tensor) -> Tensor: + return super().forward(Z).reshape(Z.shape[0], -1, self.n_classes) + + +@PredictorRegistry.register("multiclass-dirichlet") +class MulticlassDirichletFFN(MulticlassClassificationFFN): + _T_default_criterion = DirichletLoss + _T_default_metric = MulticlassMCCMetric + + def forward(self, Z: Tensor) -> Tensor: + Y = super().train_step(Z) + + alpha = F.softplus(Y) + 1 + + Y = alpha / alpha.sum(-1, keepdim=True) + + return Y + + def train_step(self, Z: Tensor) -> Tensor: + Y = super().train_step(Z) + + return F.softplus(Y) + 1 + + +class _Exp(nn.Module): + def forward(self, X: Tensor): + return X.exp() + + +@PredictorRegistry.register("spectral") +class SpectralFFN(_FFNPredictorBase): + n_targets = 1 + _T_default_criterion = SID + _T_default_metric = SID + + def __init__(self, *args, spectral_activation: str | None = "softplus", **kwargs): + super().__init__(*args, **kwargs) + + match spectral_activation: + case "exp": + spectral_activation = _Exp() + case "softplus" | None: + spectral_activation = nn.Softplus() + case _: + raise ValueError( + f"Unknown spectral activation: {spectral_activation}. " + "Expected one of 'exp', 'softplus' or None." 
+ ) + + self.ffn.add_module("spectral_activation", spectral_activation) + + def forward(self, Z: Tensor) -> Tensor: + Y = super().forward(Z) + Y = self.ffn.spectral_activation(Y) + return Y / Y.sum(1, keepdim=True) + + train_step = forward diff --git a/chemprop-updated/chemprop/nn/transforms.py b/chemprop-updated/chemprop/nn/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..2af42099aab6409b138316342babd9c209b1b060 --- /dev/null +++ b/chemprop-updated/chemprop/nn/transforms.py @@ -0,0 +1,70 @@ +from numpy.typing import ArrayLike +from sklearn.preprocessing import StandardScaler +import torch +from torch import Tensor, nn + +from chemprop.data.collate import BatchMolGraph + + +class _ScaleTransformMixin(nn.Module): + def __init__(self, mean: ArrayLike, scale: ArrayLike, pad: int = 0): + super().__init__() + + mean = torch.cat([torch.zeros(pad), torch.tensor(mean, dtype=torch.float)]) + scale = torch.cat([torch.ones(pad), torch.tensor(scale, dtype=torch.float)]) + + if mean.shape != scale.shape: + raise ValueError( + f"uneven shapes for 'mean' and 'scale'! got: mean={mean.shape}, scale={scale.shape}" + ) + + self.register_buffer("mean", mean.unsqueeze(0)) + self.register_buffer("scale", scale.unsqueeze(0)) + + @classmethod + def from_standard_scaler(cls, scaler: StandardScaler, pad: int = 0): + return cls(scaler.mean_, scaler.scale_, pad=pad) + + def to_standard_scaler(self, anti_pad: int = 0) -> StandardScaler: + scaler = StandardScaler() + scaler.mean_ = self.mean[anti_pad:].numpy() + scaler.scale_ = self.scale[anti_pad:].numpy() + return scaler + + +class ScaleTransform(_ScaleTransformMixin): + def forward(self, X: Tensor) -> Tensor: + if self.training: + return X + + return (X - self.mean) / self.scale + + +class UnscaleTransform(_ScaleTransformMixin): + def forward(self, X: Tensor) -> Tensor: + if self.training: + return X + + return X * self.scale + self.mean + + def transform_variance(self, var: Tensor) -> Tensor: + if self.training: + return var + return var * (self.scale**2) + + +class GraphTransform(nn.Module): + def __init__(self, V_transform: ScaleTransform, E_transform: ScaleTransform): + super().__init__() + + self.V_transform = V_transform + self.E_transform = E_transform + + def forward(self, bmg: BatchMolGraph) -> BatchMolGraph: + if self.training: + return bmg + + bmg.V = self.V_transform(bmg.V) + bmg.E = self.E_transform(bmg.E) + + return bmg diff --git a/chemprop-updated/chemprop/nn/utils.py b/chemprop-updated/chemprop/nn/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..19913bd0164f385944deb01a70c58fbdb7cd8587 --- /dev/null +++ b/chemprop-updated/chemprop/nn/utils.py @@ -0,0 +1,46 @@ +from enum import auto + +from torch import nn + +from chemprop.utils.utils import EnumMapping + + +class Activation(EnumMapping): + RELU = auto() + LEAKYRELU = auto() + PRELU = auto() + TANH = auto() + SELU = auto() + ELU = auto() + + +def get_activation_function(activation: str | Activation) -> nn.Module: + """Gets an activation function module given the name of the activation. + + See :class:`~chemprop.v2.models.utils.Activation` for available activations. + + Parameters + ---------- + activation : str | Activation + The name of the activation function. + + Returns + ------- + nn.Module + The activation function module. 
+ """ + match Activation.get(activation): + case Activation.RELU: + return nn.ReLU() + case Activation.LEAKYRELU: + return nn.LeakyReLU(0.1) + case Activation.PRELU: + return nn.PReLU() + case Activation.TANH: + return nn.Tanh() + case Activation.SELU: + return nn.SELU() + case Activation.ELU: + return nn.ELU() + case _: + raise RuntimeError("unreachable code reached!") diff --git a/chemprop-updated/chemprop/schedulers.py b/chemprop-updated/chemprop/schedulers.py new file mode 100644 index 0000000000000000000000000000000000000000..843df0f8a75585ea6a309cc1792f93ff15096218 --- /dev/null +++ b/chemprop-updated/chemprop/schedulers.py @@ -0,0 +1,65 @@ +from torch.optim import Optimizer +from torch.optim.lr_scheduler import LambdaLR + + +def build_NoamLike_LRSched( + optimizer: Optimizer, + warmup_steps: int, + cooldown_steps: int, + init_lr: float, + max_lr: float, + final_lr: float, +): + r"""Build a Noam-like learning rate scheduler which schedules the learning rate with a piecewise linear followed + by an exponential decay. + + The learning rate increases linearly from ``init_lr`` to ``max_lr`` over the course of + the first warmup_steps then decreases exponentially to ``final_lr`` over the course of the + remaining ``total_steps - warmup_steps`` (where ``total_steps = total_epochs * steps_per_epoch``). This is roughly based on the learning rate schedule from [1]_, section 5.3. + + Formally, the learning rate schedule is defined as: + + .. math:: + \mathtt{lr}(i) &= + \begin{cases} + \mathtt{init\_lr} + \delta \cdot i &\text{if } i < \mathtt{warmup\_steps} \\ + \mathtt{max\_lr} \cdot \left( \frac{\mathtt{final\_lr}}{\mathtt{max\_lr}} \right)^{\gamma(i)} &\text{otherwise} \\ + \end{cases} + \\ + \delta &\mathrel{:=} + \frac{\mathtt{max\_lr} - \mathtt{init\_lr}}{\mathtt{warmup\_steps}} \\ + \gamma(i) &\mathrel{:=} + \frac{i - \mathtt{warmup\_steps}}{\mathtt{total\_steps} - \mathtt{warmup\_steps}} + + + Parameters + ----------- + optimizer : Optimizer + A PyTorch optimizer. + warmup_steps : int + The number of steps during which to linearly increase the learning rate. + cooldown_steps : int + The number of steps during which to exponential decay the learning rate. + init_lr : float + The initial learning rate. + max_lr : float + The maximum learning rate (achieved after ``warmup_steps``). + final_lr : float + The final learning rate (achieved after ``cooldown_steps``). + + References + ---------- + .. [1] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I. "Attention is all you need." Advances in neural information processing systems, 2017, 30. 
https://arxiv.org/abs/1706.03762 + """ + + def lr_lambda(step: int): + if step < warmup_steps: + warmup_factor = (max_lr - init_lr) / warmup_steps + return step * warmup_factor / init_lr + 1 + elif warmup_steps <= step < warmup_steps + cooldown_steps: + cooldown_factor = (final_lr / max_lr) ** (1 / cooldown_steps) + return (max_lr * (cooldown_factor ** (step - warmup_steps))) / init_lr + else: + return final_lr / init_lr + + return LambdaLR(optimizer, lr_lambda) diff --git a/chemprop-updated/chemprop/train/__pycache__/__init__.cpython-37.pyc b/chemprop-updated/chemprop/train/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44d67a6fc3904d2cdb0108af55f86ed8c7d27c11 Binary files /dev/null and b/chemprop-updated/chemprop/train/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/train/__pycache__/cross_validate.cpython-37.pyc b/chemprop-updated/chemprop/train/__pycache__/cross_validate.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e23c8c9cea5c9ab57acbeb920dd00b62b851e1b Binary files /dev/null and b/chemprop-updated/chemprop/train/__pycache__/cross_validate.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/train/__pycache__/evaluate.cpython-37.pyc b/chemprop-updated/chemprop/train/__pycache__/evaluate.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44c1858923c6b770124edc0bb26cf82fb4bcc080 Binary files /dev/null and b/chemprop-updated/chemprop/train/__pycache__/evaluate.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/train/__pycache__/loss_functions.cpython-37.pyc b/chemprop-updated/chemprop/train/__pycache__/loss_functions.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1fab53738a6550cb4998803b7b83e59bfa8f0dce Binary files /dev/null and b/chemprop-updated/chemprop/train/__pycache__/loss_functions.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/train/__pycache__/make_predictions.cpython-37.pyc b/chemprop-updated/chemprop/train/__pycache__/make_predictions.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92cfc083cdacf41747c5695c45200be25e7778c3 Binary files /dev/null and b/chemprop-updated/chemprop/train/__pycache__/make_predictions.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/train/__pycache__/metrics.cpython-37.pyc b/chemprop-updated/chemprop/train/__pycache__/metrics.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c04dcf0fa814e32cd868bc918606271acc6eb81 Binary files /dev/null and b/chemprop-updated/chemprop/train/__pycache__/metrics.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/train/__pycache__/molecule_fingerprint.cpython-37.pyc b/chemprop-updated/chemprop/train/__pycache__/molecule_fingerprint.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..881b8725854d2168acc4645b07ff5c2dbc4bd27c Binary files /dev/null and b/chemprop-updated/chemprop/train/__pycache__/molecule_fingerprint.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/train/__pycache__/predict.cpython-37.pyc b/chemprop-updated/chemprop/train/__pycache__/predict.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1749473999e3adbeecc35ca7bc9169595d7ab61e Binary files /dev/null and b/chemprop-updated/chemprop/train/__pycache__/predict.cpython-37.pyc differ diff --git 
a/chemprop-updated/chemprop/train/__pycache__/run_training.cpython-37.pyc b/chemprop-updated/chemprop/train/__pycache__/run_training.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..db47bbf8b1fc85943c5e820bf67488da35d73665 Binary files /dev/null and b/chemprop-updated/chemprop/train/__pycache__/run_training.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/train/__pycache__/train.cpython-37.pyc b/chemprop-updated/chemprop/train/__pycache__/train.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa495ba3f298f0c511b601efc272ee055a9a128b Binary files /dev/null and b/chemprop-updated/chemprop/train/__pycache__/train.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/types.py b/chemprop-updated/chemprop/types.py new file mode 100644 index 0000000000000000000000000000000000000000..71ef27b18cfde9504644f7e627668d5ce62aa431 --- /dev/null +++ b/chemprop-updated/chemprop/types.py @@ -0,0 +1,3 @@ +from rdkit.Chem import Mol + +Rxn = tuple[Mol, Mol] diff --git a/chemprop-updated/chemprop/uncertainty/__init__.py b/chemprop-updated/chemprop/uncertainty/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d81fe53b0575837d843eaca6ae52e4462149c526 --- /dev/null +++ b/chemprop-updated/chemprop/uncertainty/__init__.py @@ -0,0 +1,94 @@ +from .calibrator import ( + AdaptiveMulticlassConformalCalibrator, + BinaryClassificationCalibrator, + CalibratorBase, + IsotonicCalibrator, + IsotonicMulticlassCalibrator, + MulticlassClassificationCalibrator, + MulticlassConformalCalibrator, + MultilabelConformalCalibrator, + MVEWeightingCalibrator, + PlattCalibrator, + RegressionCalibrator, + RegressionConformalCalibrator, + UncertaintyCalibratorRegistry, + ZelikmanCalibrator, + ZScalingCalibrator, +) +from .estimator import ( # RoundRobinSpectraEstimator, + ClassEstimator, + ClassificationDirichletEstimator, + DropoutEstimator, + EnsembleEstimator, + EvidentialAleatoricEstimator, + EvidentialEpistemicEstimator, + EvidentialTotalEstimator, + MulticlassDirichletEstimator, + MVEEstimator, + NoUncertaintyEstimator, + QuantileRegressionEstimator, + UncertaintyEstimator, + UncertaintyEstimatorRegistry, +) +from .evaluator import ( + BinaryClassificationEvaluator, + CalibrationAreaEvaluator, + ExpectedNormalizedErrorEvaluator, + MulticlassClassificationEvaluator, + MulticlassConformalEvaluator, + MultilabelConformalEvaluator, + NLLClassEvaluator, + NLLMulticlassEvaluator, + NLLRegressionEvaluator, + RegressionConformalEvaluator, + RegressionEvaluator, + SpearmanEvaluator, + UncertaintyEvaluatorRegistry, +) + +__all__ = [ + "AdaptiveMulticlassConformalCalibrator", + "BinaryClassificationCalibrator", + "CalibratorBase", + "IsotonicCalibrator", + "IsotonicMulticlassCalibrator", + "MulticlassClassificationCalibrator", + "MulticlassConformalCalibrator", + "MultilabelConformalCalibrator", + "MVEWeightingCalibrator", + "PlattCalibrator", + "RegressionCalibrator", + "RegressionConformalCalibrator", + "UncertaintyCalibratorRegistry", + "ZelikmanCalibrator", + "ZScalingCalibrator", + "BinaryClassificationEvaluator", + "CalibrationAreaEvaluator", + "ExpectedNormalizedErrorEvaluator", + "MulticlassClassificationEvaluator", + "MetricEvaluator", + "MulticlassConformalEvaluator", + "MultilabelConformalEvaluator", + "NLLClassEvaluator", + "NLLMulticlassEvaluator", + "NLLRegressionEvaluator", + "RegressionConformalEvaluator", + "RegressionEvaluator", + "SpearmanEvaluator", + "UncertaintyEvaluator", + "UncertaintyEvaluatorRegistry", 
+ "ClassificationDirichletEstimator", + "ClassEstimator", + "MulticlassDirichletEstimator", + "DropoutEstimator", + "EnsembleEstimator", + "EvidentialAleatoricEstimator", + "EvidentialEpistemicEstimator", + "EvidentialTotalEstimator", + "MVEEstimator", + "NoUncertaintyEstimator", + "QuantileRegressionEstimator", + # "RoundRobinSpectraEstimator", + "UncertaintyEstimator", + "UncertaintyEstimatorRegistry", +] diff --git a/chemprop-updated/chemprop/uncertainty/__pycache__/__init__.cpython-37.pyc b/chemprop-updated/chemprop/uncertainty/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..43c37cc34ec737d54c1d2e08e665efd9c3520b24 Binary files /dev/null and b/chemprop-updated/chemprop/uncertainty/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/uncertainty/__pycache__/uncertainty_calibrator.cpython-37.pyc b/chemprop-updated/chemprop/uncertainty/__pycache__/uncertainty_calibrator.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d522582d59e6606267b4e8ba0eff021c66ffb258 Binary files /dev/null and b/chemprop-updated/chemprop/uncertainty/__pycache__/uncertainty_calibrator.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/uncertainty/__pycache__/uncertainty_estimator.cpython-37.pyc b/chemprop-updated/chemprop/uncertainty/__pycache__/uncertainty_estimator.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..046c33150f6a21736d768d14293ef97fd8c23a17 Binary files /dev/null and b/chemprop-updated/chemprop/uncertainty/__pycache__/uncertainty_estimator.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/uncertainty/__pycache__/uncertainty_evaluator.cpython-37.pyc b/chemprop-updated/chemprop/uncertainty/__pycache__/uncertainty_evaluator.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc1951758f631ce5e1331991ec1ade98b9244cff Binary files /dev/null and b/chemprop-updated/chemprop/uncertainty/__pycache__/uncertainty_evaluator.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/uncertainty/__pycache__/uncertainty_predictor.cpython-37.pyc b/chemprop-updated/chemprop/uncertainty/__pycache__/uncertainty_predictor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..131fadea910f50ac7b82fa340cf6cbcf39d0c981 Binary files /dev/null and b/chemprop-updated/chemprop/uncertainty/__pycache__/uncertainty_predictor.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/uncertainty/calibrator.py b/chemprop-updated/chemprop/uncertainty/calibrator.py new file mode 100644 index 0000000000000000000000000000000000000000..6c9769e7aed4bfaf1e54030932be11fb54304171 --- /dev/null +++ b/chemprop-updated/chemprop/uncertainty/calibrator.py @@ -0,0 +1,715 @@ +from abc import ABC, abstractmethod +import logging +import math +from typing import Self + +import numpy as np +from scipy.optimize import fmin +from scipy.special import expit, logit, softmax +from sklearn.isotonic import IsotonicRegression +import torch +from torch import Tensor + +from chemprop.utils.registry import ClassRegistry + +logger = logging.getLogger(__name__) + + +class CalibratorBase(ABC): + """ + A base class for calibrating the predicted uncertainties. + """ + + @abstractmethod + def fit(self, *args, **kwargs) -> Self: + """ + Fit calibration method for the calibration data. + """ + + @abstractmethod + def apply(self, uncs: Tensor) -> Tensor: + """ + Apply this calibrator to the input uncertainties. 
+ + Parameters + ---------- + uncs: Tensor + a tensor containing uncalibrated uncertainties + + Returns + ------- + Tensor + the calibrated uncertainties + """ + + +UncertaintyCalibratorRegistry = ClassRegistry[CalibratorBase]() + + +class RegressionCalibrator(CalibratorBase): + """ + A class for calibrating the predicted uncertainties in regression tasks. + """ + + @abstractmethod + def fit(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + """ + Fit calibration method for the calibration data. + + Parameters + ---------- + preds: Tensor + the predictions for regression tasks. It is a tensor of the shape of ``n x t``, where ``n`` is + the number of input molecules/reactions, and ``t`` is the number of tasks. + uncs: Tensor + the predicted uncertainties of the shape of ``n x t`` + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the fitting + + Returns + ------- + self : RegressionCalibrator + the fitted calibrator + """ + + +@UncertaintyCalibratorRegistry.register("zscaling") +class ZScalingCalibrator(RegressionCalibrator): + """Calibrate regression datasets by applying a scaling value to the uncalibrated standard deviation, + fitted by minimizing the negative-log-likelihood of a normal distribution around each prediction. [levi2022]_ + + References + ---------- + .. [levi2022] Levi, D.; Gispan, L.; Giladi, N.; Fetaya, E. "Evaluating and Calibrating Uncertainty Prediction in + Regression Tasks." Sensors, 2022, 22(15), 5540. https://www.mdpi.com/1424-8220/22/15/5540 + """ + + def fit(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + scalings = np.zeros(uncs.shape[1]) + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + preds_j = preds[:, j][mask_j].numpy() + uncs_j = uncs[:, j][mask_j].numpy() + targets_j = targets[:, j][mask_j].numpy() + errors = preds_j - targets_j + + def objective(scaler_value: float): + scaled_vars = uncs_j * scaler_value**2 + nll = np.log(2 * np.pi * scaled_vars) / 2 + errors**2 / (2 * scaled_vars) + return nll.sum() + + zscore = errors / np.sqrt(uncs_j) + initial_guess = np.std(zscore) + scalings[j] = fmin(objective, x0=initial_guess, disp=False) + + self.scalings = torch.tensor(scalings) + return self + + def apply(self, uncs: Tensor) -> Tensor: + return uncs * self.scalings**2 + + +@UncertaintyCalibratorRegistry.register("zelikman-interval") +class ZelikmanCalibrator(RegressionCalibrator): + """Calibrate regression datasets using a method that does not depend on a particular probability function form. + + It uses the "CRUDE" method as described in [zelikman2020]_. We implemented this method to be used with variance as the uncertainty. + + Parameters + ---------- + p: float + The target quantile, :math:`p \in [0, 1]` + + References + ---------- + .. [zelikman2020] Zelikman, E.; Healy, C.; Zhou, S.; Avati, A. "CRUDE: calibrating regression uncertainty distributions + empirically." arXiv preprint arXiv:2005.12496. https://doi.org/10.48550/arXiv.2005.12496 + """ + + def __init__(self, p: float): + super().__init__() + self.p = p + if not 0 <= self.p <= 1: + raise ValueError(f"arg `p` must be between 0 and 1. 
got: {p}.") + + def fit(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + scalings = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + preds_j = preds[:, j][mask_j] + uncs_j = uncs[:, j][mask_j] + targets_j = targets[:, j][mask_j] + z = (preds_j - targets_j).abs() / (uncs_j).sqrt() + scaling = torch.quantile(z, self.p, interpolation="lower") + scalings.append(scaling) + + self.scalings = torch.tensor(scalings) + return self + + def apply(self, uncs: Tensor) -> Tensor: + return uncs * self.scalings**2 + + +@UncertaintyCalibratorRegistry.register("mve-weighting") +class MVEWeightingCalibrator(RegressionCalibrator): + """Calibrate regression datasets that have ensembles of individual models that make variance predictions. + + This method minimizes the negative log likelihood for the predictions versus the targets by applying + a weighted average across the variance predictions of the ensemble. [wang2021]_ + + References + ---------- + .. [wang2021] Wang, D.; Yu, J.; Chen, L.; Li, X.; Jiang, H.; Chen, K.; Zheng, M.; Luo, X. "A hybrid framework + for improving uncertainty quantification in deep learning-based QSAR regression modeling." J. Cheminform., + 2021, 13, 1-17. https://doi.org/10.1186/s13321-021-00551-x + """ + + def fit(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + """ + Fit calibration method for the calibration data. + + Parameters + ---------- + preds: Tensor + the predictions for regression tasks. It is a tensor of the shape of ``n x t``, where ``n`` is + the number of input molecules/reactions, and ``t`` is the number of tasks. + uncs: Tensor + the predicted uncertainties of the shape of ``m x n x t`` + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the fitting + + Returns + ------- + self : MVEWeightingCalibrator + the fitted calibrator + """ + scalings = [] + for j in range(uncs.shape[2]): + mask_j = mask[:, j] + preds_j = preds[:, j][mask_j].numpy() + uncs_j = uncs[:, mask_j, j].numpy() + targets_j = targets[:, j][mask_j].numpy() + errors = preds_j - targets_j + + def objective(scaler_values: np.ndarray): + scaler_values = np.reshape(softmax(scaler_values), [-1, 1]) # (m, 1) + scaled_vars = np.sum(uncs_j * scaler_values, axis=0, keepdims=False) + nll = np.log(2 * np.pi * scaled_vars) / 2 + errors**2 / (2 * scaled_vars) + return np.sum(nll) + + initial_guess = np.ones(uncs_j.shape[0]) + sol = fmin(objective, x0=initial_guess, disp=False) + scalings.append(torch.tensor(softmax(sol))) + + self.scalings = torch.stack(scalings).t().unsqueeze(1) + return self + + def apply(self, uncs: Tensor) -> Tensor: + """ + Apply this calibrator to the input uncertainties. + + Parameters + ---------- + uncs: Tensor + a tensor containinig uncalibrated uncertainties of the shape of ``m x n x t`` + + Returns + ------- + Tensor + the calibrated uncertainties of the shape of ``n x t`` + """ + return (uncs * self.scalings).sum(0) + + +@UncertaintyCalibratorRegistry.register("conformal-regression") +class RegressionConformalCalibrator(RegressionCalibrator): + r"""Conformalize quantiles to make the interval :math:`[\hat{t}_{\alpha/2}(x),\hat{t}_{1-\alpha/2}(x)]` to have + approximately :math:`1-\alpha` coverage. [angelopoulos2021]_ + + .. 
math:: + s(x, y) &= \max \left\{ \hat{t}_{\alpha/2}(x) - y, y - \hat{t}_{1-\alpha/2}(x) \right\} + + \hat{q} &= Q(s_1, \ldots, s_n; \left\lceil \frac{(n+1)(1-\alpha)}{n} \right\rceil) + + C(x) &= \left[ \hat{t}_{\alpha/2}(x) - \hat{q}, \hat{t}_{1-\alpha/2}(x) + \hat{q} \right] + + where :math:`s` is the nonconformity score as the difference between :math:`y` and its nearest quantile. + :math:`\hat{t}_{\alpha/2}(x)` and :math:`\hat{t}_{1-\alpha/2}(x)` are the predicted quantiles from a quantile + regression model. + + .. note:: + The algorithm is specifically designed for quantile regression model. Intuitively, the set :math:`C(x)` just + grows or shrinks the distance between the quantiles by :math:`\hat{q}` to achieve coverage. However, this + function can also be applied to regression model without quantiles being provided. In this case, both + :math:`\hat{t}_{\alpha/2}(x)` and :math:`\hat{t}_{1-\alpha/2}(x)` are the same as :math:`\hat{y}`. Then, the + interval would be the same for every data point (i.e., :math:`\left[-\hat{q}, \hat{q} \right]`). + + Parameters + ---------- + alpha: float + The error rate, :math:`\alpha \in [0, 1]` + + References + ---------- + .. [angelopoulos2021] Angelopoulos, A.N.; Bates, S.; "A Gentle Introduction to Conformal Prediction and Distribution-Free + Uncertainty Quantification." arXiv Preprint 2021, https://arxiv.org/abs/2107.07511 + """ + + def __init__(self, alpha: float): + super().__init__() + self.alpha = alpha + self.bounds = torch.tensor([-1 / 2, 1 / 2]).view(-1, 1) + if not 0 <= self.alpha <= 1: + raise ValueError(f"arg `alpha` must be between 0 and 1. got: {alpha}.") + + def fit(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + self.qhats = [] + for j in range(preds.shape[1]): + mask_j = mask[:, j] + targets_j = targets[:, j][mask_j] + preds_j = preds[:, j][mask_j] + interval_j = uncs[:, j][mask_j] + + interval_bounds = self.bounds * interval_j.unsqueeze(0) + pred_bounds = preds_j.unsqueeze(0) + interval_bounds + + calibration_scores = torch.max(pred_bounds[0] - targets_j, targets_j - pred_bounds[1]) + + num_data = targets_j.shape[0] + if self.alpha >= 1 / (num_data + 1): + q_level = math.ceil((num_data + 1) * (1 - self.alpha)) / num_data + else: + q_level = 1 + logger.warning( + "The error rate (i.e., `alpha`) is smaller than `1 / (number of data + 1)`, so the `1 - alpha` quantile is set to 1, " + "but this only ensures that the coverage is trivially satisfied." + ) + qhat = torch.quantile(calibration_scores, q_level, interpolation="higher") + self.qhats.append(qhat) + + self.qhats = torch.tensor(self.qhats) + return self + + def apply(self, uncs: Tensor) -> tuple[Tensor, Tensor]: + """ + Apply this calibrator to the input uncertainties. + + Parameters + ---------- + uncs: Tensor + a tensor containinig uncalibrated uncertainties + + Returns + ------- + Tensor + the calibrated intervals + """ + cal_intervals = uncs + 2 * self.qhats + + return cal_intervals + + +class BinaryClassificationCalibrator(CalibratorBase): + """ + A class for calibrating the predicted uncertainties in binary classification tasks. + """ + + @abstractmethod + def fit(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + """ + Fit calibration method for the calibration data. + + Parameters + ---------- + uncs: Tensor + the predicted uncertainties (i.e., the predicted probability of class 1) of the shape of ``n x t``, where ``n`` is the number of input + molecules/reactions, and ``t`` is the number of tasks. 
+ targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the fitting + + Returns + ------- + self : BinaryClassificationCalibrator + the fitted calibrator + """ + + +@UncertaintyCalibratorRegistry.register("platt") +class PlattCalibrator(BinaryClassificationCalibrator): + """Calibrate classification datasets using the Platt scaling algorithm [guo2017]_, [platt1999]_. + + In [platt1999]_, Platt suggests using the number of positive and negative training examples to + adjust the value of target probabilities used to fit the parameters. + + References + ---------- + .. [guo2017] Guo, C.; Pleiss, G.; Sun, Y.; Weinberger, K. Q. "On calibration of modern neural + networks". ICML, 2017. https://arxiv.org/abs/1706.04599 + .. [platt1999] Platt, J. "Probabilistic Outputs for Support Vector Machines and Comparisons to + Regularized Likelihood Methods." Adv. Large Margin Classif. 1999, 10 (3), 61–74. + """ + + def fit( + self, uncs: Tensor, targets: Tensor, mask: Tensor, training_targets: Tensor | None = None + ) -> Self: + if torch.any((targets[mask] != 0) & (targets[mask] != 1)): + raise ValueError( + "Platt scaling is only implemented for binary classification tasks! Input tensor " + "must contain only 0's and 1's." + ) + + if training_targets is not None: + logger.info( + "Training targets were provided. Platt scaling for calibration uses a Bayesian " + "correction to avoid training set overfitting. Now replacing calibration targets " + "[0, 1] with adjusted values." + ) + + n_negative_examples = (training_targets == 0).sum(dim=0) + n_positive_examples = (training_targets == 1).sum(dim=0) + + negative_target_bayes_MAP = (1 / (n_negative_examples + 2)).expand_as(targets) + positive_target_bayes_MAP = ( + (n_positive_examples + 1) / (n_positive_examples + 2) + ).expand_as(targets) + + targets = targets.float() + targets[targets == 0] = negative_target_bayes_MAP[targets == 0] + targets[targets == 1] = positive_target_bayes_MAP[targets == 1] + else: + logger.info("No training targets were provided. No Bayesian correction is applied.") + + xs = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + uncs_j = uncs[:, j][mask_j].numpy() + targets_j = targets[:, j][mask_j].numpy() + + def objective(parameters): + a, b = parameters + scaled_uncs = expit(a * logit(uncs_j) + b) + nll = -1 * np.sum( + targets_j * np.log(scaled_uncs) + (1 - targets_j) * np.log(1 - scaled_uncs) + ) + return nll + + xs.append(fmin(objective, x0=[1, 0], disp=False)) + + xs = np.vstack(xs) + self.a, self.b = torch.tensor(xs).T.unbind(dim=0) + + return self + + def apply(self, uncs: Tensor) -> Tensor: + return torch.sigmoid(self.a * torch.logit(uncs) + self.b) + + +@UncertaintyCalibratorRegistry.register("isotonic") +class IsotonicCalibrator(BinaryClassificationCalibrator): + """Calibrate binary classification datasets using isotonic regression as discussed in [guo2017]_. + In effect, the method transforms incoming uncalibrated confidences using a histogram-like + function where the range of each transforming bin and its magnitude is learned. + + References + ---------- + .. [guo2017] Guo, C.; Pleiss, G.; Sun, Y.; Weinberger, K. Q. "On calibration of modern neural + networks". ICML, 2017. 
https://arxiv.org/abs/1706.04599 + """ + + def fit(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + if torch.any((targets[mask] != 0) & (targets[mask] != 1)): + raise ValueError( + "Isotonic calibration is only implemented for binary classification tasks! Input " + "tensor must contain only 0's and 1's." + ) + + isotonic_models = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + uncs_j = uncs[:, j][mask_j].numpy() + targets_j = targets[:, j][mask_j].numpy() + + isotonic_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip") + isotonic_model.fit(uncs_j, targets_j) + isotonic_models.append(isotonic_model) + + self.isotonic_models = isotonic_models + + return self + + def apply(self, uncs: Tensor) -> Tensor: + cal_uncs = [] + for j, isotonic_model in enumerate(self.isotonic_models): + cal_uncs.append(isotonic_model.predict(uncs[:, j].numpy())) + return torch.tensor(np.array(cal_uncs)).t() + + +@UncertaintyCalibratorRegistry.register("conformal-multilabel") +class MultilabelConformalCalibrator(BinaryClassificationCalibrator): + r"""Creates conformal in-set and conformal out-set such that, for :math:`1-\alpha` proportion of datapoints, + the set of labels is bounded by the in- and out-sets [1]_: + + .. math:: + \Pr \left( + \hat{\mathcal C}_{\text{in}}(X) \subseteq \mathcal Y \subseteq \hat{\mathcal C}_{\text{out}}(X) + \right) \geq 1 - \alpha, + + where the in-set :math:`\hat{\mathcal C}_\text{in}` is contained by the set of true labels :math:`\mathcal Y` and + :math:`\mathcal Y` is contained within the out-set :math:`\hat{\mathcal C}_\text{out}`. + + Parameters + ---------- + alpha: float + The error rate, :math:`\alpha \in [0, 1]` + + References + ---------- + .. [1] Cauchois, M.; Gupta, S.; Duchi, J.; "Knowing What You Know: Valid and Validated Confidence Sets + in Multiclass and Multilabel Prediction." arXiv Preprint 2020, https://arxiv.org/abs/2004.10181 + """ + + def __init__(self, alpha: float): + super().__init__() + self.alpha = alpha + if not 0 <= self.alpha <= 1: + raise ValueError(f"arg `alpha` must be between 0 and 1. got: {alpha}.") + + @staticmethod + def nonconformity_scores(preds: Tensor): + r""" + Compute nonconformity score as the negative of the predicted probability. + + .. math:: + s_i = -\hat{f}(X_i)_{Y_i} + """ + return -preds + + def fit(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + if targets.shape[1] < 2: + raise ValueError( + f"the number of tasks should be larger than 1! got: {targets.shape[1]}." 
+ ) + + has_zeros = torch.any(targets == 0, dim=1) + index_zeros = targets[has_zeros] == 0 + scores_in = self.nonconformity_scores(uncs[has_zeros]) + masked_scores_in = scores_in * index_zeros.float() + torch.where( + index_zeros, torch.zeros_like(scores_in), torch.tensor(float("inf")) + ) + calibration_scores_in = torch.min( + masked_scores_in.masked_fill(~mask, float("inf")), dim=1 + ).values + + has_ones = torch.any(targets == 1, dim=1) + index_ones = targets[has_ones] == 1 + scores_out = self.nonconformity_scores(uncs[has_ones]) + masked_scores_out = scores_out * index_ones.float() + torch.where( + index_ones, torch.zeros_like(scores_out), torch.tensor(float("-inf")) + ) + calibration_scores_out = torch.max( + masked_scores_out.masked_fill(~mask, float("-inf")), dim=1 + ).values + + self.tout = torch.quantile( + calibration_scores_out, 1 - self.alpha / 2, interpolation="higher" + ) + self.tin = torch.quantile(calibration_scores_in, self.alpha / 2, interpolation="higher") + return self + + def apply(self, uncs: Tensor) -> Tensor: + """ + Apply this calibrator to the input uncertainties. + + Parameters + ---------- + uncs: Tensor + a tensor containing uncalibrated uncertainties + + Returns + ------- + Tensor + the calibrated uncertainties of the shape of ``n x t x 2``, where ``n`` is the number of input + molecules/reactions, ``t`` is the number of tasks, and the first element in the last dimension + corresponds to the in-set :math:`\hat{\mathcal C}_\text{in}`, while the second corresponds to + the out-set :math:`\hat{\mathcal C}_\text{out}`. + """ + scores = self.nonconformity_scores(uncs) + + cal_preds_in = (scores <= self.tin).int() + cal_preds_out = (scores <= self.tout).int() + cal_preds_in_out = torch.stack((cal_preds_in, cal_preds_out), dim=2) + + return cal_preds_in_out + + +class MulticlassClassificationCalibrator(CalibratorBase): + """ + A class for calibrating the predicted uncertainties in multiclass classification tasks. + """ + + @abstractmethod + def fit(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + """ + Fit calibration method for the calibration data. + + Parameters + ---------- + uncs: Tensor + the predicted uncertainties (i.e., the predicted probabilities for each class) of the + shape of ``n x t x c``, where ``n`` is the number of input molecules/reactions, ``t`` is + the number of tasks, and ``c`` is the number of classes. + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in + the fitting + + Returns + ------- + self : MulticlassClassificationCalibrator + the fitted calibrator + """ + + +@UncertaintyCalibratorRegistry.register("conformal-multiclass") +class MulticlassConformalCalibrator(MulticlassClassificationCalibrator): + r"""Create a prediction set of possible labels :math:`C(X_{\text{test}}) \subset \{1 \mathrel{.\,.} K\}` that follows: + + .. math:: + 1 - \alpha \leq \Pr (Y_{\text{test}} \in C(X_{\text{test}})) \leq 1 - \alpha + \frac{1}{n + 1} + + In other words, the probability that the prediction set contains the correct label is almost exactly :math:`1-\alpha`. + More details can be found in [1]_. + + Parameters + ---------- + alpha: float + Error rate, :math:`\alpha \in [0, 1]` + + References + ---------- + .. [1] Angelopoulos, A.N.; Bates, S.; "A Gentle Introduction to Conformal Prediction and Distribution-Free + Uncertainty Quantification." 
arXiv Preprint 2021, https://arxiv.org/abs/2107.07511 + """ + + def __init__(self, alpha: float): + super().__init__() + self.alpha = alpha + if not 0 <= self.alpha <= 1: + raise ValueError(f"arg `alpha` must be between 0 and 1. got: {alpha}.") + + @staticmethod + def nonconformity_scores(preds: Tensor): + r"""Compute nonconformity score as the negative of the softmax output for the true class. + + .. math:: + s_i = -\hat{f}(X_i)_{Y_i} + """ + return -preds + + def fit(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + self.qhats = [] + scores = self.nonconformity_scores(uncs) + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + targets_j = targets[:, j][mask_j] + scores_j = scores[:, j][mask_j] + + scores_j = torch.gather(scores_j, 1, targets_j.unsqueeze(1)).squeeze(1) + num_data = targets_j.shape[0] + if self.alpha >= 1 / (num_data + 1): + q_level = math.ceil((num_data + 1) * (1 - self.alpha)) / num_data + else: + q_level = 1 + logger.warning( + "`alpha` is smaller than `1 / (number of data + 1)`, so the `1 - alpha` quantile is set to 1, " + "but this only ensures that the coverage is trivially satisfied." + ) + qhat = torch.quantile(scores_j, q_level, interpolation="higher") + self.qhats.append(qhat) + + self.qhats = torch.tensor(self.qhats) + return self + + def apply(self, uncs: Tensor) -> Tensor: + calibrated_preds = torch.zeros_like(uncs, dtype=torch.int) + scores = self.nonconformity_scores(uncs) + + for j, qhat in enumerate(self.qhats): + calibrated_preds[:, j] = (scores[:, j] <= qhat).int() + + return calibrated_preds + + +@UncertaintyCalibratorRegistry.register("conformal-adaptive") +class AdaptiveMulticlassConformalCalibrator(MulticlassConformalCalibrator): + @staticmethod + def nonconformity_scores(preds): + r"""Compute nonconformity score by greedily including classes in the classification set until it reaches the true label. + + .. math:: + s(x, y) = \sum_{j=1}^{k} \hat{f}(x)_{\pi_j(x)}, \text{ where } y = \pi_k(x) + + where :math:`\pi_k(x)` is the permutation of :math:`\{1 \mathrel{.\,.} K\}` that sorts :math:`\hat{f}(X_{test})` from most likely to least likely. + """ + + sort_index = torch.argsort(-preds, dim=2) + sorted_preds = torch.gather(preds, 2, sort_index) + sorted_scores = sorted_preds.cumsum(dim=2) + unsorted_scores = torch.zeros_like(sorted_scores).scatter_(2, sort_index, sorted_scores) + + return unsorted_scores + + +@UncertaintyCalibratorRegistry.register("isotonic-multiclass") +class IsotonicMulticlassCalibrator(MulticlassClassificationCalibrator): + """Calibrate multiclass classification datasets using isotonic regression as discussed in + [guo2017]_. It uses a one-vs-all aggregation scheme to extend isotonic regression from binary to + multiclass classifiers. + + References + ---------- + .. [guo2017] Guo, C.; Pleiss, G.; Sun, Y.; Weinberger, K. Q. "On calibration of modern neural + networks". ICML, 2017. 
https://arxiv.org/abs/1706.04599 + """ + + def fit(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + isotonic_models = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + uncs_j = uncs[:, j, :][mask_j].numpy() + targets_j = targets[:, j][mask_j].numpy() + + class_isotonic_models = [] + for k in range(uncs.shape[2]): + class_uncs_j = uncs_j[..., k] + positive_class_targets = targets_j == k + + class_targets = np.ones_like(class_uncs_j) + class_targets[positive_class_targets] = 1 + class_targets[~positive_class_targets] = 0 + + isotonic_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip") + isotonic_model.fit(class_uncs_j, class_targets) + class_isotonic_models.append(isotonic_model) + + isotonic_models.append(class_isotonic_models) + + self.isotonic_models = isotonic_models + + return self + + def apply(self, uncs: Tensor) -> Tensor: + cal_uncs = torch.zeros_like(uncs) + for j, class_isotonic_models in enumerate(self.isotonic_models): + for k, isotonic_model in enumerate(class_isotonic_models): + class_uncs_j = uncs[:, j, k].numpy() + class_cal_uncs = isotonic_model.predict(class_uncs_j) + cal_uncs[:, j, k] = torch.tensor(class_cal_uncs) + return cal_uncs / cal_uncs.sum(dim=-1, keepdim=True) diff --git a/chemprop-updated/chemprop/uncertainty/estimator.py b/chemprop-updated/chemprop/uncertainty/estimator.py new file mode 100644 index 0000000000000000000000000000000000000000..4213a1afcf2be50069b22cb7a8f9a2a626a62c73 --- /dev/null +++ b/chemprop-updated/chemprop/uncertainty/estimator.py @@ -0,0 +1,376 @@ +from abc import ABC, abstractmethod +from typing import Iterable + +from lightning import pytorch as pl +import torch +from torch import Tensor +from torch.utils.data import DataLoader + +from chemprop.models.model import MPNN +from chemprop.utils.registry import ClassRegistry + + +class UncertaintyEstimator(ABC): + """A helper class for making model predictions and associated uncertainty predictions.""" + + @abstractmethod + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + """ + Calculate the uncalibrated predictions and uncertainties for the dataloader. + + dataloader: DataLoader + the dataloader used for model predictions and uncertainty predictions + models: Iterable[MPNN] + the models used for model predictions and uncertainty predictions + trainer: pl.Trainer + an instance of the :class:`~lightning.pytorch.trainer.trainer.Trainer` used to manage model inference + + Returns + ------- + preds : Tensor + the model predictions, with shape varying by task type: + + * regression/binary classification: ``m x n x t`` + + * multiclass classification: ``m x n x t x c``, where ``m`` is the number of models, + ``n`` is the number of inputs, ``t`` is the number of tasks, and ``c`` is the number of classes. + uncs : Tensor + the predicted uncertainties, with shapes of ``m' x n x t``. + + .. note:: + The ``m`` and ``m'`` are different by definition. The ``m`` is the number of models, + while the ``m'`` is the number of uncertainty estimations. For example, if two MVE + or evidential models are provided, both ``m`` and ``m'`` are two. However, for an + ensemble of two models, ``m'`` would be one (even though ``m = 2``). 
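A minimal, shape-only sketch of how the returned tensors are typically reduced downstream; the ``m``, ``n``, ``t`` values and the random tensors below are arbitrary stand-ins, not taken from the code above.

import torch

m, n, t = 3, 8, 2                  # models, datapoints, tasks (toy values)
preds = torch.randn(m, n, t)       # stand-in for the stacked per-model predictions
uncs = torch.rand(1, n, t)         # e.g. an ensemble variance has m' = 1

mean_preds = preds.mean(dim=0)     # collapse the model dimension -> n x t
mean_uncs = uncs.mean(dim=0)       # -> n x t
print(mean_preds.shape, mean_uncs.shape)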
+ """ + + +UncertaintyEstimatorRegistry = ClassRegistry[UncertaintyEstimator]() + + +@UncertaintyEstimatorRegistry.register("none") +class NoUncertaintyEstimator(UncertaintyEstimator): + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + predss = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + predss.append(preds) + return torch.stack(predss), None + + +@UncertaintyEstimatorRegistry.register("mve") +class MVEEstimator(UncertaintyEstimator): + """ + Class that estimates prediction means and variances (MVE). [nix1994]_ + + References + ---------- + .. [nix1994] Nix, D. A.; Weigend, A. S. "Estimating the mean and variance of the target + probability distribution." Proceedings of 1994 IEEE International Conference on Neural + Networks, 1994 https://doi.org/10.1109/icnn.1994.374138 + """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + mves = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + mves.append(preds) + mves = torch.stack(mves, dim=0) + mean, var = mves.unbind(dim=-1) + return mean, var + + +@UncertaintyEstimatorRegistry.register("ensemble") +class EnsembleEstimator(UncertaintyEstimator): + """ + Class that predicts the uncertainty of predictions based on the variance in predictions among + an ensemble's submodels. + """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + if len(models) <= 1: + raise ValueError( + "Ensemble method for uncertainty is only available when multiple models are provided." + ) + ensemble_preds = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + ensemble_preds.append(preds) + stacked_preds = torch.stack(ensemble_preds).float() + vars = torch.var(stacked_preds, dim=0, correction=0).unsqueeze(0) + return stacked_preds, vars + + +@UncertaintyEstimatorRegistry.register("classification") +class ClassEstimator(UncertaintyEstimator): + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + predss = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + predss.append(preds) + return torch.stack(predss), torch.stack(predss) + + +@UncertaintyEstimatorRegistry.register("evidential-total") +class EvidentialTotalEstimator(UncertaintyEstimator): + """ + Class that predicts the total evidential uncertainty based on hyperparameters of + the evidential distribution [amini2020]_. + + References + ----------- + .. [amini2020] Amini, A.; Schwarting, W.; Soleimany, A.; Rus, D. "Deep Evidential Regression". + NeurIPS, 2020. https://arxiv.org/abs/1910.02600 + """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + uncs = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + uncs.append(preds) + uncs = torch.stack(uncs) + mean, v, alpha, beta = uncs.unbind(-1) + total_uncs = (1 + 1 / v) * (beta / (alpha - 1)) + return mean, total_uncs + + +@UncertaintyEstimatorRegistry.register("evidential-epistemic") +class EvidentialEpistemicEstimator(UncertaintyEstimator): + """ + Class that predicts the epistemic evidential uncertainty based on hyperparameters of + the evidential distribution. 
+ """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + uncs = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + uncs.append(preds) + uncs = torch.stack(uncs) + mean, v, alpha, beta = uncs.unbind(-1) + epistemic_uncs = (1 / v) * (beta / (alpha - 1)) + return mean, epistemic_uncs + + +@UncertaintyEstimatorRegistry.register("evidential-aleatoric") +class EvidentialAleatoricEstimator(UncertaintyEstimator): + """ + Class that predicts the aleatoric evidential uncertainty based on hyperparameters of + the evidential distribution. + """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + uncs = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + uncs.append(preds) + uncs = torch.stack(uncs) + mean, _, alpha, beta = uncs.unbind(-1) + aleatoric_uncs = beta / (alpha - 1) + return mean, aleatoric_uncs + + +@UncertaintyEstimatorRegistry.register("dropout") +class DropoutEstimator(UncertaintyEstimator): + """ + A :class:`DropoutEstimator` creates a virtual ensemble of models via Monte Carlo dropout with + the provided model [gal2016]_. + + Parameters + ---------- + ensemble_size: int + The number of samples to draw for the ensemble. + dropout: float | None + The probability of dropping out units in the dropout layers. If unspecified, + the training probability is used, which is prefered but not possible if the model was not + trained with dropout (i.e. p=0). + + References + ----------- + .. [gal2016] Gal, Y.; Ghahramani, Z. "Dropout as a bayesian approximation: Representing model uncertainty in deep learning." + International conference on machine learning. PMLR, 2016. 
https://arxiv.org/abs/1506.02142 + """ + + def __init__(self, ensemble_size: int, dropout: None | float = None): + self.ensemble_size = ensemble_size + self.dropout = dropout + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + meanss, varss = [], [] + for model in models: + self._setup_model(model) + individual_preds = [] + + for _ in range(self.ensemble_size): + predss = trainer.predict(model, dataloader) + preds = torch.concat(predss, 0) + individual_preds.append(preds) + + stacked_preds = torch.stack(individual_preds, dim=0).float() + means = torch.mean(stacked_preds, dim=0).unsqueeze(0) + vars = torch.var(stacked_preds, dim=0, correction=0) + self._restore_model(model) + meanss.append(means) + varss.append(vars) + return torch.stack(meanss), torch.stack(varss) + + def _setup_model(self, model): + model._predict_step = model.predict_step + model.predict_step = self._predict_step(model) + model.apply(self._change_dropout) + + def _restore_model(self, model): + model.predict_step = model._predict_step + del model._predict_step + model.apply(self._restore_dropout) + + def _predict_step(self, model): + def _wrapped_predict_step(*args, **kwargs): + model.apply(self._activate_dropout) + return model._predict_step(*args, **kwargs) + + return _wrapped_predict_step + + def _activate_dropout(self, module): + if isinstance(module, torch.nn.Dropout): + module.train() + + def _change_dropout(self, module): + if isinstance(module, torch.nn.Dropout): + module._p = module.p + if self.dropout: + module.p = self.dropout + + def _restore_dropout(self, module): + if isinstance(module, torch.nn.Dropout): + if hasattr(module, "_p"): + module.p = module._p + del module._p + + +# TODO: Add in v2.1.x +# @UncertaintyEstimatorRegistry.register("spectra-roundrobin") +# class RoundRobinSpectraEstimator(UncertaintyEstimator): +# def __call__( +# self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer +# ) -> tuple[Tensor, Tensor]: +# return + + +@UncertaintyEstimatorRegistry.register("classification-dirichlet") +class ClassificationDirichletEstimator(UncertaintyEstimator): + """ + A :class:`ClassificationDirichletEstimator` predicts an amount of 'evidence' for both the + negative class and the positive class as described in [sensoy2018]_. The class probabilities and + the uncertainty are calculated based on the evidence. + + .. math:: + S = \sum_{i=1}^K \alpha_i + p_i = \alpha_i / S + u = K / S + + where :math:`K` is the number of classes, :math:`\alpha_i` is the evidence for class :math:`i`, + :math:`p_i` is the probability of class :math:`i`, and :math:`u` is the uncertainty. + + References + ---------- + .. [sensoy2018] Sensoy, M.; Kaplan, L.; Kandemir, M. "Evidential deep learning to quantify + classification uncertainty." NeurIPS, 2018, 31. https://doi.org/10.48550/arXiv.1806.01768 + """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + uncs = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + uncs.append(preds) + uncs = torch.stack(uncs, dim=0) + y, u = uncs.unbind(dim=-1) + return y, u + + +@UncertaintyEstimatorRegistry.register("multiclass-dirichlet") +class MulticlassDirichletEstimator(UncertaintyEstimator): + """ + A :class:`MulticlassDirichletEstimator` predicts an amount of 'evidence' for each class as + described in [sensoy2018]_. 
The class probabilities and the uncertainty are calculated based on + the evidence. + + .. math:: + S = \sum_{i=1}^K \alpha_i + p_i = \alpha_i / S + u = K / S + + where :math:`K` is the number of classes, :math:`\alpha_i` is the evidence for class :math:`i`, + :math:`p_i` is the probability of class :math:`i`, and :math:`u` is the uncertainty. + + References + ---------- + .. [sensoy2018] Sensoy, M.; Kaplan, L.; Kandemir, M. "Evidential deep learning to quantify + classification uncertainty." NeurIPS, 2018, 31. https://doi.org/10.48550/arXiv.1806.01768 + """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + preds = [] + uncs = [] + for model in models: + self._setup_model(model) + output = torch.concat(trainer.predict(model, dataloader), 0) + self._restore_model(model) + preds.append(output[..., :-1]) + uncs.append(output[..., -1]) + preds = torch.stack(preds, 0) + uncs = torch.stack(uncs, 0) + + return preds, uncs + + def _setup_model(self, model): + model.predictor._forward = model.predictor.forward + model.predictor.forward = self._forward.__get__(model.predictor, model.predictor.__class__) + + def _restore_model(self, model): + model.predictor.forward = model.predictor._forward + del model.predictor._forward + + def _forward(self, Z: Tensor) -> Tensor: + alpha = self.train_step(Z) + + u = alpha.shape[2] / alpha.sum(-1, keepdim=True) + Y = alpha / alpha.sum(-1, keepdim=True) + + return torch.concat([Y, u], -1) + + +@UncertaintyEstimatorRegistry.register("quantile-regression") +class QuantileRegressionEstimator(UncertaintyEstimator): + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + individual_preds = [] + for model in models: + predss = trainer.predict(model, dataloader) + individual_preds.append(torch.concat(predss, 0)) + stacked_preds = torch.stack(individual_preds).float() + mean, interval = stacked_preds.unbind(2) + return mean, interval diff --git a/chemprop-updated/chemprop/uncertainty/evaluator.py b/chemprop-updated/chemprop/uncertainty/evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..1e88fab2835b0b29aadd654949176b38ff899476 --- /dev/null +++ b/chemprop-updated/chemprop/uncertainty/evaluator.py @@ -0,0 +1,368 @@ +from abc import ABC, abstractmethod + +import numpy as np +import torch +from torch import Tensor +from torchmetrics.regression import SpearmanCorrCoef + +from chemprop.utils.registry import ClassRegistry + +UncertaintyEvaluatorRegistry = ClassRegistry() + + +class RegressionEvaluator(ABC): + """Evaluates the quality of uncertainty estimates in regression tasks.""" + + @abstractmethod + def evaluate(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + """Evaluate the performance of uncertainty predictions against the model target values. + + Parameters + ---------- + preds: Tensor + the predictions for regression tasks. It is a tensor of the shape of ``n x t``, where ``n`` is + the number of input molecules/reactions, and ``t`` is the number of tasks. 
+ uncs: Tensor + the predicted uncertainties of the shape of ``n x t`` + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the evaluation + + Returns + ------- + Tensor + a tensor of the shape ``t`` containing the evaluated metrics + """ + + +@UncertaintyEvaluatorRegistry.register("nll-regression") +class NLLRegressionEvaluator(RegressionEvaluator): + r""" + Evaluate uncertainty values for regression datasets using the mean negative-log-likelihood + of the targets given the probability distributions estimated by the model: + + .. math:: + + \mathrm{NLL}(y, \hat y) = \frac{1}{2} \log(2 \pi \sigma^2) + \frac{(y - \hat{y})^2}{2 \sigma^2} + + where :math:`\hat{y}` is the predicted value, :math:`y` is the true value, and + :math:`\sigma^2` is the predicted uncertainty (variance). + + The function returns a tensor containing the mean NLL for each task. + """ + + def evaluate(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + nlls = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + preds_j = preds[:, j][mask_j] + targets_j = targets[:, j][mask_j] + uncs_j = uncs[:, j][mask_j] + errors = preds_j - targets_j + nll = (2 * torch.pi * uncs_j).log() / 2 + errors**2 / (2 * uncs_j) + nlls.append(nll.mean(dim=0)) + return torch.stack(nlls) + + +@UncertaintyEvaluatorRegistry.register("miscalibration_area") +class CalibrationAreaEvaluator(RegressionEvaluator): + """ + A class for evaluating regression uncertainty values based on how they deviate from perfect + calibration on an observed-probability versus expected-probability plot. + """ + + def evaluate( + self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor, num_bins: int = 100 + ) -> Tensor: + """Evaluate the performance of uncertainty predictions against the model target values. + + Parameters + ---------- + preds: Tensor + the predictions for regression tasks. It is a tensor of the shape of ``n x t``, where ``n`` is + the number of input molecules/reactions, and ``t`` is the number of tasks. + uncs: Tensor + the predicted uncertainties (variance) of the shape of ``n x t`` + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the evaluation + num_bins: int, default=100 + the number of bins to discretize the ``[0, 1]`` interval + + Returns + ------- + Tensor + a tensor of the shape ``t`` containing the evaluated metrics + """ + bins = torch.arange(1, num_bins) + bin_scaling = torch.special.erfinv(bins / num_bins).view(-1, 1, 1) * np.sqrt(2) + errors = torch.abs(preds - targets) + uncs = torch.sqrt(uncs).unsqueeze(0) + bin_unc = uncs * bin_scaling + bin_count = bin_unc >= errors.unsqueeze(0) + mask = mask.unsqueeze(0) + observed_auc = (bin_count & mask).sum(1) / mask.sum(1) + num_tasks = uncs.shape[-1] + observed_auc = torch.cat( + [torch.zeros(1, num_tasks), observed_auc, torch.ones(1, num_tasks)] + ).T + ideal_auc = torch.arange(num_bins + 1) / num_bins + miscal_area = (1 / num_bins) * (observed_auc - ideal_auc).abs().sum(dim=1) + return miscal_area + + +@UncertaintyEvaluatorRegistry.register("ence") +class ExpectedNormalizedErrorEvaluator(RegressionEvaluator): + r""" + A class that evaluates uncertainty performance by binning together clusters of predictions + and comparing the average predicted variance of the clusters against the RMSE of the cluster. [1]_ + + .. 
math:: + \mathrm{ENCE} = \frac{1}{N} \sum_{i=1}^{N} \frac{|\mathrm{RMV}_i - \mathrm{RMSE}_i|}{\mathrm{RMV}_i} + + where :math:`N` is the number of bins, :math:`\mathrm{RMV}_i` is the root of the mean uncertainty over the + :math:`i`-th bin and :math:`\mathrm{RMSE}_i` is the root mean square error over the :math:`i`-th bin. This + discrepancy is further normalized by the uncertainty over the bin, :math:`\mathrm{RMV}_i`, because the error + is expected to be naturally higher as the uncertainty increases. + + References + ---------- + .. [1] Levi, D.; Gispan, L.; Giladi, N.; Fetaya, E. "Evaluating and Calibrating Uncertainty Prediction in Regression Tasks." + Sensors, 2022, 22(15), 5540. https://www.mdpi.com/1424-8220/22/15/5540 + """ + + def evaluate( + self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor, num_bins: int = 100 + ) -> Tensor: + """Evaluate the performance of uncertainty predictions against the model target values. + + Parameters + ---------- + preds: Tensor + the predictions for regression tasks. It is a tensor of the shape of ``n x t``, where ``n`` is + the number of input molecules/reactions, and ``t`` is the number of tasks. + uncs: Tensor + the predicted uncertainties (variance) of the shape of ``n x t`` + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the evaluation + num_bins: int, default=100 + the number of bins the data are divided into + + Returns + ------- + Tensor + a tensor of the shape ``t`` containing the evaluated metrics + """ + masked_preds = preds * mask + masked_targets = targets * mask + masked_uncs = uncs * mask + errors = torch.abs(masked_preds - masked_targets) + + sort_idx = torch.argsort(masked_uncs, dim=0) + sorted_uncs = torch.gather(masked_uncs, 0, sort_idx) + sorted_errors = torch.gather(errors, 0, sort_idx) + + split_unc = torch.chunk(sorted_uncs, num_bins, dim=0) + split_error = torch.chunk(sorted_errors, num_bins, dim=0) + + root_mean_vars = torch.sqrt(torch.stack([chunk.mean(0) for chunk in split_unc])) + rmses = torch.sqrt(torch.stack([chunk.pow(2).mean(0) for chunk in split_error])) + + ence = torch.mean(torch.abs(root_mean_vars - rmses) / root_mean_vars, dim=0) + return ence + + +@UncertaintyEvaluatorRegistry.register("spearman") +class SpearmanEvaluator(RegressionEvaluator): + """ + Evaluate the Spearman rank correlation coefficient between the uncertainties and errors in the model predictions. + + The correlation coefficient returns a value in the [-1, 1] range, with better scores closer to 1 + observed when the uncertainty values are predictive of the rank ordering of the errors in the model prediction. + """ + + def evaluate(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + spearman_coeffs = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + preds_j = preds[:, j][mask_j] + targets_j = targets[:, j][mask_j] + uncs_j = uncs[:, j][mask_j] + errs_j = (preds_j - targets_j).abs() + spearman = SpearmanCorrCoef() + spearman_coeff = spearman(uncs_j, errs_j) + spearman_coeffs.append(spearman_coeff) + return torch.stack(spearman_coeffs) + + +@UncertaintyEvaluatorRegistry.register("conformal-coverage-regression") +class RegressionConformalEvaluator(RegressionEvaluator): + r""" + Evaluate the coverage of conformal prediction for regression datasets. + + .. math:: + \Pr (Y_{\text{test}} \in C(X_{\text{test}})) + + where the :math:`C(X_{\text{test}})` is the predicted interval. 
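A toy illustration of this coverage metric (all numbers below are made up): ``uncs`` holds full interval widths, so each prediction interval is the prediction plus or minus half the width, mirroring the ``evaluate`` implementation that follows.

import torch

preds = torch.tensor([[0.0], [1.0], [2.0], [3.0]])
uncs = torch.tensor([[1.0], [1.0], [0.5], [2.0]])     # full interval widths
targets = torch.tensor([[0.3], [1.6], [2.1], [2.5]])
mask = torch.ones_like(targets, dtype=torch.bool)

lower, upper = preds - uncs / 2, preds + uncs / 2
covered = (lower <= targets) & (targets <= upper) & mask
coverage = covered.sum(0) / mask.sum(0)               # fraction covered per task
print(coverage)                                       # tensor([0.7500]) for these toy values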
+ """ + + def evaluate(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + bounds = torch.tensor([-1 / 2, 1 / 2], device=mask.device) + interval = uncs.unsqueeze(0) * bounds.view([-1] + [1] * preds.ndim) + lower, upper = preds.unsqueeze(0) + interval + covered_mask = torch.logical_and(lower <= targets, targets <= upper) + + return (covered_mask & mask).sum(0) / mask.sum(0) + + +class BinaryClassificationEvaluator(ABC): + """Evaluates the quality of uncertainty estimates in binary classification tasks.""" + + @abstractmethod + def evaluate(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + """Evaluate the performance of uncertainty predictions against the model target values. + + Parameters + ---------- + uncs: Tensor + the predicted uncertainties (i.e., the predicted probability of class 1) of the shape of ``n x t``, where ``n`` is the number of input + molecules/reactions, and ``t`` is the number of tasks. + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the evaluation + + Returns + ------- + Tensor + a tensor of the shape ``t`` containing the evaluated metrics + """ + + +@UncertaintyEvaluatorRegistry.register("nll-classification") +class NLLClassEvaluator(BinaryClassificationEvaluator): + """ + Evaluate uncertainty values for binary classification datasets using the mean negative-log-likelihood + of the targets given the assigned probabilities from the model: + + .. math:: + + \mathrm{NLL} = -\log(\hat{y} \cdot y + (1 - \hat{y}) \cdot (1 - y)) + + where :math:`y` is the true binary label (0 or 1), and + :math:`\hat{y}` is the predicted probability associated with the class label 1. + + The function returns a tensor containing the mean NLL for each task. + """ + + def evaluate(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + nlls = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + targets_j = targets[:, j][mask_j] + uncs_j = uncs[:, j][mask_j] + likelihood = uncs_j * targets_j + (1 - uncs_j) * (1 - targets_j) + nll = -1 * likelihood.log() + nlls.append(nll.mean(dim=0)) + return torch.stack(nlls) + + +@UncertaintyEvaluatorRegistry.register("conformal-coverage-classification") +class MultilabelConformalEvaluator(BinaryClassificationEvaluator): + r""" + Evaluate the coverage of conformal prediction for binary classification datasets with multiple labels. + + .. math:: + \Pr \left( + \hat{\mathcal C}_{\text{in}}(X) \subseteq \mathcal Y \subseteq \hat{\mathcal C}_{\text{out}}(X) + \right) + + where the in-set :math:`\hat{\mathcal C}_\text{in}` is contained by the set of true labels :math:`\mathcal Y` and + :math:`\mathcal Y` is contained within the out-set :math:`\hat{\mathcal C}_\text{out}`. + """ + + def evaluate(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + in_set, out_set = torch.chunk(uncs, 2, 1) + covered_mask = torch.logical_and(in_set <= targets, targets <= out_set) + return (covered_mask & mask).sum(0) / mask.sum(0) + + +class MulticlassClassificationEvaluator(ABC): + """Evaluates the quality of uncertainty estimates in multiclass classification tasks.""" + + @abstractmethod + def evaluate(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + """Evaluate the performance of uncertainty predictions against the model target values. 
+ + Parameters + ---------- + uncs: Tensor + the predicted uncertainties (i.e., the predicted probabilities for each class) of the shape of ``n x t x c``, where ``n`` is the number of input + molecules/reactions, ``t`` is the number of tasks, and ``c`` is the number of classes. + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the evaluation + + Returns + ------- + Tensor + a tensor of the shape ``t`` containing the evaluated metrics + """ + + +@UncertaintyEvaluatorRegistry.register("nll-multiclass") +class NLLMulticlassEvaluator(MulticlassClassificationEvaluator): + """ + Evaluate uncertainty values for multiclass classification datasets using the mean negative-log-likelihood + of the targets given the assigned probabilities from the model: + + .. math:: + + \mathrm{NLL} = -\log(p_{y_i}) + + where :math:`p_{y_i}` is the predicted probability for the true class :math:`y_i`, calculated as: + + .. math:: + + p_{y_i} = \sum_{k=1}^{K} \mathbb{1}(y_i = k) \cdot p_k + + Here: :math:`K` is the total number of classes, + :math:`\mathbb{1}(y_i = k)` is the indicator function that is 1 when the true class :math:`y_i` equals class :math:`k`, and 0 otherwise, + and :math:`p_k` is the predicted probability for class :math:`k`. + + The function returns a tensor containing the mean NLL for each task. + """ + + def evaluate(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + nlls = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + targets_j = targets[:, j][mask_j] + uncs_j = uncs[:, j][mask_j] + targets_one_hot = torch.eye(uncs_j.shape[-1])[targets_j.long()] + likelihood = (targets_one_hot * uncs_j).sum(dim=-1) + nll = -1 * likelihood.log() + nlls.append(nll.mean(dim=0)) + return torch.stack(nlls) + + +@UncertaintyEvaluatorRegistry.register("conformal-coverage-multiclass") +class MulticlassConformalEvaluator(MulticlassClassificationEvaluator): + r""" + Evaluate the coverage of conformal prediction for multiclass classification datasets. + + .. math:: + \Pr (Y_{\text{test}} \in C(X_{\text{test}})) + + where the :math:`C(X_{\text{test}}) \subset \{1 \mathrel{.\,.} K\}` is a prediction set of possible labels . 
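A toy illustration (made-up prediction sets and labels) of how this coverage is computed from the 0/1 prediction-set tensor produced by a multiclass conformal calibrator, following the same one-hot construction as the ``evaluate`` method below.

import torch

uncs = torch.tensor([[[1, 1, 0]], [[0, 1, 0]], [[0, 0, 1]]])   # n=3, t=1, c=3 prediction sets
targets = torch.tensor([[0], [2], [2]])
mask = torch.ones_like(targets, dtype=torch.bool)

one_hot = torch.nn.functional.one_hot(targets, num_classes=uncs.shape[2])
covered = torch.max(uncs * one_hot, dim=-1)[0] > 0             # true class inside its set?
coverage = (covered & mask).sum(0) / mask.sum(0)
print(coverage)   # tensor([0.6667]): the second datapoint's true class is not in its set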
+ """ + + def evaluate(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + targets_one_hot = torch.nn.functional.one_hot(targets, num_classes=uncs.shape[2]) + covered_mask = torch.max(uncs * targets_one_hot, dim=-1)[0] > 0 + return (covered_mask & mask).sum(0) / mask.sum(0) diff --git a/chemprop-updated/chemprop/utils/__init__.py b/chemprop-updated/chemprop/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a8937a6e06591c8b5eb19bbe5ae00851364351a --- /dev/null +++ b/chemprop-updated/chemprop/utils/__init__.py @@ -0,0 +1,4 @@ +from .registry import ClassRegistry, Factory +from .utils import EnumMapping, make_mol, pretty_shape + +__all__ = ["ClassRegistry", "Factory", "EnumMapping", "make_mol", "pretty_shape"] diff --git a/chemprop-updated/chemprop/utils/registry.py b/chemprop-updated/chemprop/utils/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..58137351965bc80749f10654abce8d8c4d8570e0 --- /dev/null +++ b/chemprop-updated/chemprop/utils/registry.py @@ -0,0 +1,46 @@ +import inspect +from typing import Any, Iterable, Type, TypeVar + +T = TypeVar("T") + + +class ClassRegistry(dict[str, Type[T]]): + def register(self, alias: Any | Iterable[Any] | None = None): + def decorator(cls): + if alias is None: + keys = [cls.__name__.lower()] + elif isinstance(alias, str): + keys = [alias] + else: + keys = alias + + cls.alias = keys[0] + for k in keys: + self[k] = cls + + return cls + + return decorator + + __call__ = register + + def __repr__(self) -> str: # pragma: no cover + return f"{self.__class__.__name__}: {super().__repr__()}" + + def __str__(self) -> str: # pragma: no cover + INDENT = 4 + items = [f"{' ' * INDENT}{repr(k)}: {repr(v)}" for k, v in self.items()] + + return "\n".join([f"{self.__class__.__name__} {'{'}", ",\n".join(items), "}"]) + + +class Factory: + @classmethod + def build(cls, clz_T: Type[T], *args, **kwargs) -> T: + if not inspect.isclass(clz_T): + raise TypeError(f"Expected a class type! got: {type(clz_T)}") + + sig = inspect.signature(clz_T) + kwargs = {k: v for k, v in kwargs.items() if k in sig.parameters.keys()} + + return clz_T(*args, **kwargs) diff --git a/chemprop-updated/chemprop/utils/utils.py b/chemprop-updated/chemprop/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..bf826fc0fa0f1edb8e74d11d769b2b8ba343b424 --- /dev/null +++ b/chemprop-updated/chemprop/utils/utils.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +from enum import StrEnum +from typing import Iterable, Iterator + +from rdkit import Chem + + +class EnumMapping(StrEnum): + @classmethod + def get(cls, name: str | EnumMapping) -> EnumMapping: + if isinstance(name, cls): + return name + + try: + return cls[name.upper()] + except KeyError: + raise KeyError( + f"Unsupported {cls.__name__} member! got: '{name}'. expected one of: {cls.keys()}" + ) + + @classmethod + def keys(cls) -> Iterator[str]: + return (e.name for e in cls) + + @classmethod + def values(cls) -> Iterator[str]: + return (e.value for e in cls) + + @classmethod + def items(cls) -> Iterator[tuple[str, str]]: + return zip(cls.keys(), cls.values()) + + +def make_mol(smi: str, keep_h: bool, add_h: bool) -> Chem.Mol: + """build an RDKit molecule from a SMILES string. + + Parameters + ---------- + smi : str + a SMILES string. + keep_h : bool + whether to keep hydrogens in the input smiles. 
This does not add hydrogens, it only keeps them if they are specified + add_h : bool + whether to add hydrogens to the molecule + + Returns + ------- + Chem.Mol + the RDKit molecule. + """ + if keep_h: + mol = Chem.MolFromSmiles(smi, sanitize=False) + Chem.SanitizeMol( + mol, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS + ) + else: + mol = Chem.MolFromSmiles(smi) + + if mol is None: + raise RuntimeError(f"SMILES {smi} is invalid! (RDKit returned None)") + + if add_h: + mol = Chem.AddHs(mol) + + return mol + + +def pretty_shape(shape: Iterable[int]) -> str: + """Make a pretty string from an input shape + + Example + -------- + >>> X = np.random.rand(10, 4) + >>> X.shape + (10, 4) + >>> pretty_shape(X.shape) + '10 x 4' + """ + return " x ".join(map(str, shape)) diff --git a/chemprop-updated/chemprop/utils/v1_to_v2.py b/chemprop-updated/chemprop/utils/v1_to_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..cd059340a147db7cd383fac6d60ebf1cf87debb2 --- /dev/null +++ b/chemprop-updated/chemprop/utils/v1_to_v2.py @@ -0,0 +1,188 @@ +from os import PathLike + +from lightning.pytorch import __version__ +from lightning.pytorch.utilities.parsing import AttributeDict +import torch + +from chemprop.nn.agg import AggregationRegistry +from chemprop.nn.message_passing import AtomMessagePassing, BondMessagePassing +from chemprop.nn.metrics import LossFunctionRegistry, MetricRegistry +from chemprop.nn.predictors import PredictorRegistry +from chemprop.nn.transforms import UnscaleTransform +from chemprop.utils import Factory + + +def convert_state_dict_v1_to_v2(model_v1_dict: dict) -> dict: + """Converts v1 model dictionary to a v2 state dictionary""" + + state_dict_v2 = {} + args_v1 = model_v1_dict["args"] + + state_dict_v1 = model_v1_dict["state_dict"] + state_dict_v2["message_passing.W_i.weight"] = state_dict_v1["encoder.encoder.0.W_i.weight"] + state_dict_v2["message_passing.W_h.weight"] = state_dict_v1["encoder.encoder.0.W_h.weight"] + state_dict_v2["message_passing.W_o.weight"] = state_dict_v1["encoder.encoder.0.W_o.weight"] + state_dict_v2["message_passing.W_o.bias"] = state_dict_v1["encoder.encoder.0.W_o.bias"] + + # v1.6 renamed ffn to readout + if "readout.1.weight" in state_dict_v1: + for i in range(args_v1.ffn_num_layers): + suffix = 0 if i == 0 else 2 + state_dict_v2[f"predictor.ffn.{i}.{suffix}.weight"] = state_dict_v1[ + f"readout.{i * 3 + 1}.weight" + ] + state_dict_v2[f"predictor.ffn.{i}.{suffix}.bias"] = state_dict_v1[ + f"readout.{i * 3 + 1}.bias" + ] + else: + for i in range(args_v1.ffn_num_layers): + suffix = 0 if i == 0 else 2 + state_dict_v2[f"predictor.ffn.{i}.{suffix}.weight"] = state_dict_v1[ + f"ffn.{i * 3 + 1}.weight" + ] + state_dict_v2[f"predictor.ffn.{i}.{suffix}.bias"] = state_dict_v1[ + f"ffn.{i * 3 + 1}.bias" + ] + + if args_v1.dataset_type == "regression": + state_dict_v2["predictor.output_transform.mean"] = torch.tensor( + model_v1_dict["data_scaler"]["means"], dtype=torch.float32 + ).unsqueeze(0) + state_dict_v2["predictor.output_transform.scale"] = torch.tensor( + model_v1_dict["data_scaler"]["stds"], dtype=torch.float32 + ).unsqueeze(0) + + # target_weights was added in #183 + if getattr(args_v1, "target_weights", None) is not None: + task_weights = torch.tensor(args_v1.target_weights).unsqueeze(0) + else: + task_weights = torch.ones(args_v1.num_tasks).unsqueeze(0) + + state_dict_v2["predictor.criterion.task_weights"] = task_weights + + return state_dict_v2 + + +def 
convert_hyper_parameters_v1_to_v2(model_v1_dict: dict) -> dict: + """Converts v1 model dictionary to v2 hyper_parameters dictionary""" + hyper_parameters_v2 = {} + renamed_metrics = { + "auc": "roc", + "prc-auc": "prc", + "cross_entropy": "ce", + "binary_cross_entropy": "bce", + "mcc": "binary-mcc", + "recall": "recall is not in v2", + "precision": "precision is not in v2", + "balanced_accuracy": "balanced_accuracy is not in v2", + } + + args_v1 = model_v1_dict["args"] + hyper_parameters_v2["batch_norm"] = False + hyper_parameters_v2["metrics"] = [ + Factory.build(MetricRegistry[renamed_metrics.get(args_v1.metric, args_v1.metric)]) + ] + hyper_parameters_v2["warmup_epochs"] = args_v1.warmup_epochs + hyper_parameters_v2["init_lr"] = args_v1.init_lr + hyper_parameters_v2["max_lr"] = args_v1.max_lr + hyper_parameters_v2["final_lr"] = args_v1.final_lr + + # convert the message passing block + W_i_shape = model_v1_dict["state_dict"]["encoder.encoder.0.W_i.weight"].shape + W_h_shape = model_v1_dict["state_dict"]["encoder.encoder.0.W_h.weight"].shape + W_o_shape = model_v1_dict["state_dict"]["encoder.encoder.0.W_o.weight"].shape + + d_h = W_i_shape[0] + d_v = W_o_shape[1] - d_h + d_e = W_h_shape[1] - d_h if args_v1.atom_messages else W_i_shape[1] - d_v + + hyper_parameters_v2["message_passing"] = AttributeDict( + { + "activation": args_v1.activation, + "bias": args_v1.bias, + "cls": BondMessagePassing if not args_v1.atom_messages else AtomMessagePassing, + "d_e": d_e, # the feature dimension of the edges + "d_h": args_v1.hidden_size, # dimension of the hidden layer + "d_v": d_v, # the feature dimension of the vertices + "d_vd": args_v1.atom_descriptors_size, + "depth": args_v1.depth, + "dropout": args_v1.dropout, + "undirected": args_v1.undirected, + } + ) + + # convert the aggregation block + hyper_parameters_v2["agg"] = { + "dim": 0, # in v1, the aggregation is always done on the atom features + "cls": AggregationRegistry[args_v1.aggregation], + } + if args_v1.aggregation == "norm": + hyper_parameters_v2["agg"]["norm"] = args_v1.aggregation_norm + + # convert the predictor block + fgs = args_v1.features_generator or [] + d_xd = sum((200 if "rdkit" in fg else 0) + (2048 if "morgan" in fg else 0) for fg in fgs) + + if getattr(args_v1, "target_weights", None) is not None: + task_weights = torch.tensor(args_v1.target_weights).unsqueeze(0) + else: + task_weights = torch.ones(args_v1.num_tasks).unsqueeze(0) + + # loss_function was added in #238 + loss_fn_defaults = { + "classification": "bce", + "regression": "mse", + "multiclass": "ce", + "specitra": "sid", + } + T_loss_fn = LossFunctionRegistry[ + getattr(args_v1, "loss_function", loss_fn_defaults[args_v1.dataset_type]) + ] + + hyper_parameters_v2["predictor"] = AttributeDict( + { + "activation": args_v1.activation, + "cls": PredictorRegistry[args_v1.dataset_type], + "criterion": Factory.build(T_loss_fn, task_weights=task_weights), + "task_weights": None, + "dropout": args_v1.dropout, + "hidden_dim": args_v1.ffn_hidden_size, + "input_dim": args_v1.hidden_size + args_v1.atom_descriptors_size + d_xd, + "n_layers": args_v1.ffn_num_layers - 1, + "n_tasks": args_v1.num_tasks, + } + ) + + if args_v1.dataset_type == "regression": + hyper_parameters_v2["predictor"]["output_transform"] = UnscaleTransform( + model_v1_dict["data_scaler"]["means"], model_v1_dict["data_scaler"]["stds"] + ) + + return hyper_parameters_v2 + + +def convert_model_dict_v1_to_v2(model_v1_dict: dict) -> dict: + """Converts a v1 model dictionary from a loaded .pt file to a v2 model 
dictionary""" + + model_v2_dict = {} + + model_v2_dict["epoch"] = None + model_v2_dict["global_step"] = None + model_v2_dict["pytorch-lightning_version"] = __version__ + model_v2_dict["state_dict"] = convert_state_dict_v1_to_v2(model_v1_dict) + model_v2_dict["loops"] = None + model_v2_dict["callbacks"] = None + model_v2_dict["optimizer_states"] = None + model_v2_dict["lr_schedulers"] = None + model_v2_dict["hparams_name"] = "kwargs" + model_v2_dict["hyper_parameters"] = convert_hyper_parameters_v1_to_v2(model_v1_dict) + + return model_v2_dict + + +def convert_model_file_v1_to_v2(model_v1_file: PathLike, model_v2_file: PathLike) -> None: + """Converts a v1 model .pt file to a v2 model .pt file""" + + model_v1_dict = torch.load(model_v1_file, map_location=torch.device("cpu"), weights_only=False) + model_v2_dict = convert_model_dict_v1_to_v2(model_v1_dict) + torch.save(model_v2_dict, model_v2_file) diff --git a/chemprop-updated/chemprop/utils/v2_0_to_v2_1.py b/chemprop-updated/chemprop/utils/v2_0_to_v2_1.py new file mode 100644 index 0000000000000000000000000000000000000000..8627637bc63d6594547f8b4f401e52d43808bb16 --- /dev/null +++ b/chemprop-updated/chemprop/utils/v2_0_to_v2_1.py @@ -0,0 +1,40 @@ +import pickle +import sys + +import torch + + +class Unpickler(pickle.Unpickler): + name_mappings = { + "MSELoss": "MSE", + "MSEMetric": "MSE", + "MAEMetric": "MAE", + "RMSEMetric": "RMSE", + "BoundedMSELoss": "BoundedMSE", + "BoundedMSEMetric": "BoundedMSE", + "BoundedMAEMetric": "BoundedMAE", + "BoundedRMSEMetric": "BoundedRMSE", + "SIDLoss": "SID", + "SIDMetric": "SID", + "WassersteinLoss": "Wasserstein", + "WassersteinMetric": "Wasserstein", + "R2Metric": "R2Score", + "BinaryAUROCMetric": "BinaryAUROC", + "BinaryAUPRCMetric": "BinaryAUPRC", + "BinaryAccuracyMetric": "BinaryAccuracy", + "BinaryF1Metric": "BinaryF1Score", + "BCEMetric": "BCELoss", + } + + def find_class(self, module, name): + if module == "chemprop.nn.loss": + module = "chemprop.nn.metrics" + name = self.name_mappings.get(name, name) + return super().find_class(module, name) + + +if __name__ == "__main__": + model = torch.load( + sys.argv[1], map_location="cpu", pickle_module=sys.modules[__name__], weights_only=False + ) + torch.save(model, sys.argv[2]) diff --git a/chemprop-updated/chemprop/web/__pycache__/__init__.cpython-37.pyc b/chemprop-updated/chemprop/web/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e583789216f0a21aa82e5c4521bfe4e7e2e70979 Binary files /dev/null and b/chemprop-updated/chemprop/web/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/web/__pycache__/config.cpython-37.pyc b/chemprop-updated/chemprop/web/__pycache__/config.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b98aa54568a30ac337c685689bd24259deaa5bc Binary files /dev/null and b/chemprop-updated/chemprop/web/__pycache__/config.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/web/__pycache__/run.cpython-37.pyc b/chemprop-updated/chemprop/web/__pycache__/run.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9244cc7bcf7465d40584fdbdb7b94d816832035a Binary files /dev/null and b/chemprop-updated/chemprop/web/__pycache__/run.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/web/__pycache__/utils.cpython-37.pyc b/chemprop-updated/chemprop/web/__pycache__/utils.cpython-37.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..98bc7d90a7a7819fed797504bcc66e32deac6e61 Binary files /dev/null and b/chemprop-updated/chemprop/web/__pycache__/utils.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/web/app/__pycache__/__init__.cpython-37.pyc b/chemprop-updated/chemprop/web/app/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2b429c2989b044e1459612ef2e200beaf4b58720 Binary files /dev/null and b/chemprop-updated/chemprop/web/app/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/web/app/__pycache__/db.cpython-37.pyc b/chemprop-updated/chemprop/web/app/__pycache__/db.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0771eb4cfafc14b740d8b5a12d037816f94fa006 Binary files /dev/null and b/chemprop-updated/chemprop/web/app/__pycache__/db.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/web/app/__pycache__/views.cpython-37.pyc b/chemprop-updated/chemprop/web/app/__pycache__/views.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7afc9091c06166c507ae72789ba64b4a8332a283 Binary files /dev/null and b/chemprop-updated/chemprop/web/app/__pycache__/views.cpython-37.pyc differ diff --git a/chemprop-updated/chemprop/web/chemprop.sqlite3 b/chemprop-updated/chemprop/web/chemprop.sqlite3 new file mode 100644 index 0000000000000000000000000000000000000000..a4adae685e0264b067ded01dd9ca6099c06a2c9b Binary files /dev/null and b/chemprop-updated/chemprop/web/chemprop.sqlite3 differ diff --git a/chemprop-updated/docs/Makefile b/chemprop-updated/docs/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..26b942286fc384c8b4ca0af218fc3ffba0506984 --- /dev/null +++ b/chemprop-updated/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/chemprop-updated/docs/make.bat b/chemprop-updated/docs/make.bat new file mode 100644 index 0000000000000000000000000000000000000000..747ffb7b3033659bdd2d1e6eae41ecb00358a45e --- /dev/null +++ b/chemprop-updated/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/chemprop-updated/docs/source/_static/images/logo/LICENSE.txt b/chemprop-updated/docs/source/_static/images/logo/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..354f1e04f1247b3ddcbfc83b7519594d0f1ba261 --- /dev/null +++ b/chemprop-updated/docs/source/_static/images/logo/LICENSE.txt @@ -0,0 +1,121 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. 
Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. diff --git a/chemprop-updated/docs/source/_static/images/logo/chemprop_logo.png b/chemprop-updated/docs/source/_static/images/logo/chemprop_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..b6bfa1c0ab5c0b297d1919f8ac66f77b61c288de Binary files /dev/null and b/chemprop-updated/docs/source/_static/images/logo/chemprop_logo.png differ diff --git a/chemprop-updated/docs/source/_static/images/logo/chemprop_logo.svg b/chemprop-updated/docs/source/_static/images/logo/chemprop_logo.svg new file mode 100644 index 0000000000000000000000000000000000000000..b121087a389530df4ec36857890010c691771851 --- /dev/null +++ b/chemprop-updated/docs/source/_static/images/logo/chemprop_logo.svg @@ -0,0 +1,185 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/chemprop-updated/docs/source/_static/images/logo/chemprop_logo_dark_mode.svg b/chemprop-updated/docs/source/_static/images/logo/chemprop_logo_dark_mode.svg new file mode 100644 index 0000000000000000000000000000000000000000..0fccc32336bae2cf0134af48ea24973352e548dc --- /dev/null +++ b/chemprop-updated/docs/source/_static/images/logo/chemprop_logo_dark_mode.svg @@ -0,0 +1,186 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/chemprop-updated/docs/source/_static/images/message_passing.png b/chemprop-updated/docs/source/_static/images/message_passing.png new file mode 100644 index 0000000000000000000000000000000000000000..679956675f864e40530c952d469d383d520b1a11 --- /dev/null +++ b/chemprop-updated/docs/source/_static/images/message_passing.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827148fbb3c94f9a4d905e23d48e97b8ad6cb7fbc237497caf7ac3a572068299 +size 168917 diff --git a/chemprop-updated/docs/source/active_learning.nblink b/chemprop-updated/docs/source/active_learning.nblink new file mode 100644 index 0000000000000000000000000000000000000000..25e1bc93592e2b3cd6ed31d4d11e830dd38a6db6 --- /dev/null +++ b/chemprop-updated/docs/source/active_learning.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/active_learning.ipynb" +} \ No newline at end of file diff --git 
a/chemprop-updated/docs/source/cmd.rst b/chemprop-updated/docs/source/cmd.rst new file mode 100644 index 0000000000000000000000000000000000000000..7de9d992d3ead08133505a6dc489206cb711f80f --- /dev/null +++ b/chemprop-updated/docs/source/cmd.rst @@ -0,0 +1,12 @@ +.. _cmd: + +CLI Reference +************* + +.. contents:: Table of Contents + :depth: 3 + :local: + +.. argparse:: + :ref: chemprop.cli.main.construct_parser + :prog: chemprop diff --git a/chemprop-updated/docs/source/conf.py b/chemprop-updated/docs/source/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..7141e55322fa7e6a69a3272ca3fb62619e6809ae --- /dev/null +++ b/chemprop-updated/docs/source/conf.py @@ -0,0 +1,62 @@ +import os +import sys + +sys.path.insert(0, os.path.abspath("../..")) +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "Chemprop" +copyright = "2024, Chemprop developers" +author = "Chemprop developers" +release = "2.1.2" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "nbsphinx", + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "autoapi.extension", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinxcontrib.bibtex", + "sphinx.ext.doctest", + "sphinxarg.ext", + "nbsphinx_link", +] + +nbsphinx_execute = "never" +templates_path = ["_templates"] +exclude_patterns = [] +autodoc_typehints = "description" + +# -- AutoAPI configuration --------------------------------------------------- +nbsphinx_allow_errors = True +autoapi_dirs = ["../.."] +autoapi_ignore = ["*/tests/*", "*/cli/*"] +autoapi_file_patterns = ["*.py"] +autoapi_options = [ + "members", + "undoc-members", + "show-inheritance", + "show-module-summary", + "special-members", + "imported-members", +] +autoapi_keep_files = True + +# -- bibtex configuration --------------------------------------------------- + +bibtex_bibfiles = ["refs.bib"] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_book_theme" +html_static_path = ["_static"] diff --git a/chemprop-updated/docs/source/convert_v1_to_v2.nblink b/chemprop-updated/docs/source/convert_v1_to_v2.nblink new file mode 100644 index 0000000000000000000000000000000000000000..3c2d325cf7515c3f6a3bc512cce46bf170de2cd8 --- /dev/null +++ b/chemprop-updated/docs/source/convert_v1_to_v2.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/convert_v1_to_v2.ipynb" +} diff --git a/chemprop-updated/docs/source/extra_features_descriptors.nblink b/chemprop-updated/docs/source/extra_features_descriptors.nblink new file mode 100644 index 0000000000000000000000000000000000000000..a49793a1dafe38e54e7c9f56e6375206bb83cd30 --- /dev/null +++ b/chemprop-updated/docs/source/extra_features_descriptors.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/extra_features_descriptors.ipynb" +} diff --git a/chemprop-updated/docs/source/hpopting.nblink b/chemprop-updated/docs/source/hpopting.nblink new file mode 100644 index 0000000000000000000000000000000000000000..549c21dce2b725afcb34a772abe07f01c27c3847 
--- /dev/null +++ b/chemprop-updated/docs/source/hpopting.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/hpopting.ipynb" +} diff --git a/chemprop-updated/docs/source/index.rst b/chemprop-updated/docs/source/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..1b48749bc4768b811ef89fdf384e06b2a1aee1f7 --- /dev/null +++ b/chemprop-updated/docs/source/index.rst @@ -0,0 +1,36 @@ +.. Chemprop documentation master file, created by + sphinx-quickstart on Wed Aug 23 22:52:52 2023. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Chemprop's documentation! +==================================== + +This website contains documentation for Chemprop, a PyTorch-based framework for training and evaluating message-passing neural networks (MPNNs) for molecular property prediction. The package was originally developed for :footcite:t:`chemprop_theory` and further described in :footcite:t:`chemprop_software`. + +To get started with Chemprop, check out the :ref:`quickstart` page, and for more detailed information, see the :ref:`installation`, :ref:`tutorial`, and :ref:`notebooks` pages. + +.. note:: + Chemprop recently underwent a ground-up rewrite and new major release (v2.0.0). A helpful transition guide from Chemprop v1 to v2 can be found `here `_. This includes a side-by-side comparison of CLI argument options, a list of which arguments will be implemented in later versions of v2, and a list of changes to default hyperparameters. + +If you use Chemprop to train or develop a model in your own work, we would appreciate if you cite the following papers: + +.. footbibliography:: + +.. toctree:: + :maxdepth: 1 + :caption: Contents: + + quickstart + installation + tutorial/cli/index + tutorial/python/index + notebooks + cmd + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/chemprop-updated/docs/source/installation.rst b/chemprop-updated/docs/source/installation.rst new file mode 100644 index 0000000000000000000000000000000000000000..1ca041852a84c949aeef35e350a319e5b56cf3a0 --- /dev/null +++ b/chemprop-updated/docs/source/installation.rst @@ -0,0 +1,103 @@ +.. _installation: + +Installation +============ + +Chemprop can either be installed from PyPI via pip_, from source (i.e., directly from the `git repo`_) using ``pip`` or the ``environment.yml`` file, or from `Docker`_. The PyPI version includes the vast majority of Chemprop functionality, but some functionality is only accessible when installed from source. We recommend installing ``chemprop`` in a virtual environment (e.g., conda_ or miniconda_). The following sections assume you are using ``conda`` or ``miniconda``, but you can use any virtual environment manager you like (e.g. ``mamba``). + +.. _pip: https://pypi.org/project/chemprop/ +.. _git repo: https://github.com/chemprop/chemprop.git +.. _`Docker`: https://www.docker.com/get-started/ +.. _conda: https://docs.conda.io/en/latest/conda.html +.. _miniconda: https://docs.conda.io/en/latest/miniconda.html + +.. note:: + *Python 3.11 vs. 3.12:* Options 1, 2, and 4 below explicitly specify ``python=3.11`` but you can choose to replace ``python=3.11`` with ``python=3.12`` in these commands. We test Chemprop on both versions in our CI. + +.. note:: + *CPU-only installation:* For the following options 1-3, if you do not have a GPU, you might need to manually install a CPU-only version of PyTorch. 
This should be handled automatically, but if you find that it is not, you should run the following command before installing Chemprop: + + .. code-block:: + + conda install pytorch cpuonly -c pytorch + +Option 1: Installing from PyPI +------------------------------ + +.. code-block:: + + conda create -n chemprop python=3.11 + conda activate chemprop + pip install chemprop + +Option 2: Installing from source using pip +------------------------------------------ + +.. code-block:: + + conda create -n chemprop python=3.11 + conda activate chemprop + git clone https://github.com/chemprop/chemprop.git + cd chemprop + pip install -e . + +.. note:: + You can also use this option to install additional optional dependencies by replacing ``pip install -e .`` with ``pip install -e ".[hpopt,dev,docs,test,notebooks]"``. + +Option 3: Installing from source using environment.yml +------------------------------------------------------- + +.. code-block:: + + git clone https://github.com/chemprop/chemprop.git + cd chemprop + conda env create -f environment.yml + conda activate chemprop + pip install -e . + +Option 4: Installing via Docker +------------------------------- + +Chemprop can also be installed with Docker, making it possible to isolate the Chemprop code and environment. +To install and run Chemprop in a Docker container, first `install Docker`_. +You may then either ``pull`` and use official Chemprop images or ``build`` the image yourself. + +.. _`install Docker`: https://docs.docker.com/get-docker/ + +.. note:: + The Chemprop Dockerfile runs only on CPU and does not support GPU acceleration. + Linux users with NVIDIA GPUs may install the `nvidia-container-toolkit`_ from NVIDIA and modify the installation instructions in the Dockerfile to install the version of `torch` which is compatible with your system's GPUs and drivers. + Adding the ``--gpus all`` argument to ``docker run`` will then allow Chemprop to run on GPU from within the container. You can see other options for exposing GPUs in the `Docker documentation`_. + Users on other systems should install Chemprop from PyPI or source. + +.. _`nvidia-container-toolkit`: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html +.. _`Docker documentation`: https://docs.docker.com/config/containers/resource_constraints/#expose-gpus-for-use + +Pull Official Images +++++++++++++++++++++ + +.. code-block:: + + docker pull chemprop/chemprop:X.Y.Z + docker run -it chemprop/chemprop:X.Y.Z + +Where ``X``, ``Y``, and ``Z`` should be replaced with the version of Chemprop you wish to ``pull``. +For example, to pull ``chemprop-2.0.0`` run + +.. code-block:: + + docker pull chemprop/chemprop:2.0.0 + +.. note:: + Not all versions of Chemprop are available as pre-built images. + Visit the `Docker Hub`_ page for a list of those that are available. + +.. note:: + Nightly builds of Chemprop are available under the ``latest`` tag on Dockerhub and are intended for developer use and as feature previews, not production deployment. + +.. _`Docker Hub`: https://hub.docker.com/repository/docker/chemprop/chemprop/general + +Build Image Locally ++++++++++++++++++++ + +See the build instructions in the top of the ``Dockerfile``. 
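+
+Regardless of which of the options above you used (and from inside the container, for the Docker option), a quick sanity check (a suggestion, not an official step) is to confirm that the CLI is available by printing the help text for one of its commands, e.g.:
+
+.. code-block::
+
+    conda activate chemprop
+    chemprop train --help
+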
diff --git a/chemprop-updated/docs/source/interpreting_monte_carlo_tree_search.nblink b/chemprop-updated/docs/source/interpreting_monte_carlo_tree_search.nblink new file mode 100644 index 0000000000000000000000000000000000000000..6b3f3ab03c3895d4cd63b0371ac047a607171200 --- /dev/null +++ b/chemprop-updated/docs/source/interpreting_monte_carlo_tree_search.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/interpreting_monte_carlo_tree_search.ipynb" +} diff --git a/chemprop-updated/docs/source/mpnn_fingerprints.nblink b/chemprop-updated/docs/source/mpnn_fingerprints.nblink new file mode 100644 index 0000000000000000000000000000000000000000..059bd126a21145eaae4cd28c268eeb57e852c665 --- /dev/null +++ b/chemprop-updated/docs/source/mpnn_fingerprints.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/mpnn_fingerprints.ipynb" +} diff --git a/chemprop-updated/docs/source/multi_task.nblink b/chemprop-updated/docs/source/multi_task.nblink new file mode 100644 index 0000000000000000000000000000000000000000..88f8b0d252aa9f57d24be499badbce09b244010e --- /dev/null +++ b/chemprop-updated/docs/source/multi_task.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/multi_task.ipynb" +} diff --git a/chemprop-updated/docs/source/notebooks.rst b/chemprop-updated/docs/source/notebooks.rst new file mode 100644 index 0000000000000000000000000000000000000000..dbe6ac0c606389bfb97ffe53f89f9a3be4d6a999 --- /dev/null +++ b/chemprop-updated/docs/source/notebooks.rst @@ -0,0 +1,30 @@ +.. _notebooks: + +Jupyter Notebook Examples +========================= + +Chemprop's usage within Python scripts is also illustrated by the Jupyter notebooks on the following pages. + + +.. toctree:: + :maxdepth: 1 + :hidden: + + training + predicting + training_classification + training_regression_multicomponent + predicting_regression_multicomponent + training_regression_reaction + predicting_regression_reaction + multi_task + hpopting + mpnn_fingerprints + active_learning + transfer_learning + uncertainty + interpreting_monte_carlo_tree_search + shapley_value_with_customized_featurizers + extra_features_descriptors + use_featurizer_with_other_libraries + convert_v1_to_v2 \ No newline at end of file diff --git a/chemprop-updated/docs/source/predicting.nblink b/chemprop-updated/docs/source/predicting.nblink new file mode 100644 index 0000000000000000000000000000000000000000..ca5bfdadc82559f2e212b9b2a3fecf707903a0bf --- /dev/null +++ b/chemprop-updated/docs/source/predicting.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/predicting.ipynb" +} diff --git a/chemprop-updated/docs/source/predicting_regression_multicomponent.nblink b/chemprop-updated/docs/source/predicting_regression_multicomponent.nblink new file mode 100644 index 0000000000000000000000000000000000000000..a406d1665795906be4ccb6d45249de18ff59a41d --- /dev/null +++ b/chemprop-updated/docs/source/predicting_regression_multicomponent.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/predicting_regression_multicomponent.ipynb" +} diff --git a/chemprop-updated/docs/source/predicting_regression_reaction.nblink b/chemprop-updated/docs/source/predicting_regression_reaction.nblink new file mode 100644 index 0000000000000000000000000000000000000000..d5fac4357b87412f98dab5a22e28562f01777370 --- /dev/null +++ b/chemprop-updated/docs/source/predicting_regression_reaction.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/predicting_regression_reaction.ipynb" +} diff --git a/chemprop-updated/docs/source/quickstart.rst b/chemprop-updated/docs/source/quickstart.rst new file mode 100644 index 
0000000000000000000000000000000000000000..c307f052ccee740edf872985d0c4cb96b8aba0cd --- /dev/null +++ b/chemprop-updated/docs/source/quickstart.rst @@ -0,0 +1,83 @@ +.. _quickstart: + +Quickstart +========== + +To get started with Chemprop, first install the package using the instructions in the :ref:`installation` section. Once you have Chemprop installed, you can train a model on your own data or use the pre-packaged solubility dataset to get a feel for how the package works. + +Let's use the solubility data that comes pre-packaged in the Chemprop directory: + +.. code-block:: text + + $ head tests/data/regression.csv + smiles,logSolubility + OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O,-0.77 + Cc1occc1C(=O)Nc2ccccc2,-3.3 + CC(C)=CCCC(C)=CC(=O),-2.06 + c1ccc2c(c1)ccc3c2ccc4c5ccccc5ccc43,-7.87 + c1ccsc1,-1.33 + c2ccc1scnc1c2,-1.5 + Clc1cc(Cl)c(c(Cl)c1)c2c(Cl)cccc2Cl,-7.32 + CC12CCC3C(CCc4cc(O)ccc34)C2CCC1O,-5.03 + ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl,-6.29 + ... + +Now we're ready to train a simple Chemprop model: + +.. code-block:: bash + + chemprop train --data-path tests/data/regression.csv \ + --task-type regression \ + --output-dir train_example + +This will train a model on the solubility dataset (``tests/data/regression.csv``) and save the model and training logs in the ``train_example`` directory. You should see some output printed to your terminal that shows the model architecture, number of parameters, and a progress bar for each epoch of training. At the end, you should see something like: + +.. code-block:: text + + ─────────────────────────────────────────────────────── + Test metric DataLoader 0 + ─────────────────────────────────────────────────────── + test/mse 0.7716904154601469 + ─────────────────────────────────────────────────────── + +With our trained model in hand, we can now use it to predict solubilities of new molecules. In the absence of additional data, for demonstration purposes, let's just test on the same molecules that we trained on: + +.. code-block:: bash + + chemprop predict --test-path tests/data/regression.csv \ + --model-path train_example/model_0/best.pt \ + --preds-path train_example/predictions.csv + +This should output a file ``train_example/predictions_0.csv`` containing the predicted log(solubility) values for the molecules contained in ``tests/data/regression.csv``. + +.. code-block:: text + + $ head train_example/predictions_0.csv + smiles,logSolubility,pred_0 + OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O,-0.77,-1.0349703 + Cc1occc1C(=O)Nc2ccccc2,-3.3,-3.0304263 + CC(C)=CCCC(C)=CC(=O),-2.06,-2.0320206 + ... + +Given that our test data is identical to our training data, it makes sense that the predictions are similar to the ground truth values. + +In the rest of this documentation, we'll go into more detail about how to: + +* :ref:`Install Chemprop` +* :ref:`Customize model architecture and task type` +* :ref:`Specify training parameters: split type, learning rate, batch size, loss function, etc. ` +* :ref:`Use Chemprop as a Python package ` +* :ref:`Perform a hyperparameter optimization ` +* :ref:`Generate a molecular fingerprint ` +.. * :ref:`Quantify prediction uncertainty` + +Summary +------- + +* Install Chemprop using the instructions in the :ref:`installation` section +* Train a model with ``chemprop train --data-path --task-type --output-dir `` +* Use a saved model for prediction with ``chemprop predict --test-path --checkpoint-dir --preds-path `` + +.. 
_GitHub repository: https://github.com/chemprop/chemprop +.. + .. _FreeSolv dataset: https://pubmed.ncbi.nlm.nih.gov/24928188/ \ No newline at end of file diff --git a/chemprop-updated/docs/source/refs.bib b/chemprop-updated/docs/source/refs.bib new file mode 100644 index 0000000000000000000000000000000000000000..5c87772f0976442f01a0b142ad3ef9e0d41bb806 --- /dev/null +++ b/chemprop-updated/docs/source/refs.bib @@ -0,0 +1,37 @@ +# this was downloaded from ACS: https://pubs.acs.org/doi/10.1021/acs.jcim.9b00237 +@article{chemprop_theory, + author = {Yang, Kevin and Swanson, Kyle and Jin, Wengong and Coley, Connor and Eiden, Philipp and Gao, Hua and Guzman-Perez, Angel and Hopper, Timothy and Kelley, Brian and Mathea, Miriam and Palmer, Andrew and Settels, Volker and Jaakkola, Tommi and Jensen, Klavs and Barzilay, Regina}, + title = {Analyzing Learned Molecular Representations for Property Prediction}, + journal = {Journal of Chemical Information and Modeling}, + volume = {59}, + number = {8}, + pages = {3370-3388}, + year = {2019}, + doi = {10.1021/acs.jcim.9b00237}, + note ={PMID: 31361484}, + URL = { + https://doi.org/10.1021/acs.jcim.9b00237 + }, + eprint = { + https://doi.org/10.1021/acs.jcim.9b00237 + } +} + +# this was downloaded from ACS: https://pubs.acs.org/doi/10.1021/acs.jcim.3c01250 +@article{chemprop_software, + author = {Heid, Esther and Greenman, Kevin P. and Chung, Yunsie and Li, Shih-Cheng and Graff, David E. and Vermeire, Florence H. and Wu, Haoyang and Green, William H. and McGill, Charles J.}, + title = {Chemprop: A Machine Learning Package for Chemical Property Prediction}, + journal = {Journal of Chemical Information and Modeling}, + volume = {64}, + number = {1}, + pages = {9-17}, + year = {2024}, + doi = {10.1021/acs.jcim.3c01250}, + note ={PMID: 38147829}, + URL = { + https://doi.org/10.1021/acs.jcim.3c01250 + }, + eprint = { + https://doi.org/10.1021/acs.jcim.3c01250 + } +} \ No newline at end of file diff --git a/chemprop-updated/docs/source/shapley_value_with_customized_featurizers.nblink b/chemprop-updated/docs/source/shapley_value_with_customized_featurizers.nblink new file mode 100644 index 0000000000000000000000000000000000000000..12a61d9c424e5588b43fea4a3ec6a8fc9520c254 --- /dev/null +++ b/chemprop-updated/docs/source/shapley_value_with_customized_featurizers.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/shapley_value_with_customized_featurizers.ipynb" +} diff --git a/chemprop-updated/docs/source/training.nblink b/chemprop-updated/docs/source/training.nblink new file mode 100644 index 0000000000000000000000000000000000000000..ccfd59543f1892d55957ec7ef0b44111f6861dd3 --- /dev/null +++ b/chemprop-updated/docs/source/training.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/training.ipynb" +} diff --git a/chemprop-updated/docs/source/training_classification.nblink b/chemprop-updated/docs/source/training_classification.nblink new file mode 100644 index 0000000000000000000000000000000000000000..48d0526d0bc7198225f3662b35e829ef51593ffa --- /dev/null +++ b/chemprop-updated/docs/source/training_classification.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/training_classification.ipynb" +} diff --git a/chemprop-updated/docs/source/training_regression_multicomponent.nblink b/chemprop-updated/docs/source/training_regression_multicomponent.nblink new file mode 100644 index 0000000000000000000000000000000000000000..99c5bd4fe795054747f2502bcb1be8c2ed905782 --- /dev/null +++ b/chemprop-updated/docs/source/training_regression_multicomponent.nblink @@ -0,0 +1,3 @@ 
+{ +"path": "../../examples/training_regression_multicomponent.ipynb" +} diff --git a/chemprop-updated/docs/source/training_regression_reaction.nblink b/chemprop-updated/docs/source/training_regression_reaction.nblink new file mode 100644 index 0000000000000000000000000000000000000000..fdd7511550caf6bf5214d3256055b7f19a4ad910 --- /dev/null +++ b/chemprop-updated/docs/source/training_regression_reaction.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/training_regression_reaction.ipynb" +} diff --git a/chemprop-updated/docs/source/transfer_learning.nblink b/chemprop-updated/docs/source/transfer_learning.nblink new file mode 100644 index 0000000000000000000000000000000000000000..542ad040fbce9b0d9604867f9baa1c85323d249b --- /dev/null +++ b/chemprop-updated/docs/source/transfer_learning.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/transfer_learning.ipynb" +} \ No newline at end of file diff --git a/chemprop-updated/docs/source/tutorial/cli/convert.rst b/chemprop-updated/docs/source/tutorial/cli/convert.rst new file mode 100644 index 0000000000000000000000000000000000000000..6eab765aa5dbc78941437df0e5c25e1a5d5c24c7 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/cli/convert.rst @@ -0,0 +1,10 @@ +.. _convert: + +Conversion +---------- + +To convert a trained model from Chemprop v1 to v2, run ``chemprop convert`` and specify: + + * :code:`--input-path ` Path of the Chemprop v1 file to convert. + * :code:`--output-path ` Path where the converted Chemprop v2 will be saved. If unspecified, this will default to ``_v2.ckpt``. + diff --git a/chemprop-updated/docs/source/tutorial/cli/fingerprint.rst b/chemprop-updated/docs/source/tutorial/cli/fingerprint.rst new file mode 100644 index 0000000000000000000000000000000000000000..b2c0af8a2d03c859dc23eac795f234a83e63ae34 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/cli/fingerprint.rst @@ -0,0 +1,34 @@ +.. _fingerprint: + +Fingerprint +============================ + +To calculate the learned representations (encodings) of model inputs from a pretrained model, run + +.. code-block:: + + chemprop fingerprint --test-path --model-path + +where :code:`` is the path to the CSV file containing SMILES strings, and :code:`` is the location of checkpoint(s) or model file(s) to use for prediction. It can be a path to either a single pretrained model checkpoint (.ckpt) or single pretrained model file (.pt), a directory that contains these files, or a list of path(s) and directory(s). If a directory, will recursively search and predict on all found (.pt) models. By default, predictions will be saved to the same directory as the test path. If desired, a different directory can be specified by using :code:`--output `. The output can end with either .csv or .npz, and the output will be saved to the corresponding file type. + +For example: + +.. code-block:: + + chemprop fingerprint --test-path tests/data/smis.csv \ + --model-path tests/data/example_model_v2_regression_mol.ckpt \ + --output fps.csv + + +Specifying FFN encoding layer +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, the encodings are returned from the penultimate linear layer of the model's FFN. However, the exact layer to draw encodings from can be specified using :code:`--ffn-block-index `. + +An index of 0 will simply return the post-aggregation representation without passing through the FFN. Here, an index of 1 will return the output of the first linear layer of the FFN, an index of 2 the second layer, and so on. 
+ + +Specifying Data to Parse +^^^^^^^^^^^^^^^^^^^^^^^^ + +:code:`fingerprint` shares the same arguments for specifying SMILES columns and reaction types as :code:`predict`. For more detail, see :ref:`predict`. \ No newline at end of file diff --git a/chemprop-updated/docs/source/tutorial/cli/hpopt.rst b/chemprop-updated/docs/source/tutorial/cli/hpopt.rst new file mode 100644 index 0000000000000000000000000000000000000000..53d1ce6d47043462b28bbe8f83091fd15b624f0c --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/cli/hpopt.rst @@ -0,0 +1,79 @@ +.. _hpopt: + +Hyperparameter Optimization +============================ + +.. note:: + Chemprop relies on `Ray Tune `_ for hyperparameter optimization, which is an optional install. To install the required dependencies, run :code:`pip install -U ray[tune]` if installing with PyPI, or :code:`pip install -e .[hpopt]` if installing from source. + +Searching Hyperparameter Space +-------------------------------- + +We include an automated hyperparameter optimization procedure through the Ray Tune package. Hyperparameter optimization can be run as follows: + +.. code-block:: + + chemprop hpopt --data-path --task-type --search-parameter-keywords --hpopt-save-dir + +For example: + +.. code-block:: + + chemprop hpopt --data-path tests/data/regression.csv \ --task-type regression \ --search-parameter-keywords depth ffn_num_layers message_hidden_dim \ --hpopt-save-dir results + +The search parameters can be any combination of hyperparameters or a predefined set. Options include :code:`basic` (default), which consists of: + + * :code:`depth` The number of message passing steps + * :code:`ffn_num_layers` The number of layers in the FFN model + * :code:`dropout` The probability (from 0.0 to 1.0) of dropout in the MPNN & FFN layers + * :code:`message_hidden_dim` The hidden dimension in the message passing step + * :code:`ffn_hidden_dim` The hidden dimension in the FFN model + +Another option is :code:`learning_rate` which includes: + + * :code:`max_lr` The maximum learning rate + * :code:`init_lr` The initial learning rate. It is searched as a ratio relative to the max learning rate + * :code:`final_lr` The final learning rate. It is searched as a ratio relative to the max learning rate + * :code:`warmup_epochs` Number of warmup epochs, during which the learning rate linearly increases from the initial to the maximum learning rate + +Other individual search parameters include: + + * :code:`activation` The activation function used in the MPNN & FFN layers. Choices include ``relu``, ``leakyrelu``, ``prelu``, ``tanh``, ``selu``, and ``elu`` + * :code:`aggregation` The aggregation mode used to pool atomic representations into a molecule-level representation. Choices include ``mean``, ``sum``, ``norm`` + * :code:`aggregation_norm` For ``norm`` aggregation, the normalization factor by which atomic features are divided + * :code:`batch_size` Batch size for dataloader + +Specifying :code:`--search-parameter-keywords all` will search over all 13 of the above parameters. + +The following other common keywords may be used: + + * :code:`--raytune-num-samples ` The number of trials to perform + * :code:`--raytune-num-cpus ` The number of CPUs to use + * :code:`--raytune-num-gpus ` The number of GPUs to use + * :code:`--raytune-max-concurrent-trials ` The maximum number of concurrent trials + * :code:`--raytune-search-algorithm ` The search algorithm to use (either ``random``, ``hyperopt``, or ``optuna``). 
If ``hyperopt`` is specified, then the arguments ``--hyperopt-n-initial-points `` and ``--hyperopt-random-state-seed `` can be specified. + +Other keywords related to hyperparameter optimization are also available (see :ref:`cmd` for a full list). + +Splitting +---------- +By default, Chemprop will split the data into train / validation / test data splits. The splitting behavior can be modified using the same splitting arguments used in training, i.e., section :ref:`train_validation_test_splits`. + +.. note:: + This default splitting behavior is different from Chemprop v1, wherein the hyperparameter optimization was performed on the entirety of the data provided to it. + +If ``--num-replicates`` is greater than one, Chemprop will only use the first split to perform hyperparameter optimization. If you need to optimize hyperparameters separately for several different cross validation splits, you should e.g. set up a bash script to run :code:`chemprop hpopt` separately on each split. + + +Applying Optimal Hyperparameters +--------------------------------- + +Once hyperparameter optimization is complete, the optimal hyperparameters can be applied during training by specifying the config path. If an argument is both provided via the command line and the config file, the command line takes precedence. For example: + +.. code-block:: + + chemprop train --data-path tests/data/regression.csv \ + --config-path results/best_config.toml diff --git a/chemprop-updated/docs/source/tutorial/cli/index.rst b/chemprop-updated/docs/source/tutorial/cli/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..a5c54c8111e9857ef5388183f27634044f0a915d --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/cli/index.rst @@ -0,0 +1,62 @@ +.. _tutorial: + +Command Line Tutorials +====================== + +.. note:: + Chemprop recently underwent a ground-up rewrite and new major release (v2.0.0). A helpful transition guide from Chemprop v1 to v2 can be found `here `_. This includes a side-by-side comparison of CLI argument options, a list of which arguments will be implemented in later versions of v2, and a list of changes to default hyperparameters. + +Chemprop may be invoked from the command line using the following command: + +.. code-block:: + + $ chemprop COMMAND [ARGS] + +where ``COMMAND`` is one of the following: + +* ``train``: Train a model. +* ``predict``: Make predictions with a trained model. +* ``convert``: Convert a trained Chemprop model from v1 to v2. +* ``hpopt``: Perform hyperparameter optimization. +* ``fingerprint``: Use a trained model to compute a learned representation. + +and ``ARGS`` are command-specific arguments. To see the arguments for a specific command, run: + +.. code-block:: + + $ chemprop COMMAND --help + +For example, to see the arguments for the ``train`` command, run: + +.. code-block:: + + $ chemprop train --help + +To enable logging, specify ``--log `` or ``--logfile ``, where ```` is the desired path to which the logfile should be written; if unspecified, the log will be written to ``chemprop_logs``. +The default logging level is INFO. If more detailed debugging information is required, specify ``-v`` for DEBUG level. To decrease verbosity below the default INFO level, use ``-q`` for WARNING or ``-qq`` for ERROR. + +Chemprop is built on top of Lightning, which has support for training and predicting on GPUs. +Relevant CLI flags include `--accelerator` and `--devices`. +See the `Lightning documentation `_ and CLI reference for more details. 
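+
+For example, a hypothetical single-GPU training run could be launched as follows (the dataset and all flags other than the accelerator options are the same as in the training tutorial):
+
+.. code-block::
+
+    chemprop train --data-path tests/data/regression.csv \
+        --task-type regression \
+        --accelerator gpu \
+        --devices 1
+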
+ +For more details on each command, see the corresponding section below: + +* :ref:`train` +* :ref:`predict` +* :ref:`convert` +* :ref:`hpopt` +* :ref:`fingerprint` + +The following features are not yet implemented, but will be included in a future release: + +* ``interpret``: Interpret model predictions. + +.. toctree:: + :maxdepth: 1 + :hidden: + + train + predict + convert + hpopt + fingerprint \ No newline at end of file diff --git a/chemprop-updated/docs/source/tutorial/cli/interpret.rst b/chemprop-updated/docs/source/tutorial/cli/interpret.rst new file mode 100644 index 0000000000000000000000000000000000000000..bddaec99c9b2ab47cc991f68717891ea9cc4ff65 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/cli/interpret.rst @@ -0,0 +1,37 @@ +.. _interpret: + +Interpreting +============ + +.. warning:: + This page is under construction. + +.. + It is often helpful to provide explanation of model prediction (i.e., this molecule is toxic because of this substructure). Given a trained model, you can interpret the model prediction using the following command: + + .. code-block:: + + chemprop interpret --data_path data/tox21.csv --checkpoint_dir tox21_checkpoints/fold_0/ --property_id 1 + + The output will be like the following: + + * The first column is a molecule and second column is its predicted property (in this case NR-AR toxicity). + * The third column is the smallest substructure that made this molecule classified as toxic (which we call rationale). + * The fourth column is the predicted toxicity of that substructure. + + As shown in the first row, when a molecule is predicted to be non-toxic, we will not provide any rationale for its prediction. + + .. csv-table:: + :header: "smiles", "NR-AR", "rationale", "rationale_score" + :widths: 20, 10, 20, 10 + + "O=[N+]([O-])c1cc(C(F)(F)F)cc([N+](=O)[O-])c1Cl", "0.014", "", "" + "CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)C=C[C@]5(C)[C@H]4[C@@H](O)C[C@]3(C)[C@]2(C(=O)CO)O1", "0.896", "C[C@]12C=CC(=O)C=C1[CH2:1]C[CH2:1][CH2:1]2", "0.769" + "C[C@]12CC[C@H]3[C@@H](CC[C@@]45O[C@@H]4C(O)=C(C#N)C[C@]35C)[C@@H]1CC[C@@H]2O", "0.941", "C[C@]12C[CH:1]=[CH:1][C@H]3O[C@]31CC[C@@H]1[C@@H]2CC[C:1][CH2:1]1", "0.808" + "C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)COP(=O)([O-])[O-]", "0.957", "C1C[CH2:1][C:1][C@@H]2[C@@H]1[C@@H]1CC[C:1][C:1]1C[CH2:1]2", "0.532" + + Chemprop's interpretation script explains model prediction one property at a time. :code:`--property_id 1` tells the script to provide explanation for the first property in the dataset (which is NR-AR). In a multi-task training setting, you will need to change :code:`--property_id` to provide explanation for each property in the dataset. + + For computational efficiency, we currently restricted the rationale to have maximum 20 atoms and minimum 8 atoms. You can adjust these constraints through :code:`--max_atoms` and :code:`--min_atoms` argument. + + Please note that the interpreting framework is currently only available for models trained on properties of single molecules, that is, multi-molecule models generated via the :code:`--number_of_molecules` command are not supported. diff --git a/chemprop-updated/docs/source/tutorial/cli/predict.rst b/chemprop-updated/docs/source/tutorial/cli/predict.rst new file mode 100644 index 0000000000000000000000000000000000000000..091a3f9f261f1edf23e6c77a7846b45b738fa61a --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/cli/predict.rst @@ -0,0 +1,109 @@ +.. 
_predict: + +Prediction +---------- + +To load a trained model and make predictions, run: + +.. code-block:: + + chemprop predict --test-path --model-paths <[model_paths]> + +where :code:`` is the path to the data to test on, and :code:`<[model_paths]>` is the location of checkpoint(s) or model file(s) to use for prediction. It can be a path to either a single pretrained model checkpoint (.ckpt) or single pretrained model file (.pt), a directory that contains these files, or a list of path(s) and directory(s). If a directory, will recursively search and predict on all found (.pt) models. By default, predictions will be saved to the same directory as the test path. If desired, a different directory can be specified by using :code:`--preds-path `. The predictions can end with either .csv or .pkl, and the output will be saved to the corresponding file type. + +For example: + +.. code-block:: + + chemprop predict --test-path tests/data/smis.csv \ --model-path tests/data/example_model_v2_regression_mol.ckpt \ --preds-path preds.csv + + +Specifying Data to Parse +^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, Chemprop will assume that the 0th column in the data .csv contains the SMILES strings. To use a separate column, specify: + + * :code:`--smiles-columns` Text label of the column that includes the SMILES strings + +If atom-mapped reaction SMILES are used, specify: + + * :code:`--reaction-columns` Text labels of the columns that include the reaction SMILES + +If :code:`--reaction-mode` was specified during training, those same flags must be specified for the prediction step. + + +Uncertainty Quantification +-------------------------- + +To load a trained model and perform uncertainty quantification, run: + +.. code-block:: + + chemprop predict --test-path \ --cal-path \ --model-paths <[model_paths]> \ --uncertainty-method \ --calibration-method \ --evaluation-methods <[methods]> + +where :code:`` is the path to the data to test on, :code:`` is the calibration dataset used for uncertainty calibration if needed, and :code:`<[model_paths]>` is the location of checkpoint(s) or model file(s) to use for prediction. The uncertainty estimation, calibration, and evaluation methods are detailed below. + +Uncertainty Estimation +^^^^^^^^^^^^^^^^^^^^^^ + +The uncertainty of predictions made in Chemprop can be estimated by several different methods. Uncertainty estimation is carried out alongside model value prediction and reported in the predictions csv file when the argument :code:`--uncertainty-method ` is provided. If no uncertainty method is provided, then only the model value predictions will be carried out. The available methods are: + + * :code:`dropout` + * :code:`ensemble` + * :code:`quantile-regression` + * :code:`mve` + * :code:`evidential-total`, :code:`evidential-epistemic`, :code:`evidential-aleatoric` + * :code:`classification` + * :code:`classification-dirichlet` + * :code:`multiclass` + * :code:`multiclass-dirichlet` + +Uncertainty Calibration +^^^^^^^^^^^^^^^^^^^^^^^ + +Uncertainty predictions may be calibrated to improve their performance on new predictions. Calibration methods are selected using :code:`--calibration-method `, options provided below. An additional dataset to use in calibration is provided through :code:`--cal-path `, along with necessary features like :code:`--cal-descriptors-path `. As with the data used in training, calibration data for multitask models are allowed to have gaps and missing targets in the data.
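+
+For example, ensemble variances might be estimated and then calibrated with z-scaling in a single call along the lines of the following sketch (the data and checkpoint paths are placeholders; the available calibration methods are listed below):
+
+.. code-block::
+
+    chemprop predict --test-path my_test.csv \
+        --cal-path my_calibration.csv \
+        --model-paths my_ensemble_checkpoints/ \
+        --uncertainty-method ensemble \
+        --calibration-method zscaling \
+        --preds-path preds_with_uncertainty.csv
+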
+ +**Regression**: + + * :code:`zscaling` Assumes that errors are normally distributed according to the estimated variance for each prediction. Applies a constant multiple to all stdev or interval outputs in order to minimize the negative log likelihood for the normal distributions. (https://arxiv.org/abs/1905.11659) + * :code:`zelikman-interval` Assumes that the error distribution is the same for each prediction but scaled by the uncalibrated standard deviation for each. Multiplies the uncalibrated standard deviation by a factor necessary to cover the specified interval of the calibration set. Does not assume a Gaussian distribution. The interval to cover is set with :code:`--calibration-interval-percentile `, which defaults to 95. (https://arxiv.org/abs/2005.12496) + * :code:`mve-weighting` For use with ensembles of models trained with mve or evidential loss function. Uses a weighted average of the predicted variances to achieve a minimum negative log likelihood of predictions. (https://doi.org/10.1186/s13321-021-00551-x) + * :code:`conformal-regression` Generates an interval of variable size for each prediction based on quantile predictions of the data such that the actual value has probability :math:`1 - \alpha` of falling in the interval. The desired error rate is controlled using the parameter :code:`--conformal-alpha ` which is set by default to 0.1. (https://arxiv.org/abs/2107.07511) + +**Classification**: + + * :code:`platt` Uses a linear scaling before the sigmoid function in prediction to minimize the negative log likelihood of the predictions. (https://arxiv.org/abs/1706.04599) + * :code:`isotonic` Fits an isotonic regression model to the predictions. Prediction outputs are transformed using a stepped histogram-style to match the empirical probability observed in the calibration data. Number and size of the histogram bins are procedurally decided. Histogram bins are wider in the regions of the model output that are less reliable in ordering confidence. (https://arxiv.org/abs/1706.04599) + * :code:`conformal-multilabel` Generates a pair of sets of labels :math:`C_{in} \subset C_{out}` such that the true set of labels :math:`S` satisfies the property :math:`C_{in} \subset S \subset C_{out}` with probability at least :math:`1-\alpha`. The desired error rate :math:`\alpha` can be controlled with the parameter :code:`--conformal-alpha ` which is set by default to 0.1. (https://arxiv.org/abs/2004.10181) + + +**Multiclass**: + + * :code:`conformal-multiclass` Generates a set of possible classes for each prediction such that the true class has probability :math:`1-\alpha` of falling in the set. The desired error rate :math:`\alpha` can be controlled with the parameter :code:`--conformal-alpha ` which is set by default to 0.1. Set generated using the basic conformal method. (https://arxiv.org/abs/2107.07511) + * :code:`conformal-adaptive` Similar to conformal-multiclass, this method generates a set of possible classes but uses an adaptive conformal method. The desired error rate :math:`\alpha` can be controlled with the parameter :code:`--conformal-alpha ` which is set by default to 0.1. (https://arxiv.org/abs/2107.07511) + * :code:`isotonic-multiclass` Calibrates multiclass classification datasets using isotonic regression. It uses a one-vs-all aggregation scheme to extend isotonic regression from binary to multiclass classifiers. 
(https://arxiv.org/abs/1706.04599) + +Uncertainty Evaluation Metrics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The performance of uncertainty predictions (calibrated or uncalibrated) is evaluated on the test set using the evaluation metrics specified with :code:`--evaluation-methods <[methods]>`. +Evaluation scores will only appear in the output trace. Multiple evaluation methods can be provided and they will be calculated separately for each model task. Evaluation is only available when the target values are provided with the data in :code:`--test-path `. As with the data used in training, evaluation data for multitask models are allowed to have gaps and missing targets in the data. + + .. * Any valid classification or multiclass metric. Because classification and multiclass outputs are inherently probabilistic, any metric used to assess them during training is appropriate to evaluate the confidences produced after calibration. + + * :code:`nll-regression`, :code:`nll-classification`, :code:`nll-multiclass` Returns the average negative log likelihood of the real target as indicated by the uncertainty predictions. Enabled for regression, classification, and multiclass dataset types. + * :code:`spearman` A regression evaluation metric. Returns the Spearman rank correlation between the predicted uncertainty and the actual error in predictions. Only considers ordering, does not assume a particular probability distribution. + * :code:`ence` Expected normalized calibration error. A regression evaluation metric. Bins model predictions according to the predicted uncertainty and compares the RMSE in each bin versus the expected error based on the predicted uncertainty variance, then scaled by variance. (discussed in https://doi.org/10.1021/acs.jcim.9b00975) + * :code:`miscalibration_area` A regression evaluation metric. Calculates the model's performance of expected probability versus realized probability at different points along the probability distribution. Values range (0, 0.5) with perfect calibration at 0. (discussed in https://doi.org/10.1021/acs.jcim.9b00975) + * :code:`conformal-coverage-regression`, :code:`conformal-coverage-classification`, :code:`conformal-coverage-multiclass` Measures the empirical coverage of the conformal methods, that is, the proportion of datapoints that fall within the output set or interval. Must be used with a conformal calibration method which outputs a set or interval. The metric can be used with multiclass, multilabel, or regression conformal methods. + +Different evaluation metrics consider different aspects of uncertainty. It is often appropriate to consider multiple metrics. For instance, miscalibration error is important for evaluating uncertainty magnitude but does not indicate that the uncertainty function discriminates well between different outputs. Similarly, spearman tests ordering but not prediction magnitude. + +Evaluations can be used to compare different uncertainty methods and different calibration methods for a given dataset. Using evaluations to compare between datasets may not be a fair comparison and should be done cautiously. \ No newline at end of file diff --git a/chemprop-updated/docs/source/tutorial/cli/train.rst b/chemprop-updated/docs/source/tutorial/cli/train.rst new file mode 100644 index 0000000000000000000000000000000000000000..83d63da0e7c10c130b0f08b956d6a1621e5c171f --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/cli/train.rst @@ -0,0 +1,335 @@ +.. 
_train: + +Training +========================= + +To train a model, run: + +.. code-block:: + + chemprop train --data-path --task-type --output-dir + +where ```` is the path to a CSV file containing a dataset, ```` is the type of modeling task, and ```` is the directory where model checkpoints will be saved. + +For example: + +.. code-block:: + + chemprop train --data-path tests/data/regression.csv \ --task-type regression \ --output-dir solubility_checkpoints + +The following modeling tasks are supported: + + * :code:`regression` + * :code:`regression-mve` + * :code:`regression-evidential` + * :code:`regression-quantile` + * :code:`classification` + * :code:`classification-dirichlet` + * :code:`multiclass` + * :code:`multiclass-dirichlet` + * :code:`spectral` + +A full list of available command-line arguments can be found in :ref:`cmd`. + + +Input Data +---------- + +In order to train a model, you must provide training data containing molecules (as SMILES strings) and known target values. Targets can either be real numbers, if performing regression, or binary (i.e. 0s and 1s), if performing classification. Target values which are unknown can be left as blanks. A model can be trained as either single- or multi-task. + +The data file must be a **CSV file with a header row**. For example: + +.. code-block:: + + smiles,NR-AR,NR-AR-LBD,NR-AhR,NR-Aromatase,NR-ER,NR-ER-LBD,NR-PPAR-gamma,SR-ARE,SR-ATAD5,SR-HSE,SR-MMP,SR-p53 + CCOc1ccc2nc(S(N)(=O)=O)sc2c1,0,0,1,,,0,0,1,0,0,0,0 + CCN1C(=O)NC(c2ccccc2)C1=O,0,0,0,0,0,0,0,,0,,0,0 + ... + +By default, it is assumed that the SMILES are in the first column and the targets are in the remaining columns. However, the specific columns containing the SMILES and targets can be specified using the :code:`--smiles-columns ` and :code:`--target-columns ...` flags, respectively. To train on multiple molecules simultaneously (such as a solute and a solvent), supply two column headers in :code:`--smiles-columns `. + +.. _train_validation_test_splits: + +Train/Validation/Test Splits +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Our code supports several methods of splitting data into train, validation, and test sets. + +* **Random:** By default, the data will be split randomly into train, validation, and test sets. + +* **Scaffold:** Alternatively, the data can be split by molecular scaffold so that the same scaffold never appears in more than one split. This can be specified by adding :code:`--split-type scaffold_balanced`. + +* **User Specified Splits:** Custom splits can be specified in two ways, :code:`--splits-column` and :code:`--splits-file`, examples of which are shown below. + +.. code-block:: + + chemprop train --splits-column split -i data.csv -t regression + +.. list-table:: data.csv + :widths: 10 10 10 + :header-rows: 1 + + * - smiles + - property + - split + * - C + - 1.0 + - train + * - CC + - 2.0 + - train + * - CCC + - 3.0 + - test + * - CCCC + - 4.0 + - val + * - CCCCC + - 5.0 + - val + * - CCCCCC + - 6.0 + - test + +.. code-block:: + + chemprop train --splits-file splits.json -i data.csv -t regression + +.. code-block:: JSON + :caption: splits.json + + [ + {"train": [1, 2], "val": "3-5", "test": "6,7"}, + {"val": [1, 2], "test": "3-5", "train": "6,7"} + ] + +.. note:: + By default, both random and scaffold split the data into 80% train, 10% validation, and 10% test. This can be changed with :code:`--split-sizes `. The default setting is :code:`--split-sizes 0.8 0.1 0.1`. 
Both splits also involve a random component that can be seeded with :code:`--data-seed `. The default setting is :code:`--data-seed 0`. + +Other supported splitting methods include :code:`random_with_repeated_smiles`, :code:`kennard_stone`, and :code:`kmeans`. + +Replicates +^^^^^^^^^^ + +Repeated random trials (i.e., replicates) can be run by specifying :code:`--num-replicates ` (default 1, i.e. no replicates). +This is analogous to the 'outer loop' of nested cross validation but at a lower cost, suitable for deep learning applications. + +Ensembling +^^^^^^^^^^ + +To train an ensemble, specify the number of models in the ensemble with :code:`--ensemble-size ` (default 1). + +Hyperparameters +--------------- + +Model performance is often highly dependent on the hyperparameters used. Below is a list of common hyperparameters (see :ref:`cmd` for a full list): + + * :code:`--batch-size` Batch size (default 64) + * :code:`--message-hidden-dim ` Hidden dimension of the messages in the MPNN (default 300) + * :code:`--depth ` Number of message-passing steps (default 3) + * :code:`--dropout ` Dropout probability in the MPNN & FFN layers (default 0) + * :code:`--activation ` The activation function used in the MPNN and FFN layers. Options include :code:`relu`, :code:`leakyrelu`, :code:`prelu`, :code:`tanh`, :code:`selu`, and :code:`elu`. (default :code:`relu`) + * :code:`--epochs ` How many epochs to train over (default 50) + * :code:`--warmup-epochs ` The number of epochs during which the learning rate is linearly incremented from :code:`init_lr` to :code:`max_lr` (default 2) + * :code:`--init-lr ` Initial learning rate (default 0.0001) + * :code:`--max-lr ` Maximum learning rate (default 0.001) + * :code:`--final-lr ` Final learning rate (default 0.0001) + + +Loss Functions +-------------- + +The loss function can be specified using the :code:`--loss-function ` keyword, where `` is one of the following: + +**Regression**: + + * :code:`mse` Mean squared error (default) + * :code:`bounded-mse` Bounded mean squared error + * :code:`mve` Mean-variance estimation + * :code:`evidential` Evidential; if used, :code:`--evidential-regularization` can be specified to modify the regularization, and :code:`--eps` to modify epsilon. + +**Classification**: + + * :code:`bce` Binary cross-entropy (default) + * :code:`binary-mcc` Binary Matthews correlation coefficient + * :code:`dirichlet` Dirichlet + + +**Multiclass**: + + * :code:`ce` Cross-entropy (default) + * :code:`multiclass-mcc` Multiclass Matthews correlation coefficient + * :code:`dirichlet` Dirichlet + +**Spectral**: + + * :code:`sid` Spectral information divergence (default) + * :code:`earthmovers` Earth mover's distance (or first-order Wasserstein distance) + * :code:`wasserstein` See above. 
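+
+For example, to retrain the earlier solubility model with the bounded MSE loss instead of the default, one might run the following (a sketch reusing the example dataset from above):
+
+.. code-block::
+
+    chemprop train --data-path tests/data/regression.csv \
+        --task-type regression \
+        --loss-function bounded-mse \
+        --output-dir bounded_mse_checkpoints
+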
+ +Evaluation Metrics +------------------ + +The following evaluation metrics are supported during training: + +**Regression**: + + * :code:`rmse` Root mean squared error + * :code:`mae` Mean absolute error + * :code:`mse` Mean squared error (default) + * :code:`bounded-mae` Bounded mean absolute error + * :code:`bounded-mse` Bounded mean squared error + * :code:`bounded-rmse` Bounded root mean squared error + * :code:`r2` R squared metric + +**Classification**: + + * :code:`roc` Receiver operating characteristic (default) + * :code:`prc` Precision-recall curve + * :code:`accuracy` Accuracy + * :code:`f1` F1 score + * :code:`bce` Binary cross-entropy + * :code:`binary-mcc` Binary Matthews correlation coefficient + +**Multiclass**: + + * :code:`ce` Cross-entropy (default) + * :code:`multiclass-mcc` Multiclass Matthews correlation coefficient + +**Spectral**: + + * :code:`sid` Spectral information divergence (default) + * :code:`wasserstein` Earth mover's distance (or first-order Wasserstein distance) + + +Advanced Training Methods +------------------------- + +Pretraining and Transfer Learning +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An existing model, for example from training on a larger, lower quality dataset, can be used for parameter-initialization of a new model by providing a checkpoint of the existing model using :code:`--checkpoint `. :code:`` is the location of checkpoint(s) or model file(s). It can be a path to either a single pretrained model checkpoint (.ckpt) or single pretrained model file (.pt), a directory that contains these files, or a list of path(s) and directory(s). + +When training the new model, its architecture **must** resemble that of the old model. Depending on the similarity of the tasks and datasets, as well as the quality of the old model, the new model might require fewer epochs to achieve optimal performance compared to training from scratch. + +It is also possible to freeze the weights of a loaded Chemprop model during training, such as for transfer learning applications. To do so, you first need to load a pre-trained model by specifying its checkpoint file using :code:`--checkpoint `. After loading the model, the MPNN weights can be frozen via :code:`--freeze-encoder`. You can control which FFN weights are frozen by using the :code:`--frzn-ffn-layers ` flag, where :code:`n` is the number of FFN layers to freeze, counting from the first layer. By default, :code:`n` is set to 0, meaning all FFN layers are trainable unless specified otherwise. + +.. _train-on-reactions: + +Training on Reactions +^^^^^^^^^^^^^^^^^^^^^ + +Chemprop can also process atom-mapped reaction SMILES (see `Daylight manual `_ for details), which consist of three parts denoting reactants, agents, and products, each separated by ">". For example, an atom-mapped reaction SMILES denoting the reaction of methanol to formaldehyde without hydrogens: :code:`[CH3:1][OH:2]>>[CH2:1]=[O:2]` and with hydrogens: :code:`[C:1]([H:3])([H:4])([H:5])[O:2][H:6]>>[C:1]([H:3])([H:4])=[O:2].[H:5][H:6]`. The reactions do not need to be balanced and can thus contain unmapped parts, for example leaving groups, if necessary. 
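+
+A minimal reaction-property training command might look like the following sketch (the data file and column name are placeholders for your own atom-mapped reaction dataset; the relevant options are described below):
+
+.. code-block::
+
+    chemprop train --data-path my_reaction_data.csv \
+        --reaction-columns rxn_smiles \
+        --task-type regression \
+        --output-dir reaction_checkpoints
+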
+ +To enable this, specify the columns in the input file that contain reaction SMILES using the option :code:`--reaction-columns`. This transforms the reactants and products to the corresponding condensed graph of reaction and changes the initial atom and bond features depending on the argument provided to :code:`--rxn-mode `: + + * :code:`reac_diff` Featurize with the reactant and the difference upon reaction (default) + * :code:`reac_prod` Featurize with both the reactant and product + * :code:`prod_diff` Featurize with the product and the difference upon reaction + +Each of these arguments can be modified to balance imbalanced reactions by appending :code:`_balance`, e.g. :code:`reac_diff_balance`. + +In reaction mode, Chemprop concatenates information to each atomic and bond feature vector. For example, using :code:`--rxn-mode reac_prod`, each atomic feature vector holds information on the state of the atom in the reactant (similar to default Chemprop), and concatenates information on the state of the atom in the product. Agents are featurized with but not connected to the reactants. Functions incompatible with a reaction as input (scaffold splitting and feature generation) are carried out on the reactants only. + +If the atom-mapped reaction SMILES contain mapped hydrogens, enable explicit hydrogens via :code:`--keep-h`. + +For further details and benchmarking, as well as a citable reference, please see `DOI 10.1021/acs.jcim.1c00975 `_. + + +Training Reactions with Molecules (e.g. Solvents, Reagents) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Both reaction and molecule SMILES can be associated with a target (e.g. a reaction rate in a solvent). To do so, use both :code:`--smiles-columns` and :code:`--reaction-columns`. + +.. Chemprop allows differently-sized MPNNs to be used for each reaction and solvent/molecule encoding. The following commands can be used to specify the solvent/molecule MPNN size if :code:`--reaction-solvent` is specified: + +.. * :code:`--bias-solvent` Whether to add bias to the linear layers of the solvent/molecule (default :code:`false`) +.. * :code:`--hidden-size-solvent ` The dimensionality of the hidden layers for the solvent/molecule (default 300) +.. * :code:`--depth-solvent ` The number of message passing steps for the solvent/molecule (default 3) + +The reaction and molecule SMILES columns can be ordered in any way. However, the same column ordering as used in the training must be used for the prediction. For more information on atom-mapped reaction SMILES, please refer to :ref:`train-on-reactions`. + + +Training on Spectra +^^^^^^^^^^^^^^^^^^^ + +Spectra training is different from other data types because it considers the predictions of all targets together. Targets for spectra should be provided as the values for the spectrum at a specific position in the spectrum. Spectra predictions are configured to return only positive values and normalize them to sum each spectrum to 1. Spectral predictions are still in beta and will be updated in the future. + +.. Activation to enforce positivity is an exponential function by default but can also be set as a Softplus function, according to the argument :code:`--spectral-activation `. Value positivity is enforced on input targets as well using a floor value that replaces negative or smaller target values with the floor value, customizable with the argument :code:`--spectra_target_floor ` (default 1e-8). + +.. 
+
+
+Training on Spectra
+^^^^^^^^^^^^^^^^^^^
+
+Spectra training is different from the other data types because it considers the predictions of all targets together. Targets for spectra should be provided as the value of the spectrum at each position in the spectrum. Spectra predictions are configured to return only positive values and to normalize them so that each spectrum sums to 1. Spectral predictions are still in beta and will be updated in the future.
+
+.. Activation to enforce positivity is an exponential function by default but can also be set as a Softplus function, according to the argument :code:`--spectral-activation <activation>`. Value positivity is enforced on input targets as well using a floor value that replaces target values that are negative or smaller than the floor, customizable with the argument :code:`--spectra_target_floor <float>` (default 1e-8).
+
+.. In absorption spectra, sometimes the phase of collection will create regions in the spectrum where data collection or prediction would be unreliable. To exclude these regions, include paths to phase features for your data (:code:`--phase-features-path <path>`) and a mask indicating the spectrum regions that are supported (:code:`--spectra-phase-mask-path <path>`). The format for the mask file is a .csv file with columns for the spectrum positions and rows for the phases, with column and row labels in the same order as they appear in the targets and features files.
+
+
+Additional Features
+-------------------
+
+While the model works very well on its own, especially after hyperparameter optimization, additional features and descriptors may further improve performance on certain datasets. Features are used before message passing while descriptors are used after message passing. The additional features/descriptors can be added at the atom-, bond-, or molecule-level. Molecule-level features can be either automatically generated by RDKit or custom features provided by the user and are concatenated to the learned descriptors generated by Chemprop during message passing (i.e. used as extra descriptors).
+
+
+Atom-Level Features/Descriptors
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You can provide additional atom features via :code:`--atom-features-path /path/to/atom/features.npz` as a numpy :code:`.npz` file. This command concatenates the features to each atomic feature vector before the D-MPNN, so that they are used during message passing. This file can be saved using :code:`np.savez("atom_features.npz", *V_fs)`, where :code:`V_fs` is a list containing the atom features :code:`V_f` for each molecule, and each :code:`V_f` is a 2D array with a shape of number of atoms by number of atom features, in the exact same order as the SMILES strings in your data file.
+
+Similarly, you can provide additional atom descriptors via :code:`--atom-descriptors-path /path/to/atom/descriptors.npz` as a numpy :code:`.npz` file. This command concatenates the new features to the embedded atomic features after the D-MPNN with an additional linear layer. This file can be saved using :code:`np.savez("atom_descriptors.npz", *V_ds)`, where :code:`V_ds` has the same format as :code:`V_fs` above.
+
+The order of the atom features and atom descriptors for each atom per molecule must match the ordering of atoms in the RDKit molecule object.
+
+The atom-level features and descriptors are scaled by default. This can be disabled with the option :code:`--no-atom-feature-scaling` or :code:`--no-atom-descriptor-scaling`.
+
+
+Bond-Level Features
+^^^^^^^^^^^^^^^^^^^
+
+Bond-level features can be provided using the option :code:`--bond-features-path /path/to/bond/features.npz` as a numpy :code:`.npz` file. This command concatenates the features to each bond feature vector before the D-MPNN, so that they are used during message passing. This file can be saved using :code:`np.savez("bond_features.npz", *E_fs)`, where :code:`E_fs` is a list containing the bond features :code:`E_f` for each molecule, and each :code:`E_f` is a 2D array with a shape of number of bonds by number of bond features, in the exact same order as the SMILES strings in your data file.
+
+The order of the bond features for each molecule must match the bond ordering in the RDKit molecule object.
+
+Note that bond descriptors are not currently supported because the post-message-passing readout function aggregates atom descriptors.
+
+The bond-level features are scaled by default. This can be disabled with the option :code:`--no-bond-features-scaling`.
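+
+The expected :code:`.npz` layout for these files can be produced with a few lines of NumPy and RDKit. The sketch below is illustrative only: the SMILES, the file names, and the toy ring/charge features are placeholders rather than a recommended featurization.
+
+.. code-block:: python
+
+    import numpy as np
+    from rdkit import Chem
+
+    smis = ["CCO", "c1ccccc1"]  # must match the order of the SMILES in your data file
+    mols = [Chem.MolFromSmiles(smi) for smi in smis]
+
+    # One (n_atoms x n_features) array per molecule; two toy atom features here.
+    V_fs = [
+        np.array([[float(a.IsInRing()), float(a.GetFormalCharge())] for a in mol.GetAtoms()])
+        for mol in mols
+    ]
+    # One (n_bonds x n_features) array per molecule; a single toy bond feature here.
+    E_fs = [np.array([[float(b.IsInRing())] for b in mol.GetBonds()]) for mol in mols]
+
+    np.savez("atom_features.npz", *V_fs)  # use with --atom-features-path atom_features.npz
+    np.savez("bond_features.npz", *E_fs)  # use with --bond-features-path bond_features.npz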
+
+
+Extra Datapoint Descriptors
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Additional datapoint descriptors can be concatenated to the learned representation after aggregation. These extra descriptors could be molecule-level features. If you install from source, you can modify the code to load custom descriptors as follows:
+
+1. **Generate features:** If you want to generate molecule features in code, you can write a custom features generator function using the default featurizers in :code:`chemprop/featurizers/`. This also works for custom atom and bond features.
+2. **Load features:** Additional descriptors can be provided using :code:`--descriptors-path /path/to/descriptors.npz`, where the descriptors are saved as a numpy :code:`.npz` file. This file can be saved using :code:`np.savez("/path/to/descriptors.npz", X_d)`, where :code:`X_d` is a 2D array with a shape of number of datapoints by number of additional descriptors. Note that the descriptors must be in the same order as the SMILES strings in your data file. The extra descriptors are scaled by default; this can be disabled with the option :code:`--no-descriptor-scaling`.
+
+
+Molecule-Level 2D Features
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Chemprop provides several molecule featurizers that automatically calculate molecular features and use them as extra datapoint descriptors. These are specified using :code:`--molecule-featurizers` followed by one or more of the following:
+
+ * :code:`morgan_binary` Binary Morgan fingerprints, radius 2 and 2048 bits
+ * :code:`morgan_count` Count-based Morgan fingerprints, radius 2 and 2048 bits
+ * :code:`rdkit_2d` RDKit 2D features
+ * :code:`v1_rdkit_2d` The RDKit 2D features used in Chemprop v1
+ * :code:`v1_rdkit_2d_normalized` The normalized RDKit 2D features used in Chemprop v1
+
+.. note::
+    The Morgan fingerprints should not be scaled. Use :code:`--no-descriptor-scaling` to ensure this.
+
+    The RDKit 2D features are not normalized. The :code:`StandardScaler` used in the CLI to normalize descriptors is non-optimal for some of the RDKit features. It is recommended to precompute and scale these features outside of the CLI using an appropriate scaler and then provide them using :code:`--descriptors-path` and :code:`--no-descriptor-scaling` as described above.
+
+    In Chemprop v1, :code:`descriptastorus` was used to calculate RDKit 2D features. This package offers normalization of the features, with the normalizations fit to a set of molecules randomly selected from ChEMBL. Several descriptors have recently been added to :code:`rdkit` that are not included in :code:`descriptastorus`, including 'AvgIpc', 'BCUT2D_CHGHI', 'BCUT2D_CHGLO', 'BCUT2D_LOGPHI', 'BCUT2D_LOGPLOW', 'BCUT2D_MRHI', 'BCUT2D_MRLOW', 'BCUT2D_MWHI', 'BCUT2D_MWLOW', and 'SPS'.
+
+
+Missing Target Values
+^^^^^^^^^^^^^^^^^^^^^
+
+When training multitask models (models which predict more than one target simultaneously), sometimes not all target values are known for all molecules in the dataset. Chemprop automatically handles missing entries in the dataset by masking out the respective values in the loss function, so that partial data can be utilized.
+
+The loss function is rescaled according to all non-missing values, and missing values do not contribute to the validation or test errors. Training on partial data is therefore possible and encouraged (versus removing datapoints with missing target entries). No keyword is needed for this behavior; it is the default.
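+
+To make the masking behavior concrete, the toy NumPy sketch below (not Chemprop's actual implementation) averages squared errors over the non-missing entries only, with missing targets represented as NaN:
+
+.. code-block:: python
+
+    import numpy as np
+
+    # 3 molecules x 2 tasks; NaN marks a missing target value.
+    targets = np.array([[0.5, np.nan],
+                        [np.nan, 1.2],
+                        [0.9, 0.3]])
+    preds = np.array([[0.4, 0.0],
+                      [0.0, 1.0],
+                      [1.0, 0.5]])
+
+    mask = np.isfinite(targets)                     # True where a target is present
+    sq_err = (preds - np.nan_to_num(targets)) ** 2  # errors computed everywhere...
+    loss = (sq_err * mask).sum() / mask.sum()       # ...but averaged over non-missing values only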
+ + +TensorBoard +^^^^^^^^^^^ + +During training, TensorBoard logs are automatically saved to the output directory under :code:`model_{i}/trainer_logs/version_0/`. +.. To view TensorBoard logs, run :code:`tensorboard --logdir=` where :code:`` is the path to the checkpoint directory. Then navigate to ``_. diff --git a/chemprop-updated/docs/source/tutorial/python/activation.ipynb b/chemprop-updated/docs/source/tutorial/python/activation.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a23d30796be48232ad559aa7c42c818ceb8efb6b --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/activation.ipynb @@ -0,0 +1,143 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Activation" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.utils import Activation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Activation functions\n", + "\n", + "The following activation functions are available in Chemprop." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "relu\n", + "leakyrelu\n", + "prelu\n", + "tanh\n", + "selu\n", + "elu\n" + ] + } + ], + "source": [ + "for activation in Activation:\n", + " print(activation)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Custom activation functions require editing the source code in `chemprop.nn.utils.py`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from enum import auto\n", + "from torch import nn\n", + "\n", + "from chemprop.utils.utils import EnumMapping\n", + "\n", + "\n", + "class Activation(EnumMapping):\n", + " RELU = auto()\n", + " LEAKYRELU = auto()\n", + " PRELU = auto()\n", + " TANH = auto()\n", + " SELU = auto()\n", + " ELU = auto()\n", + " GELU = auto() # example edited source code\n", + "\n", + "\n", + "def get_activation_function(activation: str | Activation) -> nn.Module:\n", + " \"\"\"Gets an activation function module given the name of the activation.\n", + "\n", + " See :class:`~chemprop.v2.models.utils.Activation` for available activations.\n", + "\n", + " Parameters\n", + " ----------\n", + " activation : str | Activation\n", + " The name of the activation function.\n", + "\n", + " Returns\n", + " -------\n", + " nn.Module\n", + " The activation function module.\n", + " \"\"\"\n", + " match Activation.get(activation):\n", + " case Activation.RELU:\n", + " return nn.ReLU()\n", + " case Activation.LEAKYRELU:\n", + " return nn.LeakyReLU(0.1)\n", + " case Activation.PRELU:\n", + " return nn.PReLU()\n", + " case Activation.TANH:\n", + " return nn.Tanh()\n", + " case Activation.SELU:\n", + " return nn.SELU()\n", + " case Activation.ELU:\n", + " return nn.ELU()\n", + " case Activation.GELU: # example edited source code\n", + " return nn.GELU() # example edited source code\n", + " case _:\n", + " raise RuntimeError(\"unreachable code reached!\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": 
"ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/docs/source/tutorial/python/data/dataloaders.ipynb b/chemprop-updated/docs/source/tutorial/python/data/dataloaders.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..d1c8bdbf44b2b76c68c75eee70c1898dcf657736 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/data/dataloaders.ipynb @@ -0,0 +1,385 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dataloaders" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.data.dataloader import build_dataloader" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example [dataset](./datasets.ipynb) to load." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from chemprop.data import MoleculeDatapoint, MoleculeDataset\n", + "\n", + "smis = [\"C\" * i for i in range(1, 4)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "dataset = MoleculeDataset([MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Torch dataloaders" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop uses native `torch.utils.data.Dataloader`s to batch data as input to a model. `build_dataloader` is a helper function to make the dataloader." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "dataloader = build_dataloader(dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`build_dataloader` changes the defaults of `Dataloader` to use a batch size of 64 and turn on shuffling. It also automatically uses the correct collating function for the dataset (single component vs multi-component)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from torch.utils.data import DataLoader\n", + "from chemprop.data.collate import collate_batch, collate_multicomponent\n", + "\n", + "dataloader = DataLoader(dataset=dataset, batch_size=64, shuffle=True, collate_fn=collate_batch)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Collate function" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The collate function takes an iterable of dataset outputs and batches them together. Iterating through batches is done automatically during training by the lightning `Trainer`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TrainingBatch(bmg=, V_d=None, X_d=None, Y=tensor([[0.0562],\n", + " [0.5048]]), w=tensor([[1.],\n", + " [1.]]), lt_mask=None, gt_mask=None)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "collate_batch([dataset[0], dataset[1]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Shuffling" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Shuffling the data helps improve model training, so `build_dataloader` has `shuffle=True` as the default. Shuffling should be turned off for validation and test dataloaders. Lightning gives a warning if a dataloader with shuffling is used during prediction." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = build_dataloader(dataset)\n", + "val_loader = build_dataloader(dataset, shuffle=False)\n", + "test_loader = build_dataloader(dataset, shuffle=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "from lightning import pytorch as pl\n", + "from chemprop import models, nn\n", + "\n", + "trainer = pl.Trainer(logger=False, enable_checkpointing=False, max_epochs=1)\n", + "chemprop_model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), nn.RegressionFFN())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 3.37it/s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/brianli/Documents/chemprop/chemprop/nn/message_passing/base.py:263: UserWarning: The operator 'aten::scatter_reduce.two_out' is not currently supported on the MPS backend and will fall back to run on the CPU. This may have performance implications. (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/mps/MPSFallback.mm:13.)\n", + " M_all = torch.zeros(len(bmg.V), H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_(\n" + ] + } + ], + "source": [ + "preds = trainer.predict(chemprop_model, dataloader)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 164.67it/s]\n" + ] + } + ], + "source": [ + "preds = trainer.predict(chemprop_model, test_loader)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parallel data loading" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As datapoints are sampled from the dataset, the `MolGraph` data structures are generated on-the-fly, which requires featurization of the molecular graphs. Giving the dataloader multiple workers can increase dataloading speed by preparing the datapoints in parallel. Note that this is not compatible with Windows (the process hangs) and some versions of Mac. 
\n", + "\n", + "[Caching](./dataloaders.ipynb) the the `MolGraphs` in the dataset before making the dataloader can also speed up sequential dataloading (`num_workers=0`)." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "build_dataloader(dataset, num_workers=8)\n", + "\n", + "dataset.cache = True\n", + "build_dataloader(dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Drop last batch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`build_dataloader` drops the last batch if it is a single datapoint as batch normalization (the default) requires at least two data points. If you do not want to drop the last datapoint, you can adjust the batch size, or, if you aren't using batch normalization, build the dataloader manually." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Dropping last batch of size 1 to avoid issues with batch normalization (dataset size = 3, batch_size = 2)\n" + ] + } + ], + "source": [ + "dataloader = build_dataloader(dataset, batch_size=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "dataloader = build_dataloader(dataset, batch_size=3)\n", + "dataloader = DataLoader(dataset=dataset, batch_size=2, shuffle=True, collate_fn=collate_batch)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Samplers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The default sampler for a `torch.utils.data.Dataloader` is a `torch.utils.data.sampler.SequentialSampler` for `shuffle=False`, or a `torch.utils.data.sampler.RandomSampler` if `shuffle=True`. \n", + "\n", + "`build_dataloader` can be given a seed to make a `chemprop.data.samplers.SeededSampler` for reproducibility. Chemprop also offers `chemprop.data.samplers.ClassSampler` to equally sample positive and negative classes for binary classification tasks. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "build_dataloader(dataset, seed=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[1.],\n", + " [0.],\n", + " [1.],\n", + " [0.],\n", + " [1.],\n", + " [0.],\n", + " [1.],\n", + " [0.]])\n" + ] + } + ], + "source": [ + "smis = [\"C\" * i for i in range(1, 11)]\n", + "ys = np.random.randint(low=0, high=2, size=(len(smis), 1))\n", + "dataset = MoleculeDataset([MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])\n", + "\n", + "dataloader = build_dataloader(dataset, class_balance=True)\n", + "\n", + "_, _, _, Y, *_ = next(iter(dataloader))\n", + "print(Y)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/docs/source/tutorial/python/data/datapoints.ipynb b/chemprop-updated/docs/source/tutorial/python/data/datapoints.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a75d5162208a379c324fe518d350695318763b84 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/data/datapoints.ipynb @@ -0,0 +1,419 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from rdkit import Chem\n", + "from chemprop.data.datapoints import MoleculeDatapoint, ReactionDatapoint" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Molecule Datapoints" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MoleculeDatapoint`s are made from target value(s) and either a `rdkit.Chem.Mol` object or a SMILES." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "mol = Chem.MolFromInchi(\"InChI=1S/C2H6/c1-2/h1-2H3\")\n", + "smi = \"CC\"\n", + "n_targets = 1\n", + "y = np.random.rand(n_targets)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name=None, V_f=None, E_f=None, V_d=None)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint(mol, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='CC', V_f=None, E_f=None, V_d=None)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint.from_smi(smi, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hydrogens in the graph" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Explicit hydrogens in the graph created by `from_smi` can be controlled using `keep_h` and `add_h`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAPWUlEQVR4nO3dW0yT5x8H8LcIVU5ylCGKHGQcRVDAEzAmsoCTLUsWdrGsy5Il4C7WsQxWLly6C5dg2aHLki2wOFezK7bdFB1uBcEjKKAMx0EUEUEROcjGsVTa/8Xjv2NyKrxtn/fw/VxpAu030H7pe3h+j8RkMjEAALBaDrQDAADwG2oUAIAV1CgAACuoUQAAVlCjAACsoEYBAFhBjQIAsIIaBQBgBTUKAMAKahQAgBXUKAAAK6hRAABWUKMAAKw40g4A8K+jR4/evXs3Li7u/fffp50FwFISDMoDLvjjjz/eeeed/v5+8t+AgIDy8vLk5GS6qQAsgYN6oKy3tzcxMTEzM7O/v18ikWzevJlhmAcPHqSkpKSlpQ0NDdEOCLAM1ChQ8+TJk7y8vODg4KamJoZhQkNDW1paent7GxsbQ0NDGYY5f/68v79/Xl6e0WikHRZgUahRoOOHH37w8vIqKyszGo1ubm7Hjx/v6uratm0bwzAJCQldXV0//viju7v77OxsWVnZ+vXrv//+e9qRARaGc6Ngb3fu3ElPT+/p6WEYZs2aNYcPH/7mm28kEsn8r5ydnc3Lyztx4gT5NPrmm29+8cUX/v7+9k4MsCTUKNjP5OSkSqVSqVRTU1MMw6Smpv76668bNmxY+rsePnz4+uuvNzQ0GAwGV1fXgoKCoqKidevW2SUygAVMALZnNBrLy8uDgoLIq+7AgQN1dXUreoTbt2/n5OSQbw8MDNRoNDaKCrBSqFGwucbGxpSUFNKAO3fuvHDhwqofqrq6evv27eSh9u/f/+eff1oxJ8DqoEbBhh48eJCbm7tmzRqGYTZu3FhaWvrkyROWj2kwGEpLS8mpAAcHB5lM9ujRI6ukBVgd1CjYxMzMjFqtXr9+PcMwTk5Ocrn877//tuLjj4yMKBQKqVTKMIyXl1dxcbFer7fi4wNYDjUK1qfVardu3UoOvbOzs2/fvm2jJ+ro6Hj55ZfJE0VERJw+fdpGTwSwBNQoWFN7e/vBgwdJr0VGRv722292eFKdThcVFUWeNCMjo7W11Q5PCmCGGgXrGBkZkcvljo6O5ChbrVYbDAa7Pfv8cwijo6N2e3YQOdQosDX3mo+jo2Nubi6taz6Dg4NyuZxc0fLx8VGr1eyvaAEsCzUKrFRXV8fGxpID6vT09JaWFtqJTE1NTampqeb7q86fP087EQgcahRW6datW+b74cPCwsrLy2kn+g+tVhscHGy+zNXd3U07EQgWahRWbHx8XKlUrl27lmEYV1dXpVI5PT1NO9QCJiYmiouL3dzcGIZxdnZWKBRjY2O0Q4EAoUZhBYxGo0ajIcNBJBKJTCbr7++nHWoZfX19MpmMjD7ZtGmTRqMxGo20Q4GgoEbBUleuXNmzZw85TN61a9fly5dpJ1qBZ8KvdEU/wBJQo7A8YXygm52d1Wg0zz33HI8+SgMvoEZhKZOTkwI7vciXE7vAI6hRWJSAL3Z3dnZy+TYD4BfUKCxg7q2XO3bsEOqtl1VVVWTbEjIClQs3vQIfoUbhP4aGhkS1EIgswfL19aW+BAv4CzUKT5Fl6R4eHiJclj48PGweCODt7W3ngQDAd6hRMJlMJp1OFx0dLfIhSe3t7VlZWebxVJWVlbQTAT+gRsUOIzufodVqQ0ND7TAsFQQDNSpeGCC/GL1eb9PR/SAwqFExIjei+/n5mbczGhgYoB2Kc8hGUg4ODuaNpGZnZ2mHAi5CjYrO2bNnsbmm5RobG5OTk8mPKyEhgc22piBUqFERuXfvnkwmw1bvK2U0GsvLy4OCgsgq0pycnLt379IOBRwiMZlMDAjdxMRESUnJsWPHpqenXV1dCwoKioqK1q1bRzsXn0xOTqpUKpVKNTU15eLiUlhYqFAonJ2daecCDqDd42Bb5JNUYGAg8/9PUj09PbRD8Vhvb695SsvmzZt5OqUFrAs1KmRXr17dt28f+XuZlJR06dIl2okEora2Nj4+nv
xg09LSrl+/TjsR0IQaFab79++brzIHBATgKrPVzR27R+52ePjwIe1QQAdqVGjIaDt3d3eGYaRSqVwu/+eff2iHEqzHjx8rFAoyds/T07O4uBhj90QINSooWq02JCSEHGxmZ2ffuXOHdiJRuHnzZnZ2NvmxP//88xi7JzaoUYG4fv36Cy+8QN7J8fHxtbW1tBOJjk6ni4mJIb+CjIyMv/76i3YisBPUKO/NHW1HphMJe7Qdl5EpWZ6engzDODk55ebmDg4O0g4FNoca5bH5o+0eP35MOxQ8HbuHP2zigRrlKxxCclxbW1tmZib5BUVFRZ05c4Z2IrAV1Cj/zL2gER4eXlFRQTsRLOqZi35dXV20E4H1oUb5BLfX8BEZu4db0AQMNcoP80fb4WZvfsGCCAFDjfJATU1NXFyceelhc3Mz7USwSnOX5yYmJmJ5rjCgRjmNjLbDIAwhIcNitmzZgmExgoFBeRxFxrKR0XZkLBtG2wkJRhcKCu0eh2fN/7SCIcFChUHawoAa5ZaGhoa5W1ZcvHiRdiKwOWzrwneoUa7ABmpihk0GeQ01St/c7XzJfYXYzlec5m55Te4LxpbXvIAapUyr1YaGhpIDOqxyAZPJ1NHRcejQIfKSCA8PP3XqFO1EsAzUKDVtbW1ZWVnk3RIZGVlZWUk7EXCITqeLjo4mL4+MjIzW1lbaiWBRqFEK5k8AMhgMtEMB58yf4DU6Oko7FCwANWpXBoOhtLTU19eXYRhHR0fMo4RlzZ0n6+Pjg7F7HIQatZ+qqqpt27aRw7QDBw7cuHGDdiLgjWvXrqWmppIXz44dO86dO0c7EfwLNWoPnZ2dOTk55D2AvXpg1bRabXBwsPmCJPba4gjUqG2NjY0plUoy2s7NzU2pVGK0HbBBdn51c3NjGMbZ2VmhUGDsHnWoUVvBPuZgO319feaZNZs2bcLMGrpQozZRX1+/e/ducvC1a9euuro62olAgK5cubJ3717yMktKSrp8+TLtRCKFGrWy3t5ejLYDuzEajRqNxt/fnwyykclk/f39tEOJDmrUaiYmJswnrVxcXBQKxdjYGO1QIArj4+NKpZLM2XN1dVUqlVNTU7RDiQhq1Dq0Wm1QUJD5Emp3dzftRCA6t27dMt8QEhYWhhtC7AY1ylZTU1NKSgp57e7cufPChQu0E4GoVVdXx8bGkhdkenp6S0sL7UTChxpdPTLajiwv8fX1xfIS4AiyWG7Dhg3mxXKPHj2iHUrIUKOrQRY7k9F2ZLEzRtsB14yMjMjlckdHR4ZhvLy8MLrBdlCjK6bVardu3WoevdPW1kY7EcCi2tvbDx48SF6uERERp0+fpp1IgFCjK4BXJPAU/vbbFGrUIjg+Ar7DmSjbQY0uA2frQUhwXdQWUKNLeebeEWzZCMKAu/SsCzW6MNzJDIKHNSPWghp9FtbVgXhgBbNVoEb/hSkPIE6Yp8MSavSpq1evYuYYiNnc6Y67d+/GdEfLoUYxARfgKcwaXx1R1yj2YwCYDzvfrJR4axS7gwEsAfswWk6MNYq9agEshF3BLSGuGh0aGpLL5WQJh4+PD5ZwACyLLOTz9fU1L+QbHBykHYpbxFKjZEGxh4eHeUHx6Ogo7VAAvDE8PGz+COLt7Y2xEnOJokZ1Ol10dLR5vE1rayvtRAC81NbWlpmZSd5KkZGRlZWVtBNxgsBrtKOj49ChQ+S3Hh4efurUKdqJAHhPq9WGhoaaL892dXXRTkSZYGt0ZGREoVBIpVKGYTw9PYuLi/V6Pe1QAAKh1+vNY/ekUqnIx+4JsEbJLcR+fn7mW4gHBgZohwIQoPv37+fm5jo4ODAMs3HjxtLS0tnZWdqhKBBajZ49e3b79u3kcGP//v3Nzc20EwEIXENDQ3JyMnnTJSQkXLx4kXYiexNOjd67d08mk5HfZWBgoEajoZ0IQCyMRmN5efmWLVvIWJ+cnJyenh7aoexHYjKZGJ6bmJgoKSk5duzY9PS0q6trQUFBUVERmXQHAHYzOTmpUqnIO9HFxaWwsFAs70TaPc6KyP8GAnDQ3ONCkYzd43GNNjQ07Nu3j/y2EhMTL126RDsRADxVU1MTFxdH3p4vvviisK9S8LJG514fDAgIEO31QQAuE889Mzw7NzozM/Pdd9998sknY2NjUqn08OHDR48edXd3p50LABY2OjpaXFz81VdfzczMeHp6FhUV5efnkyl8C6qtrT1+/Dj591tvvWVeNLW08fHx9957j/w7IiLiyJEj7JOvAO0eXwGtVhsSEkJiY+0EAI/cvHkzOzubvHnDw8MrKioW+8rS0lJzO3355ZcWPv7g4KD5u1JSUqyU2lIO9qzsVWtvb8/Kynr11Ve7u7ujoqLOnDlTUVFhXo4GABxHqlOn08XExHR2dr7yyisvvfRSa2sr7VzWwfUaHRkZ+eCDD2JjY3///XcyV+bGjRsWfs4HAE7JyMi4du3a559/7uHhUVVVlZ6ertfraYeyAkfaARZlMBi+/fbbTz/9dHR01MnJ6d133/3ss8/I0EMA4CmpVPrRRx/JZLIjR47ExsYucZKURzhao1VVVfn5+eQzf0ZGhlqtjomJoR0KAKzDz8+vrKyMdgqr4dxB/dzzJmQHGHI+hXYuAICFcejTKLkxQq1W6/V6S26MAADgAk7UqNFo/Omnnz7++OOBgQFym25JSQnZLBsAgOPo1+i5c+fy8/Obm5sZhklLS1Or1fHx8bRDAQBYiua50b6+vrfffptMBSUjDGpqatChAMAvdD6NkoFaKpVqamqKDNRSKBTOzs5UwgAAsGHvGjWZTL/88kthYWFPTw8ZbVdSUhIUFGTnGADAfT///HNHR4clXzk9PW3rMEuwa40ODw9nZ2fX19czDJOUlPT111/v3bvXngEAgEfq6urq6upop1ieXc+Nent7Ozk5ka2v6uvr0aEAIAB2/TQqkUhOnjzp6+vr5uZmz+cFAD7as2dPbGysJV+p1+tPnjxp6zyLsfe50eDgYDs/IwDw1BtvvPHhhx9a8pVDQ0MUa5Rzi0EBAPgFNQoAwApqFACAFdQoAAArqFEAAFZQowAArKBGAQBYQY0CALCCGgUAYAU1CgDACmoUAIAV+puIAACYhYSEvPbaa+TfYWFhFn7X2rVrzd8VHR1tk2SLk5hMJjs/JQCAkOCgHgCAFdQoAAArqFEAAFZQowAArKBGAQBYQY0CALCCGgUAYAU1CgDACmoUAIAV1CgAACuoUQAAVlCjAACsoEYBAFhBjQIAsPI/mfYDNf0DrLIAAABlelRYdHJka2l0UEtMIHJka2l0IDIwMjMuMDkuNQAAeJx7v2/tPQYg4GdAAGYgZgLiBkZGBQ0gzcjIxpAAEmOC0IzM3AyMDIxMDCIg1eJ6ICG43oduy+yBWvchmWcPIoDi+2HiYgCzdgzCMqvRuwAAAKh6VFh0TU9MIHJka2l0IDIwMjMuMDkuNQAAeJyNUEEKwzAMu+cV+kCD61BYjk1S1jKawJbtD7vv/8yhZE0PG7N9kIVkhBVKXcPl+cKnOCgF0I+x1uJhiEitKABuOi8RPo+uM
j7dY77BgMUhfVSOOa2V6TGj12wtmRM60jzIYXFoog1UIcM3rB7oi86Irvvr4hTDIcoWzqUY9nCleY8gC0zrb9Vlr08QrN4jl0NZa+vfuwAAAEF6VFh0U01JTEVTIHJka2l0IDIwMjMuMDkuNQAAeJyL9oh1dlao0TDUM7K0NDDR0TXQMzLVsTbQMdADUrqowpo1APtNChCjpyj6AAAAAElFTkSuQmCC", + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint.from_smi(\"[H]CC\", y, keep_h=True).mol" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAGb0lEQVR4nO3cP0iV/x7A8a+npFq0SLRFHRQ1gkAHq6WlRce2iqLWIhCC1oTW/i0/bBUq22pMgtDTUjgkRKFJQihGSlG4WITn3EHu797LOZn66fcc9b5e4/N8h494ePscz0erisViAmCjcpUeAGBrk1GAEBkFCJFRgBAZBQiRUYCQnZUegK1ncXHx9u3bKaXLly/X1dWVHnj//v39+/dzudy1a9cyn66869evFwqFs2fPtra2lt79/PnzX3/9lVK6cuVKTU1N5tOxtVXZG2W9Zmdnm5qaUkoTExMdHR2lB4aHh3t7e3O53PLycubTlbdjx45CofDkyZOenp7Su5OTkwcPHkwpzczMNDY2Zj4dW5s39QAhMgoQIqMAITIKECKjACEWnti4R48eHThwoPT6mzdvsh9mLZ4+ffrx48fS658+fcp+GLYNC0+s298LT6vbhAtPvz1m4YkN8DTKxl28eHH//v2l16enpx8+fJj9PL91+vTplpaW0utfvny5e/du9vOwPXgaZd2s38N/8xETQIiMAoTIKECIjAKEyChAiIwChNgbZd3q6+tHR0dTSs3NzWUPHDlyZHR0tKqqKtOxVjUyMlIsFg8fPlz2bnNz88pXVF9fn+lYbAv2RgFCvKnnD7h161ZXV9fQ0FClB1mroaGhrq6uW7duVXoQtgMZ5Q+YnZ0dHx9fWFio9CBrtbCwMD4+Pjs7W+lB2A5kFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCdlZ6ANL8/Pzw8HBK6dy5c7lcmR9sU1NTL168qKmpOXnyZObTURmPHz9eXFw8duxYW1tb6d1CoXDv3r2UUk9PT0NDQ+bT8T9ktPLevXt34cKFlNKpU6d27dpVeuDZs2eXLl1qaWmR0f8fV69enZ6eHhgYKJvRnz9/rrxm8vm8jFacN/UAITIKECKjACEyChAiowAhPqnfRPL5fHV1den1qamp7IdhM5iamhoZGSm9/vPnz+yH4ZeKVFo+n1/Ld6qlpaVYLI6Pj//TLwkqa3x8vFgstrS0rOVwPp+v9OuXoqfRTeTMmTO/Wr8fGxvLfh4qrru7+1fr90NDQ9nPQ3mV7jj/eRr9/v172QMDAwPp30+jm1NfX19K6c6dO5UeZK3u3LmTUurr66v0IL+08jQ6MDBQ9u73799XXjOeRjcDHzEBhMgoQIiMAoTIKECIjAKEyChAiL3Rymtvbx8cHEwplf0TppTSiRMnBgcHa2pqMh2Lirpx48bKv20ue7e6unrlNdPe3p7pWJQjo5XX0NBw/vz5VQ60tbWV3cFmG1v9X3TncrnVXzNkyZt6gBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUf6AxsbGzs7O+vr6Sg+yVvX19Z2dnY2NjZUehO2gqlgsVnoGgC1sZ6UHYOv58ePHy5cvU0rd3d179uwpPfD169fXr19XVVUdP3488+nKe/78ebFYPHz48L59+0rvLi0tjY2NpZSOHj26a9euzKdja/M0yrrNzs42NTWllCYmJjo6OkoPDA8P9/b25nK55eXlzKcrb8eOHYVC4cmTJz09PaV3JycnDx48mFKamZnxTp/18rtRgBAZBQiRUYAQGQUIkVGAEAtPbNzc3Nzu3btLr8/Pz2c/zFrMz89/+PCh9Prc3Fzms7B9WHhi3f5eeFrdJlx4+u0xC09sgKdRNq6rq6vs+v23b9/evn2b/Ty/dejQob1795ZeX1paevXqVfbzsD3IKBv34MGDVdbvs5/nt27evLn6+j1sgI+YAEJkFCBERgFCZBQgREYBQmQUIMTCE+tWW1vb39+fUqqrqyt7oLW1tb+/P5fbRD+k+/v7C4VCa2tr2bt1dXUrX1FtbW22c7Ed+CsmgJBN9LwAsBXJKECIjAKEyChAiIwChMgoQMi/AJsvobhUBxZMAAAAhnpUWHRyZGtpdFBLTCByZGtpdCAyMDIzLjA5LjUAAHice79v7T0GIOBnQAAOIGYH4gZGNgUFIM0CpRgZNEDSjMTS3AyMDAxMDAzMQL0MjKwMjGwMjOwMIiA58SyQAiQLHfYD6SUQroM9gi1w4NRJY1WoOFDNAXsk9n6oGgdUvTBxMBusXgwAriwUsztESVUAAADZelRYdE1PTCByZGtpdCAyMDIzLjA5LjUAAHicjZJBDoMgEEX3nOJfQIMooksV0zaNmLS2d+i+909n2uBomxoHFvPhzQA/KHBc/PnxxBzGKwXojVnXNe651loN4ARtfzgFdFPTxpVuvIXpigqOKmisyWYah7iSoUOiU2epn+ZMvwOSRNIQuQvMcURiUmO/9n/AgsG5ZZbaf6BdgckGWRI5H711SUfgro7VEtzg+uBXxn6sbsfgxWoeRvxkmYtrLAvxhqUVB6gSpTyTpZPHsKyWV1kezDr+DsrVC3NxdbukCdmpAAAAeHpUWHRTTUlMRVMgcmRraXQgMjAyMy4wOS41AAB4nIv2iHXWiPaI1QQTSEwgVqjR0DXSMzLVMdCx1jXQM0diGOqZwpi6YDZMGqYeXQrE0tRJLMnPDSjKL7Ay0Mss9swtyMlMzizRM7QyQuUao3JNUblmqFxzFG4NAG7AOeL/jG0zAAAAAElFTkSuQmCC", + "text/plain": [ + "" + ] + }, + 
"execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint.from_smi(smi, y, add_h=True).mol" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Other datapoint properties" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Datapoints can be individually weighted in the loss function." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=0.5, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='CC', V_f=None, E_f=None, V_d=None)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint.from_smi(smi, y, weight=0.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A string identifier (e.g. a name) can be assigned to a datapoint. If a SMILES is used to make the datapoint, the name defaults to the SMILES, but this can be overwritten." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Ethane', V_f=None, E_f=None, V_d=None)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint(mol, y, name=\"Ethane\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Ethane', V_f=None, E_f=None, V_d=None)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint.from_smi(smi, y, name=\"Ethane\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extra features and descriptors" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extra datapoint descriptors (like [molecule features](../featurizers/molecule_featurizers.ipynb)) will be concatenated to the learned descriptors from message passing and used in the FFN. They are called `x_d`. " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=array([0.79952846, 0.57058144, 0.61951421]), x_phase=None, name='CC', V_f=None, E_f=None, V_d=None)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n_extra_descriptors = 3\n", + "MoleculeDatapoint.from_smi(smi, y, x_d=np.random.rand(n_extra_descriptors))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extra atom features, bond features, and atom descriptors are called `V_f`, `E_f`, `V_d`. In this context, features are used before the message passing operations, while descriptors are used after. Extra bond descriptors aren't currently supported as aggregation ignores the final bond (edge) representations. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='CC', V_f=array([[0.3860953 , 0.64302719, 0.05571153],\n", + " [0.06926393, 0.90740897, 0.95685501]]), E_f=array([[0.55393371, 0.29979474, 0.07807503, 0.73485953]]), V_d=array([[0.10712249, 0.33913704, 0.37935725, 0.74724361, 0.49632224],\n", + " [0.8496356 , 0.31315312, 0.14000781, 0.58916825, 0.16698837]]))" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n_atoms = 2\n", + "n_bonds = 1\n", + "n_extra_atom_features = 3\n", + "n_extra_bond_features = 4\n", + "n_extra_atom_descriptors = 5\n", + "extra_atom_features = np.random.rand(n_atoms, n_extra_atom_features)\n", + "extra_bond_features = np.random.rand(n_bonds, n_extra_bond_features)\n", + "extra_atom_descriptors = np.random.rand(n_atoms, n_extra_atom_descriptors)\n", + "MoleculeDatapoint.from_smi(\n", + " smi, y, V_f=extra_atom_features, E_f=extra_bond_features, V_d=extra_atom_descriptors\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reaction Datapoints" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`ReactionDatapoint`s are the same as for molecules expect for:\n", + "1. extra atom features, bond features, and atom descriptors are not supported\n", + "2. both reactant and product `rdkit.Chem.Mol` objects or SMILES are required" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ReactionDatapoint(rct=, pdt=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name=None)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Keep the atom mapping for hydrogens\n", + "rct = Chem.MolFromSmiles(\"[H:1][C:4]([H:2])([H:3])[F:5]\", sanitize=False)\n", + "pdt = Chem.MolFromSmiles(\"[H:1][C:4]([H:2])([H:3]).[F:5]\", sanitize=False)\n", + "Chem.SanitizeMol(\n", + " rct, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS\n", + ")\n", + "Chem.SanitizeMol(\n", + " pdt, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS\n", + ")\n", + "ReactionDatapoint(rct, pdt, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The SMILES can either be a single reaction SMILES 'Reactant>Agent>Product', or a tuple of reactant and product SMILES. Note that if an Agent is provided, its graph is concatenated to the reactant graph with no edges connecting them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAbvElEQVR4nO3daUAUV7oG4LebpgEB2SEBATUMyhoX3GIQFRMVN1wgk2hcogYSrqJoEAlqDDruDksSjNuoKI6oMUa8LjioMSIiQb24sgVZBBQaRGiQbrruj4oMYdeiabv5nl/WqdPF15i8nqo6dYrHMAwIIYS8Lr6iCyCEEOVGMUoIIZxQjBJCCCcUo4QQwgnFKCGEcEIxSgghnAgUXQDh5Ny5c2KxePDgwRYWFk33FhcXJyYmCoXCCRMmtHIQiUSSkJCQmZlZWVlpZmY2atQoa2truZVMiKrh0bxRpWZpaZmfn3/kyBFvb++me8+cOePh4aGnp1deXt7SEfbv379y5crCwsL6FjU1tc8///yf//ynhoaGXIomRLXQaLRL2759+7Jly3g8nqen56hRo4RC4cWLF2NjY6OiohiGiYqKUnSBhCgBitGuq6ysbM2aNXw+//Dhw/WDWV9fXzs7u7Vr1+7atSsoKIjO7glpE91i6roMDAxu3ry5b9++RhcEAgMD1dTU6urqLl++rKjaCFEiNBrt0mxsbGxsbBo1duvWzdzcPC8vr6SkRCFVEaJcaDTahfzxxx8+Pj4+Pj4VFRWtdGMYprS0FICRkVFnlUaIEqPRqCq4evVqXV1d0/bbt2833CwuLt65cyeAtWvXdu/evaWjJScni8ViAO+9915HV0qICqIYVQURERERERFtdtPX1x87diyA1mcybd68GYCbm9vf/va3jqqQEBVG80aVGztvdPr06fb29k33ZmVlxcTEtD5vtJHo6OjZs2cLBIJr1665uLh0aLGEqCYajaoCb2/vlqbfx8TEtP84ly9f9vX1BbB161bKUELaiW4xkT9dvHhxwoQJYrE4NDTU399f0eUQojQoRgkA7N27d9y4cVVVVatWrQoJCVF0OYQoEzqp7+pqa2sDAwPDw8O1tLSio6NnzZql6IoIUTIUo11IVlbW2rVrAURGRurp6QHIzc39+OOPExMTe/To8dNPPw0aNEjRNRKifChGu5CnT59GR0cD2Lx5s56e3p07d1xdXdmb+EKh8IsvvmjU38XFZceOHQoolBClQjHahRgaGnp6egLQ1NQEkJOTUz8RKjs7u2n/VqboE0Lq0bxR5VZWVlZXV9e9e3ehUNh0r0QiefbsGZ/PNzQ0bHZvZWVlKwcXCAS6urodVishKopiVKUwDCOTyfh8Po/HU3QthHQVNOFJpWzcuFEgEAQHByu6EEK6EIpRQgjhhGKUEEI4oRglhBBOKEYJIYQTilFCCOGEYpQQQjihGCWEEE4oRgkhhBOKUUII4YRilBBCOKEYJYQQTihGCSGEE4pRQgjhhGKUEEI4oRglhBBOKEYJIYQTilFCCOGEYpQQQjihGCWEEE4oRgkhhBOKUUII4YRilBBCOKEYJYQQTihGCSGEE4pRQgjhhGKUEEI4oRglhBBOKEYJIYQTgaILIKQNNTU1z58/V1dX19fXb7ZDWVmZVCrV1dXV1NRs/VCPHj3Ky8sDYG1tbWlp2fG1KrO6ujqRSATA2NiYx+M17VBVVSUWizU1NXV1dds8WnV1dVZWlkgkMjMzs7a2bvOvRqnRaJS86Xbv3m1qauru7t5SBzc3N1NT0/3797dykGPHjtnZ2fXs2dPV1dXV1dXKysrBweHUqVNyqFdZPXjwwNTU1NTU9Pnz5812CA4ONjU1XbhwYevHSU9P9/b2NjY2dnJycnNz69u3r4mJia+v74sXL+RQ9RuBRqNE9e3du3f+/PkAhg4d6ubmxjBMQkJCSkrKlClTDh48+Mknnyi6QNVx/vx5T0/P6upqHR0dDw8PCwuLJ0+eJCYmFhUVaWhoKLo6eaEYJSpOKpWuWLECgI+Pz44dO9hGhmHmzZu3f//+5cuXf/TRR2pqagqtUUXk5eXNmDGjurp66tSpu3fvNjQ0ZNtra2vFYrFia5MrOqknKi4rK6ukpATA559/Xt/I4/F8fX0BFBYW5uTkKKg0VbNq1arnz587OTkdOXKkPkMBCIXClq5rqwaKUaLi6s8lq6urG7bX30XR1tbu7JpUUVVV1bFjxwCsWLFCXV1d0eV0KopRomouXLgQHR1948YNdrNHjx7m5uYAVqxYUV5eXt/tyJEjAIYNG/bWW28ppE5ll5aWFh0dHRcXx26mpKRUVVXxeDwPDw/FFtb56NooUQ4SiYSdq9TsroabGzZsSEhI8PPzGzRoEACBQLBp06bZs2dfvXq1T58+CxcunDNnTlJSUnh4uLm5+b59+zqheOVSUFDw7Nmzpu2VlZUNN0+cOLFmzRpHR8eJEycCSEtLA2BhYaGpqfn9998fO3YsIyNDR0enf//+ixcvHjZsWOcUrxAUo0Q5pKWlWVlZtaenlpaWtrZ2w/vCs2bNqq2tnT9//pMnT9avX/+Pf/wDgKWlZVJS0ttvvy2vipWWvb19e7oJhUJtbe1u3bqxm+ycU4Zh+vfv//DhQwDdunUrKCh4+PDh0aNHw8PD/fz85FezYlGMEuWgq6vLji6bSk5ObjhQqj/NrJeYmLh69Wo+n//FF1+UlJT88ssv1dXVubm5o0ePjo2NdXJykmPdSsjNza3ZqQsZGRkNTwiCgoKCgoLqN9m/goKCAjs7u0OHDk2ePFlHRyc3N3f58uVHjx5dsmSJq6urs7NzJ9Tf+VQ2Rvfs2VNUVDR+/PgBAwY03VtYWLh3714AwcHBzT6wUU8kErHnfdOnT7e2tpZPse1VXV198eLF69evl5SUCAQCS0tLDw+Pdo4dWBKJJCIiQiqVAujXr9/YsWPlVmzzUp8/T6yoaGnvZGNjqyazCxkAgH7Pnp4HDjTa5aav76yt7ezszJ5RNuvRo0cTJkwoLy/fu3fvvHnzAFRUVMTGxq5bt+7Bgweurq537tzp0aPH634hFfTLL7907969abu/v39ERERLn2KH/z179kxJSakfolpZWR08ePDWrVsZGRm7d+9u5eNKTWVjNDIy8vbt24aGhs3GaH5+fkhICIAVK1YIBM3/ErKzs8PDw/fs2VNVVQXAyMhozpw5cq25dbt27QoJCXny5EnDxq+++mrGjBlRUVHGxsYApk+f3rdv3z59+rR0kM2bN7NfHICvr2/nx+j/VVXtKypqae8AXd2mMVojkwEolUj2N/mgQ7duaOs+e1hYWHl5+bBhw9gMBdC9e/cFCxZ4enr269evoKBg+/bt27dvf+VvQv7KwMAAQHV1dX2GsoRCobu7e0ZGRiv/1Ck7lY1RLgoLC/38/E6ePCmTyTQ0NDQ1NWtqahRb0sqVKzdu3AjAwcHh008/7du3r0QiSUlJ2bVr17Fjx/7444+kpCSBQGBra2tra9vSQR4+fLhu3TqhUPj++
+8nJCR0YvmN2WhpzWvu/ritllbTxqq6OgBqPN7G3r0b7erfjoe7b926BWDIkCGN2o2NjSdPnhwVFXXnzh225fbt2++++247yifNsLOzA/DkyROxWNwoSdlNmUymmMrkj2K0Gfr6+jdu3PDw8PDy8poyZcro0aNTU1MVWM/Zs2c3bdoE4MsvvwwPD68fPs+YMWPp0qVeXl5BQUEtjanryWSyhQsX1tTUrFq1SiQSKTZGDdXVxzaYnt2653V1ANR5vDEGBq/xs9jfTFFzQ+CnT5/i5bzRo0ePent7T5w4MTIysmfPnq/xg5SRWCyOjIxMTEw8efIkx0MNHTpUKBTW1tZeuHBh8uTJDXdduXIFQCsnScqO5o02Q0tLKy8v79SpU7Nnz9bT01N0OVi3bh3DMIMHD46MjGwUl2ZmZr/++mt7Zur9+OOPV65csbGxCQ4OllulcsGORgWtXsJu6PTp0zt37kxMTGQ3R4wYASAuLu7+/fsNu6Wnp585cwYAu+hJUVGRlpZWXFycg4NDaGhoo7n6qodhmJiYmD59+gQFBZ06dap+mm37paam7ty5k51yD8DAwGDKlCkAQkJCGi5usmPHDvbgs2bN6qDa3zgUo2+6vLy8q1evAli2bBmf/5p/X48fP2bTMyoqSumWLKt8xRjdvn27j49PTEwMu7lo0SJra+vKysrhw4d/++23586dS0hI2LRp0/Dhw6uqqpydnT/77DO2W3p6+qefflpdXb169WpbW9sDBw4wDCOnL6VYqampI0aMmDlzZn5+/oABAy5fvtzSLIhWxMXF+fj4rF27tr5lw4YNBgYGaWlpzs7OISEhERER06ZN+/LLLwHMmzeP/fdMJVGMAkBWVha7flphYaGia2ksOTmZ/cOYMWNa7ymRSMaPHz9+/PiUlJRGu/z8/MrLy2fNmtXmQRTujEj0yf37/pmZ9S31o9GCFy9+LS//39LS358/l7YccPr6+sbGxvVrYurr61++fNnd3b2srGzNmjXjxo1zd3cPCgoqKSmZOnXq+fPn6y/k9ejR48CBA9euXRsyZEh+fv6cOXOGDh2alJQkz6/b2QoLC318fAYPHvzbb78ZGxuHhYUlJye7urq+xqG0tbWNjY0bPjv/zjvvxMfH29nZ5eTkrF+/3t/f/8SJE0KhMDAw8Mcff+y4L/HGUfFro6dOnXr8+HHT9kZxWVlZ+dtvv6HJY9etuHfv3t27d7lX2AobG5v+/fvn5+cDMDExMWzrYqJMJjt79iyARYsWNWyPjY39+eefDQ0Nt23b1tJn79y50+ict02G7u5Mu0eIAMw1NOxfBlamWByYldWow0Rj4xF6emUSSbpY/FworG83GzHinW3bdPT0pry8FwTAUCD4nx49JhsZAYiIiHj27Fm/fv3YXcePH290ZGtr6wsXLty/f//KlSsFBQU8Hs/c3HzMmDG9m9yzAjBkyJDExMSDBw8GBgYmJycPHz585syZW7ZsMTMza/+XfQNJJJIffvhh9erVFRUV6urqfn5+oaGhDSc2WVlZ/fzzz3h5R6iphQsXjh49un5y2LJly5YtW9aoz8CBA9PS0hITE2/duvXixQtzc/MPP/yQnUaiwlQ8Rs+cOcNe/2qdiYnJ0qVLAbR/HZrY2NiGpzPy4Ofn991337GP5bVnvXE1NTV2EaOGT/uIRKLFixcD2LRpk6mpaUufPXToEDsToP2G//57zauc804yMlrz8taNSCpNaPB4O+tdHR3o6Tnq6Cx4++3uDaZ/f2Bv/9jIyK5bNydtbX2B4IlEEi8SZVRXf5uT80Im8zIxGTlyZHsKsLOzY+8mt4nP58+ePXvatGlbt27duHFjdHT0Tz/9tHz58pUrVyrpopmnTp1aunRpVlYWgDFjxkRERDT9Vejq6rIXN1vi6Ojo6OjY5s9SU1Njz+24FKxcVDxG58+f/8EHHzRtz87Obninxdzc/FVnDjo4OMyYMYNrfa1iZ7yy/9/W1ta22V8gEDQ9dVq2bFlxcfH777/PrlvcEicnp1f9OrZ6etJXGY3aN5jgaa+tvbzJOzzMhUIAztrazn+dCjrdxGS6iUnDlrlvvbU1Nzf26dPI/HwPQ0Nt+awWqqOj880333zyyScBAQGnT59eu3bt4cOH94WHDxs3Th4/Tk4ePHgQEBDADib69Omzffv2Lrh0iNwxKoqdAPjDDz80u7f+gqNEImnzUGyc7du3r6NrbJedO3cC0NTUbE+pjSQkJPB4PIFAcOvWrYbt7NPNvr6+HVdme/2rsHBgSsoX6ekcj1MhlbqkpAxMSfmPSNQhhbXuwoULjo6OAoHgjo0N4+7OpKV1wg/lSiRiFi8+PWoUAAMDg7CwsNf4T4i0h4qPRlUA+xhyTU3N3bt3X2lyuFQqXbBgAcMwRkZG69evb7jr5s2bAOLj4729vT08PObOnduhJXcGXTU1E6HwSW1t0V+Xd5ITd3f31NTUxP37Hb76CpmZGDAAX36JNWvwWlNZ5U4qRVQUvvkGItF4ff1VAQFLvv66zWvr5LVRjL7p+vXrZ2BgUFZWdvjw4VeKUbFYnJ2dDaC4uPjo0aNNO2RlZWVlZVlYWHRYrZ1IxjDPpFIA+m09d9BR1NXV3RYswLRpWLsW33+P8HBER2P1avj5obNqaJeEBCxZAvbJy9GjeWFh39LaK3JGE57edBoaGuzD4BEREffu3Wv/B7W1tVNa4OXlBWD69OkpKSkBAQHyKr2D3Hj+nE3MhpIqKl7IZHzAsZPXrjc0RHg40tIwdixEIixZAicnnD3bqTW0JDMT3t5wd0daGmxsEBuL//wHlKHyRzEKABkZGS4uLi4uLgUFBYqupRmrVq2ytLSsrq4ePXr0iRMnGj6bnJWVtXLlSvYkXSKRjBw5cuTIkdevXwegpqY2sAXsLXsTE5OBAwe+aa9rjystnX737hfp6exmckWFf0bG7AcPLpSV1TIMABlwvqxsdU4OgA8NDZsuZdIZ7Oxw9ix++QW9e+PBA4wfj0mTkJ2tgEpYVVX45hs4OuLoUWhrY80apKXBy0th9XQxb9LJiOKIxeLff/8dQP2rtA8cOLBkyRL2zxUVFQD8/PzYSVGfffbZ1q1bO7M8fX39c+fOjR8//tGjR9OmTTMxMbG3txcIBJmZmbm5uQzDVFRUfP/99zKZ7PLlywBKS0s7s7yOVSGVPqqpqX35T0VvLS0bLa17YnFQdraQzzdTVy+RSKplMgADdXW/VuzShZMmYexYREVh1SrExeH8efj6IjQUza0yJy8Mg+horFiBoiLwePj0U2zeDHotSudS2Rg1MjIyNTVtaSKxuro6OyJjFxs1MzP7+uuv8XKxr0aaXXuxk9nZ2aWlpYWFhf373/++d+8eG5d8Pt/W1nbSpEnsfHuBQODv7w+gV69erR9NS0vLwMCgpV+OXHVXU7PQ0DBp+ZVn/XR0/CwsdF/OYTJWV9/Xt+8ZkeiMSJRWVZX34oUmnz9AV3eioeFEY2PFn0wJhfD3x4wZCArCoUOIiMDx49i0CTNndsZPT07GkiW4dg0ABg1CeDhU+l0dbywe
o6JPDauwysrKkpIShmHeeustreZWllNhEoZRf5XJqp0qJQX+/khMxOefo3OefRwyBMnJsLTEpk34+9/xxv5mVB3FKCEdhz3FHjcOLT8w1pGuXcPZs1ixAoo4sSD1KEYJkb+6Oty9i8ePIZXC2BjOzm0HX3U1Wl8oR1cXf324iyiKyl4bJeSNUFqK9euxfz9Eov82amjAwwOhoXBwaPGD169j1KjWjjxnDujt0G8GilGVkpSUdOnSpWHDhrm5uSm6FgI8fIhx45CTAzU1jBgBZ2cIBPjjD8TH48QJnDmDQ4cwbVrznzUwQEurGqamQiTq1PkApHUKfBCVdDj2DexBQUGKLoQwTGUlY2vLAIydXeNn8IuLmUmTGIDR0GBu3361w4pEjI4OAzA3b3ZgsYQLxc8YIUQ1hYUhPR36+oiPR6P15UxNcfw4Bg3Cixd41afIduxAZSU++AAv11clCkcxSoh87NwJAIsXo9lVC9TVwa4Xk5CAjIz2HlMiwQ8/AECT9ZKJAlGMEiIH2dnIzQUAT88W+7i7Q18fDINffwWAx4/x3Xf47ju8fJSuGYcOIT8fjo748MOOrpi8PopRQuSAXUSGx2vtXjyf/+fJPts5IwOLFmHRIlRVtfiRsDAACAigmfZvFLpTT4gcsNObdHTQ4KVSzTAy+m9nDQ2YmwNAS+9/PX8et2/D1BQff9yRpRLOKEYJkQN2tNjOMSPbbehQtL7AGPtGwsWLoWyvyFZ5dFJPiByw70asrETri/Oz49D2vEjxzh3Ex6NbN/j4dER9pCNRjBIiB337AoBMhlZeW80wYN/R3Z73lW7dCobB3LlQ9ZcVKyOKUULkwMbmz0U/T59usU9i4p+j0TbfRfz4MQ4fBo+HRYs6rkTSYShGCZEDHg+ffQYAkZF49qz5PqGhAPD++38OXVsRGYnaWkye3HZPoggUo4TIR0AAzM1RWAhPTzR6H4FEgiVLcO4c1NSwefOfjfn52LIFW7agpuYvncVi7NoF0JT7NxfdqSdEPoyMEBuLiRNx6RJsbfHRR3B2hqYm0tNx7BgyMsDnIzLyv+vVZ2UhMBAA5s//y7343btRWgoXl7bP/YmCUIwSIjfDhyMpCcuW4fRpREX9ZZeTE7Ztwwcf/LelWzfY2ADAyxeoAEBdHSIiAGD58k6ol7weilFC5KlPH8TFoaAAly6huBi1tTA1xeDBjRcrATBoUDMP19fUYMcOABg5shOKJa+HYpQQ+bOweM2X3Glrt7jqKHlj0C0mQgjhhGKUEEI4oRglhBBOKEYJIYQTilFCCOGEYpQQQjihGCWEEE4oRgkhhBOKUUII4YRilBBCOKEYJYQQTihGCSGEE4pRQgjhhGKUEEI4oRglhBBOKEYJIYQTilFCCOGEYpQQQjihGFUpw4YNCwwMdHNzU3QhhHQhPIZhFF0DIYQoMXqlnXILDQ0tKyubM2fOu+++23TvgwcPdu7cqaWltX79+lYOkpmZuW/fvszMzGfPnpmbm7u5uXl5eWlpacmtakJUCo1GlZulpWV+fv6RI0e8vb2b7j1z5oyHh4eenl55eXmzH5fJZEFBQdu2bZPJZA3be/fuHRcXZ2dnJ5eiCVEtdG20S/vqq6+2bNliZWW1d+/etLS0u3fv/utf/7KwsMjOzvb09JRIJIoukBAlQCf1XdeLFy+uXLny9ttvJyUlmZmZsY329vb29vZDhgxJT09PTEyku1WEtIlitOvS0NC4fv16Xl5efYayXFxcBAKBVCotKipSVG2EKBE6qe/SeDyelZVVo8b09HSpVArAxsZGEUURomQoRruQ27dvs+fsJSUlLfUpLy/38/MDMGrUqIEDB3ZidYQoKzqpVwV79uy5dOlS0/bc3NyGm2Kx+P79+wDYwWZDMTExqampubm58fHx5eXlkyZNOnDggNzqJUSlUIyqgvPnz7enm5WV1bp16wDo6uo22nXy5MnY2Fj2z6amppMnT27ahxDSLJo3qtzYeaMhISHN3lK/ceNGcHBwK/NG6xUXF5eVlRUWFl6/fj0sLKy4uHjq1KnHjh3j8+myDyFtoNGoKnBychozZkzT9vZP/DQzMzMzM+vbt++oUaNmzpxpb29/4sSJ48ePe3l5dWilhKggGmuQxiwtLYcOHQogPj5e0bUQogQoRkkzTE1NATx9+lTRhRCiBChGu7SEhISTJ082aqytrU1KSgLwzjvvKKIoQpQMxWgXcvPmzV69evXq1YudN1pcXDxlyhQvL68NGzZUVVWxfUpLS+fNm5ednc3n8+fOnavIcglREhSjXUhNTU1OTk5OTg47b9TMzCwiIoLP5wcHB5uYmDg4ONjb25ubm8fExPD5/IiICEdHR0WXTIgSoDv1ys3W1lZXV1dPT6/ZvTo6OnZ2dt27d2c3e/bsuW3bNgD1LfPmzXvvvfe2bNly9uzZe/fuATA0NBwzZkxAQMCQIUM65RsQovRo3ij5k0QikUqltFozIa+KYpQQQjiha6OEEMIJxSghhHBCMUoIIZxQjBJCCCcUo4QQwgnFKCGEcEIxSgghnFCMEkIIJ/8P74ZWgJKpAVMAAACwelRYdHJka2l0UEtMIHJka2l0IDIwMjMuMDkuNQAAeJx7v2/tPQYg4GdAAA4gZgPiBkZGDQ0gzcgowcimoQBksUiwwIWY4CxmTrAkowQrXIiNAyzEJMEOF+LgZmBkYGRiYGRmYGRhYGVjYGNnEAFZJ54FUoBk+YH906cpqII4Z8/4qADxEhBbQkIWKHbAHsTeZ9u6FKQOouWAPUxNofJiB6C77SDsYiCbAapGAS4uBgAnfB2tRmsD5QAAAPF6VFh0TU9MIHJka2l0IDIwMjMuMDkuNQAAeJx9kkGOwjAMRfc5xb8AkZM0TbKkLcOMEK0EhTuwn/trHJDHtB2N04XtvnzrWzGocRlOj2/8hh+MAeifr5SCeyAic0ZN0B2OXyP6ed9Jp59u43xFRss3+CzJ/TydpePwiZ2zkWqALK0SJ6BHr93dFmwEDKyo1zfKXrimTlbFLRmEjPhQoT9GRwFbloyWSqbExq2PS64VLmFCsKkUCrkKricnATMLev29FszCHcZhsdXXnrtpHHTP9XjdJhfqMDzLRldTy6j+HVtL6tKxg8Xs90m1lrfAufkBkHpy18/7PwAAAACwelRYdFNNSUxFUyByZGtpdCAyMDIzLjA5LjUAAHicZcq7DoMwDAXQX+kIUrCcFwlmqipVLH3skQcqdQOBULvx8U3abl7s6+ObBtKcTuS4SgMZrsuyXKczeYacW043CpxT5MNeNRq8QtXjb5Qrr6/2/5cH7CJGhWAyWghdh041WCqmVIrXanwt831bVkKYl+mYr8u4Xt/z47mBJi3RkZFoyEq05CR68hJbaiUGChLj/gHLeln8f0JKEgAAAABJRU5ErkJggg==", + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "rxn_smi = \"[H:1][C:4]([H:2])([H:3])[F:5]>[H:6][O:7][H:8]>[H:1][C:4]([H:2])([H:3]).[F:5]\"\n", + "from_rxn_smi = ReactionDatapoint.from_smi(rxn_smi, y, keep_h=True)\n", + "from_rxn_smi.rct" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAP+ElEQVR4nO3de1BTd5/H8W9iIAECCBhEQXkYUASt4spqq6BsRwVWrBcKbrHSOiJycaaX6VQ79QK2TqWDZbTj2GJFeaxaa8Wnio7VUhURi+I6tihIYUUCChIkoJCQ6/5xVpcSrv6AXPi8/kp+OTnzZcy85+ScA/L0ej0BAMDL4ht7AAAA84aMAgAwQUYBAJggowAATJBRAAAmyCgAABOBsQcA6KywsLC+vt7Pz2/SpEmGryoUinPnzhFReHi4SCTqbid6vb6goKCsrOzJkyeOjo7BwcGTJ08exKFhONMDmJjQ0FAi2rJlS5evVldXcx9dqVTa3R7y8vJ8fX07fdQXL14sk8kGbWoYvnA0CpYmNzd3+fLlarU6ODh4yZIlDg4Ot27d2rdv3+nTp5cvX37p0iUej2fsGcGiIKNgUXQ63UcffaRWq1NSUrZu3fpiPSQkZMWKFfn5+efPn+eOdgEGCi4xgUXh8/mXL1/es2fPli1bOq5HR0f7+PgQ0W+//Wak0cBiIaNgaUaPHp2UlGT4zZ3LaENDgzGGAkuGjIJ5a29vX7du3bp168rKynreUiaTEZGLi8uQzAXDCM6NgokqKSk5evSo4fqTJ086PlWpVJmZmUQUFRXV5Q1SHJlMVlJSQkRz5swZ6ElhuENGwUTl5OTk5OT0uplAIOAuGY0aNaqHzb7++mulUunu7h4eHj5gIwIQETIKJmvu3Lnz5s0zXG9padm1a9eLpzY2Ntzd+D0oKir64osviCgtLU0oFA7snADIKJiokJCQ1NRUw3WpVNoxo72qqKiIjIxUq9Xx8fErV64cuAEB/g8uMYElKy8vDwkJqa2tjYmJ2bt3r7HHAcuEjILFysvLe+2112pra6OiorKzs/l8fNphUOCDBZZp165dYWFhTU1NKSkpx44dEwhw/goGCz5bYN6USmV8fDwRbdiwgfsbTi0tLfHx8ceOHbO3t8/Ozl62bJmxZwQLh4yCeVOr1YcOHSKi2NjYyZMny+Xy6dOnV1VVEZFYLN6+ffv27ds7bu/k5HThwgWjjAqWChkF82ZlZbV06VIicnV1JSK5XM41lIgePXr06NGjTttLJJKhHRAsH0+P/6ceTExLS4tKpbK1tbW1tTV8VafTcb/I5OzsbHjVSKfTNTc397BzHo83cuTIAZwWABkFM6DVank8Hi61g2nC5xJM3alTpwQCAffNHcAEIaMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGAiMPYA0A8///xzSUlJYGBgaGio4asajSYtLY2I1q5d6+rq2sN+FArF4cOHZTJZWFhYQEDAYI3bNxqNprCw8MqVK/X19Tqdzs3Nbf78+bNmzeLxeH3fyf79+2UyGRF5eHisXLly0IbtWpVSmdvY2N2rQY6OAWKx4fqR+vpGjcZwfYqd3X+MHDmQ88EgQ0bNyY8//njkyJHk5OQuM6pSqTZt2kREERER3WW0oaEhKytr9+7dDx8+JKKmpibjZvTs2bPvv//+X3/91XFx8+bNs2fPPnDgwMSJE4lo5syZOTk5Y8aM6W4nZ86ciYuL4x4HBQUNfUal7e0H6+q6e3WkQNBlRg/W1z9Rqw3X48eORUbNCzI6XKhUquTk5O+//16pVPL5fLFY/OzZM+OOlJ2dvWbNGq1W6+7uvnr16oCAAIFA8Oeff2ZlZRUWFoaEhNy+fVsikbi5uS1btqy7nTQ3N69bt46IwsLCzp07N4Tjd2Y/YsTG8eMN1yfZ2houavV6uVpNRCn/+IeIz+91ezBlyOhwYW1tfefOnaCgoIiIiKioqI0bNx46dMiI85SXlycmJmq12oULF544cUL8/HhtyZIlH3744cqVKxcsWCCRSHrdz4YNG2pra5ctW7ZgwQLjZtSazw91du7jxo1qtY7Imsdb5OLSj5MXYJKQ0WGksLDQ2CP8v/T0dIVC4ebmduzYMfHfv/Pa2tqePHmyLzvJz8/PzMy0t7ffvXv36dOnB2fSQSFTq4lolJUVGmoBcKUejECn0x0/fpyI1q5dO/JlzwO2t7cnJCTo9frPPvvMw8NjQAccdA1cRq2tjT0IDABk1GJptdrg4ODg4GCTOgjllJeXy+VyIlq4cGGvG8fFxYWHh584caLTempqamlp6bRp05KTkwdlyoFT0toaU1oaU1qq0um4lUYuowLBE42moLn5bGPjtZaWtuevgnnBl3rzU1RUtHnzZsN1zd/vntHr9QUFBUTU2P29OJ3U1NRcu3aNfcIeuLq6zps3r6amhnvKXYvv2eXLlysqKl5//fWOi3/88Ud6ejqfz//2228Fgq4/xtXV1UVFRf0az2XOHJ2NTd+3dxAIZtrbc4+farUfV1Z22uDfHRyiJJJWrba8rY2ItM/XuS/1v7e0hN2+/aKdtnz+225ua9zcRvTnZi8wOmTU/BQXFxcXF/e6GZ/P/+CDD4jI29u7j3u+du1adHQ003C9CQkJuXjxYnNzM/fU/nmDevDWW2/V19d3vDFLq9WuWbNGrVa/9957s2bN6u6N+fn5q1at6td4kTduVPUnYf62tv/08+Meq3S63+TyThuIBQIichcK48aMISKr5zv3t7PzEoncrK3/zd5eYmXVpNEUNjffePo08+HDJrV6Q1dX/MFkIaPmJzQ0dPXq1YbrKpUqNjb2xVM+n//VV1/1a8/jxo178803Wefr0ZQpU4hIKBRyT1UqlU1vR3/btm3rtJKRkVFcXDxmzJjU1NQe3ujp6dnfH2eaUOhtZ9f37cc9/0GIyFEgyPDx6bSBi0BARB5CYcLYsR3XgxwdgxwdO66sGj36x8ePv5RKjzc0LJdIJvTnoBiMCxk1Pz4+PitWrDBcb2tr65jRl/Dqq69yV34Gm/PzG4Pq6uoc/16TXlVVVaWkpBDRnj17en4vd2r4ZWfsNwGPN7U/CTYU5eqaXV9fr1JdlMuRUTOC
jIIRTJkyhc/n63S6mzdv+vr69uu9iYmJra2tIpHo6NGjR48efbFeWVlJRGVlZdHR0VOnTuV+ocu88Ii8RKJ6lapepTL2LNAPyCgYgYODw4wZM27cuHHkyJGYmJh+vffOnTtEpFQquzxwlslkx48flxucozQXTzQaInLq5qIZmCb8a4FxJCQk3Lhx48yZM7m5uREREX1/45kzZ1RdHaz99NNPO3bsCAgI+O677xwcHAZu0kFRrlCI+PzxHU6tElFNe/v/KBRExHhyAIYYMmqxtFotdxU7IyNjKE8R9tE777yTlZV19erV6Ojo9PT0uLg46+f3oj9+/PjgwYMTJ05cunQpEb399ts1NTWJiYncGeFXXnmlyx1ev36diMRi8YwZM4bqh+irP549S33wgIiO+PkJ+fxqpTK5vFxLlDh27H86O9uNGEFE//306ecPHmj0el9b2yD8aRKzgoxaLL1ef/PmTSJ68Q33+vXrYWFh3OPW1lYi2r179759+4ho9uzZubm5QzneiBEjTp48GRERcf369eTk5I0bNwYEBNjY2Ny/f//+/fsajWbu3LlcRouKiioqKhYtWjSU4w0shU73QKkkIu4WUVdr62li8SW5PK26eqdUOtra+qlG06LVEpGnSJTh7Y3fijEvyKg5cXR0dHV17e4bK4/H4/4+npWVFRHx+fxPP/2UurnF3c7Ozs7Y3xwlEklBQcH+/fsPHDhw8+bNK1eucOvjxo0LDw9PSkrinr777rsNDQ2BgYE9700oFDo5OfXlRtQBZ8PnuwuFzt2f0BwnEiW7u9Pz+0ZFfH66t3dhc/O/GhtvPX1a295uzeNNsrVd6Oz8XxKJNR8VNTM8vV5v7BkASKFQyGQytVo9atQo0z+zObDUer0Vfm3JnCGjAABM8PUBAIAJMgoAwAQZBVNXUVGxY8cOwz+UB2AikFEwdXfv3v3kk0+ys7ONPQhA15BRAAAmyCgAABNkFACACTIKAMAEGQUAYIKMAgAwQUYBAJggowAATJBRAAAmyCgAABNkFACACTIKAMAEGQUAYIKMAgAwQUYBAJggowAATJBRAAAmyCgAABNkFACACTIKAMAEGQUAYIKMAgAwQUYBAJggowAATJBRAAAmyCgAABNkFACACTIKAMAEGQUAYIKMAgAwQUYBAJggowAATJBRAAAmAmMPANCLCRMmfPzxx/7+/sYeBKBrPL1eb+wZAADMGI5GweRkZmaWlZWFhoaGhoYaviqXy7dt20ZEW7dudXR07G4njx8/zsrKunv3bl1dnZub28yZM2NiYpydnQdxbhi29AAmhqvnli1buny1urqa++hKpdLu9vDNN9/Y2Nh0+qi7uLhcuHBh0KaG4QuXmMDSZGVlJSQkCIXCnTt33rp1q6ys7OTJk1OnTm1sbIyMjKyrqzP2gGBp8KUeLE1eXp5QKLx48WJAQAC34uvrO2fOHC8vr5aWlpycnKSkJONOCBYGR6NgaQ4fPnzv3r0XDeVIJBJvb28iwtEoDDhkFCyQp6dnp5X29vbKykoi8vHxMcZEYMmQUTBvbW1t/v7+/v7+v//+e3fbtLe3r1+/vrW1dfz48VFRUUM5HgwHODcKJurs2bMNDQ2G68+ePev4VKvVlpaWElFra2unLfPy8s6fP19bW3vp0qXa2trAwMAffvjB8Ao+ACNkFExUcXFxcXFxr5sJhcLPP/+ciLhTnx1dvXr1yy+/5B6LRKLIyMjRo0cP+JwA+C0mMDlhYWG//PLLqlWrYmNjDV9taGiIiYkhIqlU6uHh0cN+mpqaZDJZXV1dSUnJrl277t27FxgY+Ouvv/Zw0z7AS8DRKJgoLy+v+fPnG65LpdI+7sHJycnJyWnChAnBwcGxsbHTp08vLi7OyMhISUkZyEFh2MMlJhgW7OzsFi1aREQXLlww9ixgaZBRGC5cXV2JqMvLVgAskFGwNPfu3duzZ4/hen5+PnV1JQqAETIK5q21tdXLy8vLy6uwsJCINBrN4sWL169fn5SUJJPJuG2USuWmTZvOnTtHRKtXrzbmuGCJcIkJzJtOp6uqqiIihUJBRAKBIDMzMzIycu/evZmZmT4+PtbW1pWVlW1tbUSUnJwcHR1t3IHB8iCjYHI8PT39/Py4U5mGrKys/Pz8uAdEJBKJdu7cSUQTJ07kNggJCSktLU1LSzt16lRFRYVWqxWLxWFhYYmJiW+88cZQ/RAwjOC+UbBkOp2ura1NLBYbexCwZMgoAAATXGICAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDA5H8BhAgcb+74aH4AAACLelRYdHJka2l0UEtMIHJka2l0IDIwMjMuMDkuNQAAeJx7v2/tPQYg4GdAAFYgZgHiBkZGDQ0gzcgowcimoQASlWCBCzHBWcycYElGCVZuBkYGRiYGRmYGRhYGEZBZ4m4gGSSTD+yfPk1BFcQ5e8ZHBYiXgNgSErJAsQP2IPY+29alIHUQLQfsYWrEALyjFrvi+CrHAAAAvnpUWHRNT0wgcmRraXQgMjAyMy4wOS41AAB4nH2QSw7CMAxE9znFXKCVm8+iy7YpH6GmEhTuwJ77qzHImFCEk4VtPY8zMeA4x9P9gXfYaAxAf27btrg5IjITOEE/7o8Jw9L10hnma1ouCPB5Ip+S7JZ5kk6DA6qmDsQBqukraQS0GLRbbUEvoMuKOr5RtsJ53qyKW9IJGbBToR+rg4BjioW5l91+TlHt8rFqKhe6yD1Lry/kslD/1OJaPj3nZgXXjFcxHOdHzwAAAIN6VFh0U01JTEVTIHJka2l0IDIwMjMuMDkuNQAAeJyL9rAyjI12tjKJ1Yj2sDKK1QRRxrGa0W5WprEKNRq6hnqmOgY61gYQAsQDUmBRa4iUpk5iSX5uQFF+gZWBXm5+jiOQ55tY4Feam5RapGdoZYgpaGJlhCloZGWMKWhsZYIpaFoDADOaN9kqtHGTAAAAAElFTkSuQmCC", + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rct_smi = \"[H:1][C:4]([H:2])([H:3])[F:5]\"\n", + "pdt_smi = \"[H:1][C:4]([H:2])([H:3]).[F:5]\"\n", + "from_tuple = ReactionDatapoint.from_smi((rct_smi, pdt_smi), y, keep_h=True)\n", + "from_tuple.rct" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/docs/source/tutorial/python/data/datasets.ipynb b/chemprop-updated/docs/source/tutorial/python/data/datasets.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..078c3d94283a827ca21d92e3d390982e8c90b7e0 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/data/datasets.ipynb @@ -0,0 +1,367 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.data.datasets import MoleculeDataset, ReactionDataset, MulticomponentDataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To make a dataset you first need a list of [datapoints](./datapoints.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from chemprop.data import MoleculeDatapoint, ReactionDatapoint\n", + "\n", + "ys = np.random.rand(2, 1)\n", + "\n", + "smis = [\"C\", \"CC\"]\n", + "mol_datapoints = [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]\n", + "\n", + "rxn_smis = [\"[H:2][O:1][H:3]>>[H:2][O:1].[H:3]\", \"[H:2][S:1][H:3]>>[H:2][S:1].[H:3]\"]\n", + "rxn_datapoints = [\n", + " ReactionDatapoint.from_smi(rxn_smi, y, keep_h=True) for rxn_smi, y in zip(rxn_smis, ys)\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Molecule Datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MoleculeDataset`s are made from a list of `MoleculeDatapoint`s." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDataset(data=[MoleculeDatapoint(mol=, y=array([0.23384385]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='C', V_f=None, E_f=None, V_d=None), MoleculeDatapoint(mol=, y=array([0.74433064]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='CC', V_f=None, E_f=None, V_d=None)], featurizer=SimpleMoleculeMolGraphFeaturizer(atom_featurizer=, bond_featurizer=))" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDataset(mol_datapoints)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dataset properties" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The properties of datapoints are collated in a dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.23384385]\n", + " [0.74433064]]\n", + "['C', 'CC']\n" + ] + } + ], + "source": [ + "dataset = MoleculeDataset(mol_datapoints)\n", + "print(dataset.Y)\n", + "print(dataset.names)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Datasets return a `Datum` when indexed. A `Datum` contains a `MolGraph` (see the [molgraph featurizer notebook](../featurizers/molgraph_molecule_featurizer.ipynb)), the extra atom and datapoint level descriptors, the target(s), the weights, and masks for bounded loss functions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Datum(mg=MolGraph(V=array([[0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 1. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0.12011]], dtype=float32), E=array([], shape=(0, 14), dtype=float64), edge_index=array([], shape=(2, 0), dtype=int64), rev_edge_index=array([], dtype=int64)), V_d=None, x_d=None, y=array([0.23384385]), weight=1.0, lt_mask=None, gt_mask=None)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Caching" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `MolGraph`s are generated as needed by default. For small to medium dataset (exact sizes not yet benchmarked), it is more efficient to generate and cache the molgraphs when the dataset is created. \n", + "\n", + "If the cache needs to be recreated, set the cache to True again. To clear the cache, set it to False. \n", + "\n", + "Note we recommend [scaling](../scaling.ipynb) additional atom and bond features before setting the cache, as scaling them after caching will require the cache to be recreated, which is done automatically." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "dataset.cache = True # Generate the molgraphs and cache them\n", + "dataset.cache = True # Recreate the cache\n", + "dataset.cache = False # Clear the cache\n", + "\n", + "dataset.cache = True # Cache created with unscaled extra bond features\n", + "dataset.normalize_inputs(key=\"E_f\") # Cache recreated automatically with scaled extra bond features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Datasets with custom featurizers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Datasets use a molgraph featurizer to create the `MolGraphs`s from the `rdkit.Chem.Mol` objects in datapoints. A basic `SimpleMoleculeMolGraphFeaturizer` is the default featurizer for `MoleculeDataset`s. If you are using a [custom molgraph featurizer](../featurizers/molgraph_molecule_featurizer.ipynb), pass it as an argument when creating the dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDataset(data=[MoleculeDatapoint(mol=, y=array([0.23384385]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='C', V_f=None, E_f=None, V_d=None), MoleculeDatapoint(mol=, y=array([0.74433064]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='CC', V_f=None, E_f=None, V_d=None)], featurizer=SimpleMoleculeMolGraphFeaturizer(atom_featurizer=, bond_featurizer=))" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.featurizers import SimpleMoleculeMolGraphFeaturizer, MultiHotAtomFeaturizer\n", + "\n", + "mol_featurizer = SimpleMoleculeMolGraphFeaturizer(atom_featurizer=MultiHotAtomFeaturizer.v1())\n", + "MoleculeDataset(mol_datapoints, featurizer=mol_featurizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reaction Datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reaction datasets are the same as molecule datasets, except they are made from a list of `ReactionDatapoint`s and `CondensedGraphOfReactionFeaturizer` is the default featurizer. [CGRs](../featurizers/molgraph_reaction_featurizer.ipynb) are also `MolGraph`s." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "CondensedGraphOfReactionFeaturizer(atom_featurizer=, bond_featurizer=)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ReactionDataset(rxn_datapoints).featurizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multicomponent datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MulticomponentDataset` is for datasets whose target values depend on multiple components. It is composed of parallel `MoleculeDataset`s and `ReactionDataset`s." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mol_dataset = MoleculeDataset(mol_datapoints)\n", + "rxn_dataset = ReactionDataset(rxn_datapoints)\n", + "\n", + "# e.g. reaction in solvent\n", + "multi_dataset = MulticomponentDataset(datasets=[mol_dataset, rxn_dataset])\n", + "\n", + "# e.g. solubility\n", + "MulticomponentDataset(datasets=[mol_dataset, mol_dataset])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A `MulticomponentDataset` collates dataset properties (e.g. SMILES) of each dataset. It does not collate datapoint level properties like target values and extra datapoint descriptors. Chemprop models automatically take those from **the first dataset** in datasets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('C', ('[O:1]([H:2])[H:3]', '[H:3].[O:1][H:2]')),\n", + " ('CC', ('[S:1]([H:2])[H:3]', '[H:3].[S:1][H:2]'))]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "multi_dataset.smiles" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.23384385],\n", + " [0.74433064]])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "multi_dataset.datasets[0].Y" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/docs/source/tutorial/python/data/splitting.ipynb b/chemprop-updated/docs/source/tutorial/python/data/splitting.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b3c1b5d652ffdd7edd0643ec86a0e7ccc4878060 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/data/splitting.ipynb @@ -0,0 +1,487 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data splitting" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.data import SplitType, make_split_indices, split_data_by_indices" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These are example [datapoints](./datapoints.ipynb) to split." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "from chemprop.data import MoleculeDatapoint\n", + "\n", + "smis = [\"C\" * i for i in range(1, 11)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "datapoints = [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data splits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A typical Chemprop workflow uses three sets of data. The first is used to train the model. The second is used as validation for early stopping and hyperparameter optimization. The third is used to test the final model's performance as an estimate for how it will perform on future data. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop provides helper functions to split data into these training, validation, and test sets. Available splitting schemes are listed in `SplitType`.\n", + "All of these rely on [`astartes`](https://github.com/JacksonBurns/astartes) in the backend." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "scaffold_balanced\n", + "random_with_repeated_smiles\n", + "random\n", + "kennard_stone\n", + "kmeans\n" + ] + } + ], + "source": [ + "for splittype in SplitType:\n", + " print(splittype)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Splitting steps" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. Collect the `rdkit.Chem.Mol` objects for each datapoint. These are required for structure based splits.\n", + "2. Generate the splitting indices.\n", + "3. Split the data using those indices.\n", + "\n", + "The `make_split_indices` function includes a `num_replicates` argument to perform repeated splits (each with a different random seed) with your sampler of choice.\n", + "Any sampler can be used for replicates, though deterministic samplers (e.g. Kennard-Stone) will not change across replicates.\n", + "Splits are returned as a 2- or 3-member tuple containing `num_replicates`-length lists of training, validation, and testing indices." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.mol for d in datapoints]\n", + "\n", + "train_indices, val_indices, test_indices = make_split_indices(mols)\n", + "\n", + "train_data, val_data, test_data = split_data_by_indices(\n", + " datapoints, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The default splitting scheme is a random split with 80% of the data used to train, 10% to validate, and 10% to test." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1, 1, 1)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(train_data), len(val_data), len(test_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each of these is length 1 because we only requested 1 replicate (the default).\n", + "The inner lists contain the actual datapoints for each split." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(8, 1, 1)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(train_data[0]), len(val_data[0]), len(test_data[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Split randomness" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All split randomness uses a default seed of 0 and `numpy.random`. The seed can be changed to get different splits."
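Before the seed examples, here is a minimal sketch of the `num_replicates` argument described under "Splitting steps" above; it reuses `mols` from the earlier cell, and the argument name is taken from that description.

```python
# Minimal sketch: request three replicate splits in one call.
# Each returned element is a num_replicates-length list of index lists.
train_reps, val_reps, test_reps = make_split_indices(mols, num_replicates=3)

len(train_reps), len(train_reps[0])  # 3 replicates, each with 8 of the 10 molecules under the default 80/10/10 sizes
```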
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "([[8, 4, 9, 1, 6, 7, 3, 0]], [[5]], [[2]])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(datapoints)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "([[8, 7, 0, 4, 9, 3, 2, 1]], [[6]], [[5]])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(datapoints, seed=12)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Split fractions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The split sizes can also be changed. Set the middle value to 0 for a two way split. If the data can not be split to exactly the specified proportions, you will get a warning from `astartes` with the actual sizes used. And if the specified sizes don't sum to 1, the sizes will first be rescaled to sum to 1. " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "([[8, 4, 9, 1]], [[6, 7, 3]], [[0, 5, 2]])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(datapoints, sizes=(0.4, 0.3, 0.3))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "([[8, 4, 9, 1, 6, 7]], [[]], [[3, 0, 5, 2]])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(datapoints, sizes=(0.6, 0.0, 0.4))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/astartes/main.py:325: ImperfectSplittingWarning: Actual train/test split differs from requested size. Requested validation size of 0.25, got 0.30. Requested test size of 0.25, got 0.30. \n", + " warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "([[8, 4, 9, 1, 6]], [[7, 3]], [[0, 5, 2]])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(datapoints, sizes=(0.5, 0.25, 0.25))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/astartes/main.py:381: NormalizationWarning: Requested train/val/test split (0.50, 0.50, 0.50) do not sum to 1.0, normalizing to train=0.33, val=0.33, test=0.33.\n", + " warn(\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/astartes/main.py:325: ImperfectSplittingWarning: Actual train/test split differs from requested size. Requested train size of 0.33, got 0.30. Requested test size of 0.33, got 0.20. 
\n", + " warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "([[8, 4, 9]], [[1, 6, 7, 3, 0]], [[5, 2]])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(datapoints, sizes=(0.5, 0.5, 0.5))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Random with repeated molecules" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset has repeated molecules, all duplicate molecules should go in the same split. This split type requires the `rdkit.Chem.mol` objects of the datapoints. It first removes duplicates before using `astartes` to make the random splits and then adds back in the duplicate datapoints." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "smis = [\"O\", \"O\"] + [\"C\" * i for i in range(1, 10)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "repeat_datapoints = [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]\n", + "mols = [d.mol for d in repeat_datapoints]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "([[10, 6, 0, 1, 3, 8, 9, 5, 2]], [[7]], [[4]])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(mols, split=\"random_with_repeated_smiles\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Structure based splits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Including all similar molecules in only one of the datasets can give a more realistic estimate of how a model will perform on unseen chemistry. This uses the `rdkit.Chem.mol` representation of the molecules. See the `astartes` [documentation](https://jacksonburns.github.io/astartes/) for details about Kennard Stone, k-means, and scaffold balanced splitting schemes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "smis = [\n", + " \"Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14\",\n", + " \"COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23\",\n", + " \"COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl\",\n", + " \"OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3\",\n", + " \"Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1\",\n", + " \"OC1(CN2CCC1CC2)C#Cc3ccc(cc3)c4ccccc4\",\n", + " \"COc1cc(OC)c(cc1NC(=O)CCC(=O)O)S(=O)(=O)NCc2ccccc2N3CCCCC3\",\n", + " \"CNc1cccc(CCOc2ccc(C[C@H](NC(=O)c3c(Cl)cccc3Cl)C(=O)O)cc2C)n1\",\n", + " \"COc1ccc(cc1)C2=COc3cc(OC)cc(OC)c3C2=O\",\n", + " \"Oc1ncnc2scc(c3ccsc3)c12\",\n", + " \"CS(=O)(=O)c1ccc(Oc2ccc(cc2)C#C[C@]3(O)CN4CCC3CC4)cc1\",\n", + " \"C[C@H](Nc1nc(Nc2cc(C)[nH]n2)c(C)nc1C#N)c3ccc(F)cn3\",\n", + " \"O=C1CCCCCN1\",\n", + " \"CCCSc1ncccc1C(=O)N2CCCC2c3ccncc3\",\n", + " \"CC1CCCCC1NC(=O)c2cnn(c2NS(=O)(=O)c3ccc(C)cc3)c4ccccc4\",\n", + " \"Nc1ccc(cc1)c2nc3ccc(O)cc3s2\",\n", + " \"COc1ccc(cc1)N2CCN(CC2)C(=O)[C@@H]3CCCC[C@H]3C(=O)NCC#N\",\n", + " \"CCC(COC(=O)c1cc(OC)c(OC)c(OC)c1)(N(C)C)c2ccccc2\",\n", + " \"COc1cc(ccc1N2CC[C@@H](O)C2)N3N=Nc4cc(sc4C3=O)c5ccc(Cl)cc5\",\n", + " \"CO[C@H]1CN(CCN2C(=O)C=Cc3ccc(cc23)C#N)CC[C@H]1NCc4ccc5OCC(=O)Nc5n4\",\n", + "]\n", + "\n", + "ys = np.random.rand(len(smis), 1)\n", + "datapoints = [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]\n", + "mols = [d.mol for d in datapoints]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/astartes/main.py:325: ImperfectSplittingWarning: Actual train/test split differs from requested size. Requested train size of 0.80, got 0.85. Requested test size of 0.10, got 0.05. \n", + " warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "([[0, 1, 2, 3, 4, 6, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19]],\n", + " [[5, 10]],\n", + " [[7]])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(mols, split=\"kmeans\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/docs/source/tutorial/python/ensembling.ipynb b/chemprop-updated/docs/source/tutorial/python/ensembling.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..dda8effd173e150d897bf95e9a8c4816dd4915b0 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/ensembling.ipynb @@ -0,0 +1,355 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Ensembling" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from lightning import pytorch as pl\n", + "import numpy as np\n", + "import torch\n", + "from chemprop import data, models, nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example [dataloader](./data/dataloaders.ipynb)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "smis = [\"C\" * i for i in range(1, 4)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "dset = data.MoleculeDataset([data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])\n", + "dataloader = data.build_dataloader(dset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model ensembling" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A single model will sometimes give erroneous predictions for some molecules. These erroneous predictions can be mitigated by averaging the predictions of several models trained on the same data. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "ensemble = []\n", + "n_models = 3\n", + "for _ in range(n_models):\n", + " ensemble.append(models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), nn.RegressionFFN()))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | RegressionFFN | 90.6 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.273 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 14.38it/s, train_loss_step=0.234, train_loss_epoch=0.234]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 13.86it/s, train_loss_step=0.234, train_loss_epoch=0.234]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | RegressionFFN | 90.6 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.273 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 46.40it/s, train_loss_step=0.215, train_loss_epoch=0.215]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.79it/s, train_loss_step=0.215, train_loss_epoch=0.215]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity 
| 0 | train\n", + "3 | predictor | RegressionFFN | 90.6 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.273 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 42.51it/s, train_loss_step=0.239, train_loss_epoch=0.239]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 36.88it/s, train_loss_step=0.239, train_loss_epoch=0.239]\n" + ] + } + ], + "source": [ + "for model in ensemble:\n", + " trainer = pl.Trainer(logger=False, enable_checkpointing=False, max_epochs=1)\n", + " trainer.fit(model, dataloader)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 83.86it/s] \n", + "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 82.63it/s]\n", + "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 68.94it/s] \n" + ] + } + ], + "source": [ + "prediction_dataloader = data.build_dataloader(dset, shuffle=False)\n", + "predictions = []\n", + "for model in ensemble:\n", + " predictions.append(torch.concat(trainer.predict(model, prediction_dataloader)))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[tensor([[0.0096],\n", + " [0.0008],\n", + " [0.0082]]),\n", + " tensor([[0.0318],\n", + " [0.0260],\n", + " [0.0254]]),\n", + " tensor([[-0.0054],\n", + " [ 0.0032],\n", + " [-0.0035]])]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[0.0120],\n", + " [0.0100],\n", + " [0.0100]])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch.concat(predictions, axis=1).mean(axis=1, keepdim=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/docs/source/tutorial/python/featurizers/atom_featurizers.ipynb b/chemprop-updated/docs/source/tutorial/python/featurizers/atom_featurizers.ipynb new file mode 
100644 index 0000000000000000000000000000000000000000..7d02bcbf62df15efe46d1ce1e0b8913e06f38883 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/featurizers/atom_featurizers.ipynb @@ -0,0 +1,373 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Atom featurizers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers.atom import MultiHotAtomFeaturizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example atom to featurize." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rdkit import Chem\n", + "\n", + "atom_to_featurize = Chem.MolFromSmiles(\"CC\").GetAtoms()[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Atom features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following atom features are generated by `rdkit` and cast to one-hot vectors (except for mass which is divided by 100). These feature vectors are joined together to a single multi-hot feature vector (with a final float32 bit for mass). All of these features (except aromaticity and mass) are padded with an extra bit for all unknown values.\n", + "\n", + " - atomic number\n", + " - degree\n", + " - formal charge\n", + " - chiral tag\n", + " - number of hydrogens\n", + " - hybridization\n", + " - aromaticity\n", + " - mass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### v2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The v2 atom featurizer is the default. It provides bits in the feature vector for:\n", + "\n", + " - atomic number\n", + " - first four rows of the period table plus iodine\n", + " - degree\n", + " - 0 bonds - 5 bonds\n", + " - formal charge\n", + " - -2, -1, 0, 1, 2\n", + " - chiral tag\n", + " - 0, 1, 2, 3 - See `rdkit.Chem.rdchem.ChiralType` for more details\n", + " - number of hydrogens\n", + " - 0 - 4\n", + " - hybridization\n", + " - S, SP, SP2, SP2D, SP3, SP3D, SP3D2" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0.12011])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "featurizer = MultiHotAtomFeaturizer.v2()\n", + "featurizer(atom_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### v1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The v1 atom featurizer is the same as was used in Chemprop v1. It is the same as the v2 atom featurizer except for:\n", + "\n", + " - atomic number\n", + " - first 100 elements (customizable)\n", + " - hybridization\n", + " - SP, SP2, SP3, SP3D, SP3D2" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. 
,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. , 0.12011])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "featurizer = MultiHotAtomFeaturizer.v1()\n", + "featurizer(atom_to_featurize)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 1. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0.12011])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "featurizer = MultiHotAtomFeaturizer.v1(max_atomic_num=53)\n", + "featurizer(atom_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### organic" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The organic atom featurizer is optimized to reduce feature vector size for organic molecule. It is the same as the v2 atom featurizer except for:\n", + "\n", + " - atomic number\n", + " - H, B, C, N, O, F, Si, P, S, Cl, Br, and I atoms\n", + " - hybridization\n", + " - S, SP, SP2, SP3" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0.12011])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "featurizer = MultiHotAtomFeaturizer.organic()\n", + "featurizer(atom_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Custom atom featurizers can also be created by specifying the choices. Custom choices for atomic number, degree, formal charge, chiral tag, # of hydrogens, and hybridization can be specified to create a custom atom featurizer. Aromaticity featurization is always True/False. " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. 
, 0. ,\n", + " 0. , 0. , 0. , 0. , 1. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0.12011])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from rdkit.Chem.rdchem import HybridizationType\n", + "\n", + "atomic_nums = [1, 6, 7, 8]\n", + "degrees = [0, 1, 2, 3, 4]\n", + "formal_charges = [-2, -1, 0, 1, 2]\n", + "chiral_tags = [0, 1, 2, 3]\n", + "num_Hs = [0, 1, 2, 3, 4]\n", + "hybridizations = [HybridizationType.SP, HybridizationType.SP2, HybridizationType.SP3]\n", + "featurizer = MultiHotAtomFeaturizer(\n", + " atomic_nums, degrees, formal_charges, chiral_tags, num_Hs, hybridizations\n", + ")\n", + "featurizer(atom_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generic" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Any class that has a length and returns a numpy array when given an `rdkit.Chem.rdchem.Atom` can be used as an atom featurizer. " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([6.])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from rdkit.Chem.rdchem import Atom\n", + "import numpy as np\n", + "\n", + "\n", + "class MyAtomFeaturizer:\n", + " def __len__(self):\n", + " return 1\n", + "\n", + " def __call__(self, a: Atom):\n", + " return np.array([a.GetAtomicNum()], dtype=float)\n", + "\n", + "\n", + "featurizer = MyAtomFeaturizer()\n", + "featurizer(atom_to_featurize)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/docs/source/tutorial/python/featurizers/bond_featurizers.ipynb b/chemprop-updated/docs/source/tutorial/python/featurizers/bond_featurizers.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..183d730af509e2a2024761d0a29e2b56edb8704d --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/featurizers/bond_featurizers.ipynb @@ -0,0 +1,189 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bond featurizers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers.bond import MultiHotBondFeaturizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example bond to featurize." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rdkit import Chem\n", + "\n", + "bond_to_featurize = Chem.MolFromSmiles(\"CC\").GetBondBetweenAtoms(0, 1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bond features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following bond features are generated by `rdkit` and cast to one-hot vectors (except for the initial null bit which is True/False depending on if the bond is `None`). These feature vectors are joined together to a single multi-hot feature vector. 
Only the stereochemistry vector is padded for unknown values.\n", + "\n", + " - null?\n", + " - bond type\n", + " - conjugated?\n", + " - in ring?\n", + " - stereochemistry" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "featurizer = MultiHotBondFeaturizer()\n", + "featurizer(bond_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The bond types and stereochemistry can be customized. The defaults are:\n", + "\n", + " - bond_type\n", + " - Single, Double, Triple, Aromatic\n", + " - stereos\n", + " - 0, 1, 2, 3, 4, 5 - See `rdkit.Chem.rdchem.BondStereo` for more details" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 0, 0, 1, 0, 0, 0])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from rdkit.Chem.rdchem import BondType\n", + "\n", + "featurizer = MultiHotBondFeaturizer(bond_types=[BondType.SINGLE], stereos=[0, 1, 2])\n", + "featurizer(bond_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generic" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Any class that has a length and returns a numpy array when given an `rdkit.Chem.rdchem.Bond` can be used as a bond featurizer. " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from rdkit.Chem.rdchem import Bond\n", + "import numpy as np\n", + "\n", + "\n", + "class MyBondFeaturizer:\n", + " def __len__(self):\n", + " return 1\n", + "\n", + " def __call__(self, a: Bond):\n", + " return np.array([a.GetIsConjugated()], dtype=float)\n", + "\n", + "\n", + "featurizer = MyBondFeaturizer()\n", + "featurizer(bond_to_featurize)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/docs/source/tutorial/python/featurizers/molecule_featurizers.ipynb b/chemprop-updated/docs/source/tutorial/python/featurizers/molecule_featurizers.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..9fbcd1e43e9371276019fc38a11789a1413a0229 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/featurizers/molecule_featurizers.ipynb @@ -0,0 +1,423 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Molecule featurizers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers.molecule import (\n", + " MorganBinaryFeaturizer,\n", + " MorganCountFeaturizer,\n", + " RDKit2DFeaturizer,\n", + " V1RDKit2DFeaturizer,\n", + " 
V1RDKit2DNormalizedFeaturizer,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These are example molecules to featurize." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.utils import make_mol\n", + "\n", + "smis = [\"C\" * i for i in range(1, 11)]\n", + "mols = [make_mol(smi, keep_h=False, add_h=False, ignore_chirality=False) for smi in smis]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Molecule vs molgraph featurizers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Both molecule and [molgraph](./molgraph_molecule_featurizer.ipynb) featurizers take `rdkit.Chem.Mol` objects as input. Molgraph featurizers produce a `MolGraph` which is used in message passing. Molecule featurizers produce a 1D numpy array of features that can be used as [extra datapoint descriptors](../data/datapoints.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, ..., 0, 0, 0], dtype=uint8)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.data import MoleculeDatapoint\n", + "\n", + "molecule_featurizer = MorganBinaryFeaturizer()\n", + "\n", + "datapoints = [MoleculeDatapoint(mol, x_d=molecule_featurizer(mol)) for mol in mols]\n", + "\n", + "molecule_featurizer(mols[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Morgan fingerprint featurizers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Morgan fingerprints can use either a binary or a count representation of molecular structures. The radius of the substructures, the length of the fingerprint, and whether to include chirality can all be customized. The default radius is 2, the default length is 2048, and chirality is included by default." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((1024,), array([0, 0, 0, ..., 0, 0, 0], dtype=int32))" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mf = MorganCountFeaturizer(radius=3, length=1024, include_chirality=False)\n", + "morgan_fp = mf(mols[0])\n", + "morgan_fp.shape, morgan_fp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### RDKit molecule featurizers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop gives a warning that the RDKit molecule featurizers are not well scaled by a `StandardScaler`. Consult the literature for more appropriate scaling methods." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The RDKit 2D features can deviate signifcantly from a normal distribution. 
Consider manually scaling them using an appropriate scaler before creating datapoints, rather than using the scikit-learn `StandardScaler` (the default in Chemprop).\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 0. , 0. , 0. , 0. , 0.35978494,\n", + " 0. , 16.043 , 12.011 , 16.03130013, 8. ,\n", + " 0. , -0.07755789, -0.07755789, 0.07755789, 0.07755789,\n", + " 1. , 1. , 1. , 12.011 , 12.011 ,\n", + " -0.07755789, -0.07755789, 0.1441 , 0.1441 , 2.503 ,\n", + " 2.503 , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 8.73925103, 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 7.42665278, 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 7.42665278, 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 7.42665278, 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 7.42665278, 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0.6361 , 6.731 ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. 
,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "molecule_featurizer = RDKit2DFeaturizer()\n", + "extra_datapoint_descriptors = [molecule_featurizer(mol) for mol in mols]\n", + "extra_datapoint_descriptors[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The rdkit featurizers from v1 are also available. They rely on the `descriptastorus` package which can be found at [https://github.com/bp-kelley/descriptastorus](https://github.com/bp-kelley/descriptastorus). This package doesn't include the following rdkit descriptors: `['AvgIpc', 'BCUT2D_CHGHI', 'BCUT2D_CHGLO', 'BCUT2D_LOGPHI', 'BCUT2D_LOGPLOW', 'BCUT2D_MRHI', 'BCUT2D_MRLOW', 'BCUT2D_MWHI', 'BCUT2D_MWLOW', 'SPS']`. Scaled versions of these descriptors are available, though it is unknown which molecules were used to fit the scaling, so this may be a dataleak depending on the test set used to evaluate model performace. See this [issue](https://github.com/bp-kelley/descriptastorus/issues/31) for more details about the scaling. " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "data": { + "text/plain": [ + "array([1.96075662e-05, 5.77173432e-04, 3.87525506e-15, 2.72296612e-11,\n", + " 1.02515408e-07, 4.10254814e-13, 1.63521389e-11, 1.93930344e-05,\n", + " 1.22824218e-06, 2.20907757e-07, 6.35349909e-07, 3.08677419e-06,\n", + " 1.70338959e-05, 1.34072882e-05, 4.07488775e-10, 2.17523456e-08,\n", + " 6.89356874e-07, 2.63048207e-01, 1.96742684e-02, 2.50993926e-11,\n", + " 9.25841695e-11, 5.85610910e-17, 1.08871430e-06, 2.39145041e-11,\n", + " 7.52245592e-13, 1.23345732e-08, 2.94906350e-01, 9.59992784e-03,\n", + " 2.31947354e-03, 9.99390325e-01, 9.88006922e-01, 1.59186446e-08,\n", + " 4.42180049e-09, 1.00000000e+00, 7.85198619e-13, 4.14332758e-13,\n", + " 6.49617582e-11, 4.45588945e-06, 7.89307465e-03, 2.39990382e-02,\n", + " 7.89307465e-03, 4.59284380e-03, 3.24286613e-10, 1.83192891e-02,\n", + " 7.38491174e-01, 9.73505944e-01, 6.05575320e-02, 3.42737552e-07,\n", + " 1.23284669e-08, 6.13163344e-02, 3.33304127e-02, 9.93858689e-22,\n", + " 1.42492255e-01, 6.29631332e-02, 3.47228888e-02, 4.82992991e-15,\n", + " 1.11775996e-02, 1.89758400e-02, 5.52866693e-02, 5.22997303e-05,\n", + " 5.69516350e-08, 2.15229839e-03, 0.00000000e+00, 1.14242658e-21,\n", + " 2.40245513e-23, 1.31105703e-02, 8.72153349e-03, 5.76142917e-21,\n", + " 3.60875252e-15, 1.45980119e-01, 1.73556718e-22, 1.18093757e-10,\n", + " 5.99833786e-02, 9.05498589e-08, 4.60978367e-10, 1.57072376e-01,\n", + " 1.66847964e-01, 2.37240682e-02, 8.07601514e-02, 2.75008841e-02,\n", + " 4.92845505e-03, 1.24459630e-01, 7.31816496e-02, 1.67096874e-01,\n", + " 7.55810089e-02, 8.78622233e-24, 1.33643046e-01, 3.04494668e-02,\n", + " 2.58369311e-02, 5.30138094e-05, 1.42657565e-16, 3.73160396e-02,\n", + " 6.95272017e-13, 0.00000000e+00, 9.79690873e-13, 2.64281353e-04,\n", + " 1.20493060e-11, 2.86305006e-09, 1.04578852e-01, 3.09944928e-02,\n", + " 2.99487758e-06, 2.77639012e-01, 5.30138094e-05, 6.17138309e-03,\n", + " 5.30138094e-05, 
5.00000000e-01, 3.84710451e-01, 5.30138094e-05,\n", + " 5.30138094e-05, 1.64664515e-01, 5.30138094e-05, 9.98653446e-01,\n", + " 3.99820633e-01, 2.02868342e-02, 5.70867846e-19, 3.32362804e-10,\n", + " 9.64197643e-10, 7.10542736e-15, 5.83707586e-13, 1.19880642e-20,\n", + " 1.65079548e-01, 1.67040631e-01, 1.66498334e-01, 1.66486816e-01,\n", + " 2.02864661e-01, 6.93658809e-02, 7.10542736e-15, 1.68346480e-01,\n", + " 1.67982932e-01, 6.87189958e-10, 1.18157291e-03, 1.64332634e-01,\n", + " 8.37776917e-04, 1.66325734e-01, 1.63034142e-01, 1.65079548e-01,\n", + " 9.56970492e-08, 3.49708922e-08, 1.68206175e-01, 1.65806858e-01,\n", + " 1.67346595e-01, 7.13964619e-07, 2.64115098e-12, 9.99127911e-02,\n", + " 2.86809243e-10, 3.77737848e-01, 4.50616778e-03, 1.33250251e-01,\n", + " 3.47299284e-02, 1.61482916e-09, 1.87517315e-18, 2.09410539e-07,\n", + " 7.10542736e-15, 4.99264281e-01, 1.64929402e-01, 1.31744508e-17,\n", + " 2.11164355e-16, 1.16815875e-09, 3.25923600e-22, 6.24601420e-10,\n", + " 1.68149182e-01, 1.65450729e-01, 1.17110262e-13, 0.00000000e+00,\n", + " 1.64668868e-01, 1.66924728e-01, 0.00000000e+00, 5.10071327e-08,\n", + " 7.10542736e-15, 1.54654108e-01, 2.79420938e-22, 0.00000000e+00,\n", + " 1.67639733e-01, 6.31499266e-25, 1.68186130e-01, 9.08850267e-03,\n", + " 1.68363202e-01, 8.26542313e-11, 1.56346354e-01, 0.00000000e+00,\n", + " 0.00000000e+00, 2.11354236e-02, 2.11354236e-02, 2.38815575e-20,\n", + " 0.00000000e+00, 8.33672450e-25, 5.30138094e-05, 1.56951066e-01,\n", + " 4.03434503e-08, 1.55259196e-23, 1.59306117e-17, 5.76610077e-14,\n", + " 2.95798941e-11, 1.68378369e-01, 1.67380186e-01, 1.48151465e-18,\n", + " 2.32414994e-16, 4.70359809e-08, 1.66633397e-01, 1.87492844e-01])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "molecule_featurizer = V1RDKit2DFeaturizer()\n", + "molecule_featurizer = V1RDKit2DNormalizedFeaturizer()\n", + "molecule_featurizer(mols[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Any class that has a length and returns a 1D numpy array when given an `rdkit.Chem.Mol` can be used as a molecule featurizer. " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from rdkit import Chem\n", + "\n", + "class MyMoleculeFeaturizer:\n", + " def __len__(self) -> int:\n", + " return 1\n", + "\n", + " def __call__(self, mol: Chem.Mol) -> np.ndarray:\n", + " total_atoms = mol.GetNumAtoms()\n", + " return np.array([total_atoms])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mf = MyMoleculeFeaturizer()\n", + "mf(mols[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using molecule features as extra datapoint descriptors" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you only have molecule features for one molecule per datapoint, those features can be used directly as extra datapoint descriptors. If you have multiple molecules with extra features, or other extra datapoint descriptors, they first need to be concatenated into a single numpy array." 
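The concatenation itself is shown in the next cell. As a small sketch of the step that follows it (using `MoleculeDatapoint` as imported earlier in this notebook), each per-molecule row of descriptors is attached to its datapoint through `x_d`:

```python
from chemprop.data import MoleculeDatapoint

# Minimal sketch: attach one descriptor row per datapoint via x_d.
# `mols` is defined above; `extra_datapoint_descriptors` is any per-molecule
# array of descriptors, e.g. the concatenated array built in the next cell.
datapoints = [
    MoleculeDatapoint(mol, x_d=x_d)
    for mol, x_d in zip(mols, extra_datapoint_descriptors)
]
```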
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "mol1_features = np.random.randn(len(mols), 1)\n", + "mol2_features = np.random.randn(len(mols), 2)\n", + "other_datapoint_descriptors = np.random.randn(len(mols), 3)\n", + "\n", + "extra_datapoint_descriptors = np.hstack([mol1_features, mol2_features, other_datapoint_descriptors])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/docs/source/tutorial/python/featurizers/molgraph_molecule_featurizer.ipynb b/chemprop-updated/docs/source/tutorial/python/featurizers/molgraph_molecule_featurizer.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..cd6b866581491832fabfb2f671620533de89fd2b --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/featurizers/molgraph_molecule_featurizer.ipynb @@ -0,0 +1,207 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Molecule MolGraph featurizers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers.molgraph.molecule import SimpleMoleculeMolGraphFeaturizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example molecule to featurize." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rdkit import Chem\n", + "\n", + "mol_to_featurize = Chem.MolFromSmiles(\"CC\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Simple molgraph featurizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A `MolGraph` represents the graph featurization of a molecule. It is made of atom features (`V`), bond features (`E`), and a mapping between atoms and bonds (`edge_index` and `rev_edge_index`). It is created by `SimpleMoleculeMolGraphFeaturizer`. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MolGraph(V=array([[0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0.12011],\n", + " [0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. 
, 0.12011]], dtype=float32), E=array([[0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]]), edge_index=array([[0, 1],\n", + " [1, 0]]), rev_edge_index=array([1, 0]))" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "featurizer = SimpleMoleculeMolGraphFeaturizer()\n", + "featurizer(mol_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The [atom](./atom_featurizers.ipynb) and [bond](./bond_featurizers.ipynb) featurizers used by the molgraph featurizer are customizable." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MolGraph(V=array([[0. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0.12011],\n", + " [0. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0.12011]], dtype=float32), E=array([[0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]), edge_index=array([[0, 1],\n", + " [1, 0]]), rev_edge_index=array([1, 0]))" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.featurizers import MultiHotAtomFeaturizer, MultiHotBondFeaturizer\n", + "\n", + "atom_featurizer = MultiHotAtomFeaturizer.organic()\n", + "bond_featurizer = MultiHotBondFeaturizer(stereos=[0, 1, 2, 3, 4])\n", + "featurizer = SimpleMoleculeMolGraphFeaturizer(\n", + " atom_featurizer=atom_featurizer, bond_featurizer=bond_featurizer\n", + ")\n", + "featurizer(mol_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extra atom and bond features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your [datapoints](../data/datapoints.ipynb) have extra atom or bond features, the molgraph featurizer needs to know the length of the extra features when it is created so that molecules without heavy atoms (molecular hydrogen) are featurized correctly and so that the bond feature array is the correct shape." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "n_extra_atom_features = 3\n", + "n_extra_bond_features = 4\n", + "featurizer = SimpleMoleculeMolGraphFeaturizer(\n", + " extra_atom_fdim=n_extra_atom_features, extra_bond_fdim=n_extra_bond_features\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The [dataset](../data/datasets.ipynb) is given this custom featurizer and automatically handles the featurization including passing extra atom and bond features for each datapoint. 
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/docs/source/tutorial/python/featurizers/molgraph_reaction_featurizer.ipynb b/chemprop-updated/docs/source/tutorial/python/featurizers/molgraph_reaction_featurizer.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..7f51a10ef224166ce93bcf29b871c704726ac3e3 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/featurizers/molgraph_reaction_featurizer.ipynb @@ -0,0 +1,433 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reaction MolGraph featurizers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers.molgraph.reaction import CondensedGraphOfReactionFeaturizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example reaction to featurize. The sanitizing code is to preserve atom mapped hydrogens in the graph." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rdkit import Chem\n", + "\n", + "rct = Chem.MolFromSmiles(\"[H:1][C:4]([H:2])([H:3])[F:5]\", sanitize=False)\n", + "pdt = Chem.MolFromSmiles(\"[H:1][C:4]([H:2])([H:3]).[F:5]\", sanitize=False)\n", + "Chem.SanitizeMol(\n", + " rct, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS\n", + ")\n", + "Chem.SanitizeMol(\n", + " pdt, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS\n", + ")\n", + "\n", + "rxn = (rct, pdt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Condensed Graph of Reaction featurizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Like a [molecule](./molgraph_molecule_featurizer.ipynb) MolGraph featurizer, reaction MolGraph featurizers produce a `MolGraph`. The difference between the molecule and reaction versions is that a reaction takes two `rdkit.Chem.Mol` objects and need to know what \"mode\" of featurization to use. Available modes are found in `RxnMode`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "reac_prod\n", + "reac_prod_balance\n", + "reac_diff\n", + "reac_diff_balance\n", + "prod_diff\n", + "prod_diff_balance\n" + ] + } + ], + "source": [ + "from chemprop.featurizers import RxnMode\n", + "\n", + "for mode in RxnMode:\n", + " print(mode)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Briefly, \"reac\" stands for reactant features, \"prod\" stands for product features, and \"diff\" stands for the difference between reactant and product features. The two sets of features are concatenated together. \"balance\" refers to balancing imablanced reactions. See the 2022 [paper](https://doi.org/10.1021/acs.jcim.1c00975) by Heid and Green for more details. \"reac_diff\" is the default." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "reac_diff = CondensedGraphOfReactionFeaturizer()\n", + "reac_prod = CondensedGraphOfReactionFeaturizer(mode_=\"reac_prod\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, -1,\n", + " 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, -1,\n", + " 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0]])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reac_diff(rxn).E" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0]])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reac_prod(rxn).E" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Like molecule MolGraph featurizers, reaction featurizers can use custom atom and bond featurizers." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MolGraph(V=array([[ 1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.01008, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 1. , 0. 
,\n", + " 0. , 0.12011, 0. , 0. , 0. , 1. ,\n", + " -1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.01008, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.01008, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0.18998, 1. , -1. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ]]), E=array([[ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, -1,\n", + " 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, -1,\n", + " 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0]]), edge_index=array([[0, 1, 1, 2, 1, 3, 1, 4],\n", + " [1, 0, 2, 1, 3, 1, 4, 1]]), rev_edge_index=array([1, 0, 3, 2, 5, 4, 7, 6]))" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.featurizers import MultiHotAtomFeaturizer\n", + "\n", + "atom_featurizer = MultiHotAtomFeaturizer.organic()\n", + "rxn_featurizer = CondensedGraphOfReactionFeaturizer(atom_featurizer=atom_featurizer)\n", + "rxn_featurizer(rxn)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extra atom and bond features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extra atom and bond features are not yet supported for reactions." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "'atom_features_extra' is currently unsupported for reactions\n" + ] + }, + { + "data": { + "text/plain": [ + "MolGraph(V=array([[ 1. , 0. , 0. , 0. 
, 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.01008, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0.12011, 0. , 0. , 0. , 1. ,\n", + " -1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.01008, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.01008, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0.18998, 1. , -1. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. 
]]), E=array([[ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, -1,\n", + " 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, -1,\n", + " 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0]]), edge_index=array([[0, 1, 1, 2, 1, 3, 1, 4],\n", + " [1, 0, 2, 1, 3, 1, 4, 1]]), rev_edge_index=array([1, 0, 3, 2, 5, 4, 7, 6]))" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rxn_featurizer(rxn, atom_features_extra=[1.0])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/docs/source/tutorial/python/index.rst b/chemprop-updated/docs/source/tutorial/python/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..e9dade8a737be7fed6fe8857ee81f4f2e096644a --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/index.rst @@ -0,0 +1,63 @@ +.. _python usage: + +Python Module Tutorials +======================= + +Chemprop may be used in python scripts, allowing for greater flexibility and control than the CLI. We recommend first looking through some of the worked examples to get an overview of the workflow. Then further details about the creation, customization, and use of Chemprop modules can be found in the following module tutorials: + +Data Modules: + +* :doc:`data/datapoints` +* :doc:`data/datasets` +* :doc:`data/dataloaders` +* :doc:`data/splitting` + +Featurization Modules: + +* :doc:`featurizers/atom_featurizers` +* :doc:`featurizers/bond_featurizers` +* :doc:`featurizers/molgraph_molecule_featurizer` +* :doc:`featurizers/molgraph_reaction_featurizer` +* :doc:`featurizers/molecule_featurizers` + +Model Modules: + +* :doc:`models/basic_mpnn_model` +* :doc:`models/message_passing` +* :doc:`models/aggregation` +* :doc:`models/predictor` +* :doc:`models/multicomponent_mpnn_model` + +Other module and workflow tutorials: + +* :doc:`activation` +* :doc:`loss_functions` +* :doc:`metrics` +* :doc:`saving_and_loading` +* :doc:`ensembling` +* :doc:`scaling` + +.. 
toctree:: + :maxdepth: 1 + :hidden: + + data/datapoints + data/datasets + data/dataloaders + data/splitting + featurizers/atom_featurizers + featurizers/bond_featurizers + featurizers/molgraph_molecule_featurizer + featurizers/molgraph_reaction_featurizer + featurizers/molecule_featurizers + models/basic_mpnn_model + models/message_passing + models/aggregation + models/predictor + models/multicomponent_mpnn_model + activation + loss_functions + metrics + saving_and_loading + ensembling + scaling \ No newline at end of file diff --git a/chemprop-updated/docs/source/tutorial/python/loss_functions.ipynb b/chemprop-updated/docs/source/tutorial/python/loss_functions.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8c83372ce3475f2a342ce8040ce5cf9e8b50af5c --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/loss_functions.ipynb @@ -0,0 +1,641 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loss functions" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "\n", + "from lightning import pytorch as pl\n", + "import numpy as np\n", + "from numpy.typing import ArrayLike\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "import torch\n", + "from torch import Tensor\n", + "import torchmetrics\n", + "\n", + "from chemprop import data, models, nn\n", + "from chemprop.nn.metrics import ChempropMetric, LossFunctionRegistry" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Available functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop provides several loss functions. The derivatives of these differentiable functions are used to update the model weights. Users only need to select the loss function to use. The rest of the details are handled by Chemprop and the lightning trainer, which reports the training and validation loss during model fitting.\n", + "\n", + "See also [metrics](./metrics.ipynb) which are the same as loss functions, but potentially non-differentiable and used to measure the performance of a model. " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mse\n", + "mae\n", + "rmse\n", + "bounded-mse\n", + "bounded-mae\n", + "bounded-rmse\n", + "mve\n", + "evidential\n", + "bce\n", + "ce\n", + "binary-mcc\n", + "multiclass-mcc\n", + "dirichlet\n", + "sid\n", + "earthmovers\n", + "wasserstein\n", + "quantile\n", + "pinball\n" + ] + } + ], + "source": [ + "for lossfunction in LossFunctionRegistry:\n", + " print(lossfunction)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task weights" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A model can make predictions of multiple targets/tasks at the same time. For example, a model may predict both solubility and melting point. Task weights can be specified when some of the tasks are more important to get accurate than others. The weight for each task defaults to 1." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MSE(task_weights=[[0.10000000149011612, 0.5, 1.0]])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.nn.metrics import MSE\n", + "\n", + "predictor = nn.RegressionFFN(criterion=MSE(task_weights=[0.1, 0.5, 1.0]))\n", + "model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), predictor)\n", + "predictor.criterion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mean squared error and bounded mean square error" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MSE` is the default loss function for regression tasks." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MSE(task_weights=[[1.0]])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictor = nn.RegressionFFN()\n", + "model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), predictor)\n", + "predictor.criterion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`BoundedMSE` is useful when the target values have \\\"less than\\\" or \\\"greater than\\\" behavior, e.g. the prediction is correct as long as it is below/above a target value. Datapoints have a less than/greater than property that keeps track of bounded targets. Note that, like target values, the less than and greater than masks used to make datapoints are 1-D numpy arrays of bools instead of a single bool. This is because a single datapoint can have multiple target values and the less than/greater than masks are defined for each target value separately." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ True],\n", + " [False],\n", + " [False],\n", + " [False],\n", + " [ True]])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.nn.metrics import BoundedMSE\n", + "\n", + "smis = [\"C\" * i for i in range(1, 6)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "lt_mask = np.array([[True], [False], [False], [False], [True]])\n", + "gt_mask = np.array([[False], [True], [False], [True], [False]])\n", + "datapoints = [\n", + " data.MoleculeDatapoint.from_smi(smi, y, lt_mask=lt, gt_mask=gt)\n", + " for smi, y, lt, gt in zip(smis, ys, lt_mask, gt_mask)\n", + "]\n", + "bounded_dataset = data.MoleculeDataset(datapoints)\n", + "bounded_dataset.lt_mask" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "predictor = nn.RegressionFFN(criterion=BoundedMSE())\n", + "model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), predictor)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Binary cross entropy and cross entropy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`BCELoss` is the default loss function for binary classification and `CrossEntropyLoss` is the default for multiclass classification." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BCELoss(task_weights=[[1.0]])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictor = nn.BinaryClassificationFFN()\n", + "predictor.criterion" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "CrossEntropyLoss(task_weights=[[1.0]])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictor = nn.MulticlassClassificationFFN(n_classes=3)\n", + "predictor.criterion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Matthews correlation coefficient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "MCC loss is useful for imbalanced classification data. An optimal MCC is 1, so the loss function version of MCC returns 1 - MCC." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import BinaryMCCLoss, MulticlassMCCLoss" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Uncertainty" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Various methods for estimating uncertainty in predictions are available. These methods often use specific loss functions." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import MVELoss, EvidentialLoss, DirichletLoss, QuantileLoss" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Spectral loss functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Spectral information divergence and wasserstein (earthmover's distance) are often used for spectral predictions." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import SID, Wasserstein" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom loss functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop loss functions are instances of `chemprop.nn.metrics.ChempropMetric`, which inherits from `torchmetrics.Metric`. Custom loss functions need to follow the interface of both `ChempropMetric` and `Metric`. Start with a `Metric` either by importing an existing one from `torchmetrics` or by creating your own by following the instructions on the `torchmetrics` website. Then make the following changes:\n", + "\n", + "1. Allow for task weights to be passed to the `__init__` method.\n", + "2. Allow for the `update` method to be given `preds, targets, mask, weights, lt_mask, gt_mask` in that order.\n", + "\n", + "* `preds`: A `Tensor` of the model's predictions with dimension 0 being the batch dimension and dimension 1 being the task dimension. 
Dimension 2 exists for uncertainty estimation or multiclass predictions and is either used for uncertainty parameters or multiclass logits.\n", + "* `targets`: A `Tensor` of the target values with dimension 0 being the batch dimension and dimension 1 being the task dimension.\n", + "* `mask`: A `Tensor` of the same shape as `targets` with `True`s where the target value is present and finite and `False` where it is not.\n", + "* `weights`: A `Tensor` of the weights for each data point in the loss function. This is useful when some data points are more important than others.\n", + "* `lt_mask`: A `Tensor` of the same shape as `targets` with `True`s where the target value is a \"less than\" target value and `False` where it is not.\n", + "* `gt_mask`: A `Tensor` of the same shape as `targets` with `True`s where the target value is a \"greater than\" target value and `False` where it is not." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "class ChempropMulticlassHingeLoss(torchmetrics.classification.MulticlassHingeLoss):\n", + " def __init__(self, task_weights: ArrayLike = 1.0, **kwargs):\n", + " super().__init__(**kwargs)\n", + " self.task_weights = torch.as_tensor(task_weights, dtype=torch.float).view(1, -1)\n", + " if (self.task_weights != 1.0).any():\n", + " warnings.warn(\"task_weights were provided but are ignored by metric \"\n", + " f\"{self.__class__.__name__}. Got {task_weights}\")\n", + "\n", + " def update(self, preds: Tensor, targets: Tensor, mask: Tensor | None = None, *args, **kwargs):\n", + " if mask is None:\n", + " mask = torch.ones_like(targets, dtype=torch.bool)\n", + "\n", + " super().update(preds[mask], targets[mask].long())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, if your loss function can return a value for every task for every data point (i.e. not reduced in the task or batch dimension), you can inherit from `chemprop.nn.metrics.ChempropMetric` and just override the `_calc_unreduced_loss` method (and if needed the `__init__` method)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "class BoundedNormalizedMSEPlus1(ChempropMetric):\n", + " def __init__(self, task_weights = None, norm: float = 1.0):\n", + " super().__init__(task_weights)\n", + " norm = torch.as_tensor(norm)\n", + " self.register_buffer(\"norm\", norm)\n", + "\n", + " def _calc_unreduced_loss(self, preds, targets, mask, weights, lt_mask, gt_mask) -> Tensor:\n", + " preds = torch.where((preds < targets) & lt_mask, targets, preds)\n", + " preds = torch.where((preds > targets) & gt_mask, targets, preds)\n", + "\n", + " return torch.sum((preds - targets) ** 2) / self.norm + 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set up the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parents[3]\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"classification\" / \"mol_multiclass.csv\"\n", + "df_input = pd.read_csv(input_path)\n", + "smis = df_input.loc[:, \"smiles\"].values\n", + "ys = df_input.loc[:, [\"activity\"]].values\n", + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]\n", + "train_indices, val_indices, test_indices = data.make_split_indices(all_data, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")\n", + "train_dset = data.MoleculeDataset(train_data[0])\n", + "val_dset = data.MoleculeDataset(val_data[0])\n", + "test_dset = data.MoleculeDataset(test_data[0])\n", + "train_loader = data.build_dataloader(train_dset)\n", + "val_loader = data.build_dataloader(val_dset, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a model with a custom loss function" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "n_classes = max(ys).item() + 1\n", + "\n", + "loss_function = ChempropMulticlassHingeLoss(num_classes = n_classes)\n", + "ffn = nn.MulticlassClassificationFFN(n_classes = n_classes, criterion = loss_function)\n", + "\n", + "model = models.MPNN(nn.BondMessagePassing(), nn.NormAggregation(), ffn)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run training" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (7) is smaller than the logging interval Trainer(log_every_n_steps=50). 
Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "\n", + " | Name | Type | Params | Mode \n", + "------------------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | NormAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MulticlassClassificationFFN | 91.2 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "------------------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.276 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/multiclass-mcc 0.0 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/multiclass-mcc \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.0 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "[{'test/multiclass-mcc': 0.0}]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trainer = pl.Trainer(max_epochs=2)\n", + "trainer.fit(model, train_loader, val_loader)\n", + "trainer.test(model, test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/docs/source/tutorial/python/metrics.ipynb b/chemprop-updated/docs/source/tutorial/python/metrics.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..82b6ff16310bcd070d2fd7f0faad81f941a7cd5f --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/metrics.ipynb @@ -0,0 +1,650 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from lightning import pytorch as pl\n", + "import numpy as np\n", + "from numpy.typing import ArrayLike\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "import torch\n", + "from torch import Tensor\n", + "import torchmetrics\n", + "import logging\n", + "\n", + "from chemprop import data, models, nn\n", + "from chemprop.nn.metrics import ChempropMetric, MetricRegistry\n", + "\n", + "logger = logging.getLogger(__name__)" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "### Available metric functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop provides several metrics. The functions calculate a single value that serves as a measure of model performance. Users only need to select the metric(s) to use. The rest of the details are handled by Chemprop and the lightning trainer, which logs all metric values to the trainer logger (defaults to TensorBoard) for the validation and test sets. Note that the validation metrics are in the scaled space while the test metrics are in the original target space.\n", + "\n", + "See also [loss functions](./loss_functions.ipynb) which are the same as metrics, except used to optimize the model and therefore required to be differentiable." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mse\n", + "mae\n", + "rmse\n", + "bounded-mse\n", + "bounded-mae\n", + "bounded-rmse\n", + "r2\n", + "binary-mcc\n", + "multiclass-mcc\n", + "roc\n", + "prc\n", + "accuracy\n", + "f1\n" + ] + } + ], + "source": [ + "for metric in MetricRegistry:\n", + " print(metric)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Specifying metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each FFN predictor has a default metric. If you want different metrics reported, you can give a list of metrics to the model at creation. Note that the list of metrics is used in place of the default metric and not in addition to the default metric." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import MSE, MAE, RMSE\n", + "\n", + "metrics = [MSE(), MAE(), RMSE()]\n", + "model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), nn.RegressionFFN(), metrics=metrics)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Accumulating metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop metrics are based on `Metric` from `torchmetrics` which stores the information from each batch that is needed to calculate the metric over the whole validation or test set." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing DataLoader 0: 0%| | 0/5 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/mae 0.4941912293434143 │\n", + "│ test/mse 0.3071698546409607 │\n", + "│ test/rmse 0.5542290806770325 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mae \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.4941912293434143 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.3071698546409607 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/rmse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.5542290806770325 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 147.05it/s]\n" + ] + } + ], + "source": [ + "smis = [\"C\" * i for i in range(1, 11)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "dset = data.MoleculeDataset([data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])\n", + "dataloader = data.build_dataloader(dset, shuffle=False, batch_size=2)\n", + "\n", + "trainer = pl.Trainer(logger=False, enable_checkpointing=False, max_epochs=1)\n", + "result_when_batched = trainer.test(model, dataloader)\n", + "preds = trainer.predict(model, dataloader)\n", + "preds = torch.concat(preds)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Batch / Not Batched\n", + "0.5542 / 0.5542\n" + ] + } + ], + "source": [ + "result_when_not_batched = RMSE()(preds, torch.from_numpy(dset.Y), None, None, None, None)\n", + "print(\"Batch / Not Batched\")\n", + "print(f\"{result_when_batched[0]['test/rmse']:.4f} / {result_when_not_batched.item():.4f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Batch normalization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is worth noting that if your model has a batch normalization layer, the computed metric will be different depending on if the model is in training or evaluation mode. 
When a batch normalization layer is training, it uses a biased estimator to calculate the standard deviation, but the value stored and used during evaluation is calculated with the unbiased estimator. Lightning takes care of this if the `Trainer()` is used. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regression" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are several metric options for regression. `MSE` is the default. There are also bounded versions (except for r2), similar to the bounded versions of the [loss functions](./loss_functions.ipynb). " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import MSE, MAE, RMSE, R2Score" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import BoundedMAE, BoundedMSE, BoundedRMSE" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Classification" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are metrics for both binary and multiclass classification." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import (\n", + " BinaryAUROC,\n", + " BinaryAUPRC,\n", + " BinaryAccuracy,\n", + " BinaryF1Score,\n", + " BinaryMCCMetric,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import MulticlassMCCMetric" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Spectra" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Spectral information divergence and wasserstein (earthmovers distance) are often used for spectral predictions" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import SID, Wasserstein" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop metrics are instances of `chemprop.nn.metrics.ChempropMetric`, which inherits from `torchmetrics.Metric`. Custom loss functions need to follow the interface of both `ChempropMetric` and `Metric`. Start with a `Metric` either by importing an existing one from `torchmetrics` or by creating your own by following the instructions on the `torchmetrics` website. Then make the following changes:\n", + "\n", + "1. Allow for task weights to be passed to the `__init__` method.\n", + "2. Allow for the `update` method to be given `preds, targets, mask, weights, lt_mask, gt_mask` in that order.\n", + "3. 
Provide an alias property, which is used to identify the metric value in the logs.\n", + "\n", + "* `preds`: A `Tensor` of the model's predictions with dimension 0 being the batch dimension and dimension 1 being the task dimension.\n", + "* `targets`: A `Tensor` of the target values with dimension 0 being the batch dimension and dimension 1 being the task dimension.\n", + "* `mask`: A `Tensor` of the same shape as `targets` with `True`s where the target value is present and finite and `False` where it is not.\n", + "* `weights`: Usually ignored in metrics.\n", + "* `lt_mask`: A `Tensor` of the same shape as `targets` with `True`s where the target value is a \"less than\" target value and `False` where it is not.\n", + "* `gt_mask`: A `Tensor` of the same shape as `targets` with `True`s where the target value is a \"greater than\" target value and `False` where it is not." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "class ChempropMulticlassAUROC(torchmetrics.classification.MulticlassAUROC):\n", + " def __init__(self, task_weights: ArrayLike = 1.0, **kwargs):\n", + " super().__init__(**kwargs)\n", + " self.task_weights = torch.as_tensor(task_weights, dtype=torch.float).view(1, -1)\n", + " if (self.task_weights != 1.0).any():\n", + " logger.warn(\"task_weights were provided but are ignored by metric \"\n", + " f\"{self.__class__.__name__}. Got {task_weights}\")\n", + "\n", + " def update(self, preds: Tensor, targets: Tensor, mask: Tensor | None = None, *args, **kwargs):\n", + " if mask is None:\n", + " mask = torch.ones_like(targets, dtype=torch.bool)\n", + "\n", + " super().update(preds[mask], targets[mask].long())\n", + "\n", + " @property\n", + " def alias(self) -> str:\n", + " return \"multiclass_auroc\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, if your metric can return a value for every task for every data point (i.e. not reduced in the task or batch dimension), you can inherit from `chemprop.nn.metrics.ChempropMetric` and just override the `_calc_unreduced_loss` method (and if needed the `__init__` method)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "class BoundedNormalizedMSEPlus1(ChempropMetric):\n", + " def __init__(self, task_weights = None, norm: float = 1.0):\n", + " super().__init__(task_weights)\n", + " norm = torch.as_tensor(norm)\n", + " self.register_buffer(\"norm\", norm)\n", + "\n", + " def _calc_unreduced_loss(self, preds, targets, mask, weights, lt_mask, gt_mask) -> Tensor:\n", + " preds = torch.where((preds < targets) & lt_mask, targets, preds)\n", + " preds = torch.where((preds > targets) & gt_mask, targets, preds)\n", + "\n", + " return torch.sum((preds - targets) ** 2) / self.norm + 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set up the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parents[3]\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"classification\" / \"mol_multiclass.csv\"\n", + "df_input = pd.read_csv(input_path)\n", + "smis = df_input.loc[:, \"smiles\"].values\n", + "ys = df_input.loc[:, [\"activity\"]].values\n", + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]\n", + "train_indices, val_indices, test_indices = data.make_split_indices(all_data, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")\n", + "train_dset = data.MoleculeDataset(train_data[0])\n", + "val_dset = data.MoleculeDataset(val_data[0])\n", + "test_dset = data.MoleculeDataset(test_data[0])\n", + "train_loader = data.build_dataloader(train_dset)\n", + "val_loader = data.build_dataloader(val_dset, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a model with a custom loss function" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "n_classes = max(ys).item() + 1\n", + "\n", + "metrics = [ChempropMulticlassAUROC(num_classes = n_classes)]\n", + "\n", + "model = models.MPNN(\n", + " nn.BondMessagePassing(), \n", + " nn.NormAggregation(), \n", + " nn.MulticlassClassificationFFN(n_classes = n_classes), \n", + " metrics = metrics\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run training" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (7) is smaller than the logging interval Trainer(log_every_n_steps=50). 
Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "\n", + " | Name | Type | Params | Mode \n", + "------------------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | NormAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MulticlassClassificationFFN | 91.2 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "------------------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.276 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/core/saving.py:363: Skipping 'metrics' parameter because it is not possible to safely dump to YAML.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/multiclass_auroc 0.6266666650772095 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/multiclass_auroc \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.6266666650772095 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "[{'test/multiclass_auroc': 0.6266666650772095}]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trainer = pl.Trainer(max_epochs=2)\n", + "trainer.fit(model, train_loader, val_loader)\n", + "trainer.test(model, test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/docs/source/tutorial/python/models/aggregation.ipynb b/chemprop-updated/docs/source/tutorial/python/models/aggregation.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..f99275ec1ed3db62386473ecf7aa0936730dc772 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/models/aggregation.ipynb @@ -0,0 +1,256 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aggregation" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from chemprop.nn.agg import MeanAggregation, SumAggregation, NormAggregation, AttentiveAggregation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is 
example output from [message passing](./message_passing.ipynb) for input to aggregation." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "n_atoms_in_batch = 7\n", + "hidden_dim = 3\n", + "example_message_passing_output = torch.randn(n_atoms_in_batch, hidden_dim)\n", + "which_atoms_in_which_molecule = torch.tensor([0, 0, 1, 1, 1, 1, 2]).long()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Combine nodes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The aggregation layer combines the node-level representations into a graph-level representation (usually atoms -> molecule)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mean and sum aggregation " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mean aggregation is recommended when the property to predict does not depend on the number of atoms in the molecules (intensive). Sum aggregation is recommended when the property is extensive, though usually norm aggregation is better." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "mean_agg = MeanAggregation()\n", + "sum_agg = SumAggregation()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.4593, -0.1808, -0.3459],\n", + " [ 0.9343, -0.1746, 0.7430],\n", + " [-0.4747, -0.9394, -0.3877]])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean_agg(H=example_message_passing_output, batch=which_atoms_in_which_molecule)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.9187, -0.3616, -0.6917],\n", + " [ 3.7373, -0.6986, 2.9720],\n", + " [-0.4747, -0.9394, -0.3877]])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum_agg(H=example_message_passing_output, batch=which_atoms_in_which_molecule)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Norm aggregation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Norm aggregation can be better than sum aggregation when the molecules are large, as it is best to keep the hidden representation values on the order of 1 (though this is less important when batch normalization is used). The normalization constant can be customized (defaults to 100.0)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "norm_agg = NormAggregation()\n", + "big_norm = NormAggregation(norm=1000.0)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.0092, -0.0036, -0.0069],\n", + " [ 0.0374, -0.0070, 0.0297],\n", + " [-0.0047, -0.0094, -0.0039]])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "norm_agg(H=example_message_passing_output, batch=which_atoms_in_which_molecule)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.0009, -0.0004, -0.0007],\n", + " [ 0.0037, -0.0007, 0.0030],\n", + " [-0.0005, -0.0009, -0.0004]])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "big_norm(H=example_message_passing_output, batch=which_atoms_in_which_molecule)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Attentive aggregation " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This uses a learned weighted average to combine atom representations within a molecule graph. It needs to be told the size of the hidden dimension as it uses the hidden representation of each atom to calculate the weight of that atom. " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "att_agg = AttentiveAggregation(output_size=hidden_dim)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.4551, -0.1791, -0.3438],\n", + " [ 0.9370, 0.1375, 0.3714],\n", + " [-0.4747, -0.9394, -0.3877]], grad_fn=)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "att_agg(H=example_message_passing_output, batch=which_atoms_in_which_molecule)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/docs/source/tutorial/python/models/basic_mpnn_model.ipynb b/chemprop-updated/docs/source/tutorial/python/models/basic_mpnn_model.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..4a52195f73b7fb145dc9f9888bb1cd6f2d4c6606 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/models/basic_mpnn_model.ipynb @@ -0,0 +1,351 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Chemprop MPNN models" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.models.model import MPNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Composition" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A Chemprop `MPNN` model is made up of several submodules including a [message passing](./message_passing.ipynb) layer, an [aggregation](./aggregation.ipynb) layer, an optional batch normalization layer, and a 
[predictor](./predictor.ipynb) feed forward network layer. `MPNN` defines the training and predicting logic used by `lightning` when using a Chemprop model in their framework. " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): NormAggregation()\n", + " (bn): Identity()\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): Identity()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.nn import BondMessagePassing, NormAggregation, RegressionFFN\n", + "\n", + "mp = BondMessagePassing()\n", + "agg = NormAggregation()\n", + "ffn = RegressionFFN()\n", + "\n", + "basic_model = MPNN(mp, agg, ffn)\n", + "basic_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Batch normalization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Batch normalization can improve training by keeping the inputs to the FFN small and centered around zero. It is off by default, but can be turned on." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): NormAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): Identity()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MPNN(mp, agg, ffn, batch_norm=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Optimizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MPNN` also configures the optimizer used by lightning during training. The `torch.optim.Adam` optimizer is used with a Noam learning rate scheduler (defined in `chemprop.scheduler.NoamLR`). The following parameters are customizable:\n", + "\n", + " - number of warmup epochs, defaults to 2\n", + " - the initial learning rate, defaults to $10^{-4}$\n", + " - the max learning rate, defaults to $10^{-3}$\n", + " - the final learning rate, defaults to $10^{-4}$" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "model = MPNN(mp, agg, ffn, warmup_epochs=5, init_lr=1e-3, max_lr=1e-2, final_lr=1e-5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "During the validation and testing loops, lightning will use the metrics stored in `MPNN` to evaluate the current model's performance. The `MPNN` has a default metric defined by the type of predictor used. Other [metrics](../metrics.ipynb) can be given to `MPNN` to use instead." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn import metrics\n", + "\n", + "metrics_list = [metrics.RMSE(), metrics.MAE()]\n", + "model = MPNN(mp, agg, ffn, metrics=metrics_list)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fingerprinting and encoding" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MPNN` has two helper functions to get the hidden representations at different parts of the model. The fingerprint is the learned representation of the message passing layer after aggregation and batch normalization. The encoding is the hidden representation after a number of layers of the predictor. See the predictor notebook for more details. Note that the 0th encoding is equivalent to the fingerprint." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Example batch for the model. See the [data notebooks](../data/dataloaders.ipynb) for more details." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from chemprop.data import MoleculeDatapoint, MoleculeDataset\n", + "from chemprop.data import build_dataloader\n", + "\n", + "smis = [\"C\" * i for i in range(1, 4)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "dataset = MoleculeDataset([MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])\n", + "dataloader = build_dataloader(dataset)\n", + "batch = next(iter(dataloader))\n", + "bmg, V_d, X_d, *_ = batch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[0.0333],\n", + " [0.0331],\n", + " [0.0332]], grad_fn=)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "basic_model(bmg, V_d, X_d)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([3, 300])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "basic_model.fingerprint(bmg, V_d, X_d).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([3, 300])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "basic_model.encoding(bmg, V_d, X_d, i=1).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor(True)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(basic_model.fingerprint(bmg, V_d, X_d) == basic_model.encoding(bmg, V_d, X_d, i=0)).all()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/docs/source/tutorial/python/models/message_passing.ipynb b/chemprop-updated/docs/source/tutorial/python/models/message_passing.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..d834341e24577056aa5030495ca4f61da80b2d95 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/models/message_passing.ipynb @@ -0,0 +1,232 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Message passing" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.message_passing.base import BondMessagePassing, AtomMessagePassing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example [dataloader](../data/dataloaders.ipynb) to make inputs for the message passing layer." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from chemprop.data import MoleculeDatapoint, MoleculeDataset, build_dataloader\n", + "\n", + "smis = [\"C\" * i for i in range(1, 4)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "dataset = MoleculeDataset([MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])\n", + "dataloader = build_dataloader(dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Message passing schemes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are two message passing schemes. Chemprop prefers a D-MPNN scheme (`BondMessagePassing`) where messages are passed between directed edges (bonds) rather than between nodes (atoms) as would be done in a traditional MPNN (`AtomMessagePassing`)." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "mp = AtomMessagePassing()\n", + "mp = BondMessagePassing()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Input dimensions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By default, the bond message passing layer's input dimension is the sum of atom and bond features from the default [atom](../featurizers/atom_featurizers.ipynb) and [bond](../featurizers/bond_featurizers.ipynb) featurizers. If you use a custom featurizer, the message passing layer needs to be told when it is created.\n", + "\n", + "Also note that an atom message passing's default input dimension is the length of the atom features from the default atom featurizer." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.featurizers import SimpleMoleculeMolGraphFeaturizer\n", + "\n", + "n_atom_features, n_bond_features = SimpleMoleculeMolGraphFeaturizer().shape\n", + "(n_atom_features + n_bond_features) == mp.W_i.in_features" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers import MultiHotAtomFeaturizer\n", + "\n", + "n_extra_bond_features = 12\n", + "featurizer = SimpleMoleculeMolGraphFeaturizer(\n", + " atom_featurizer=MultiHotAtomFeaturizer.organic(), extra_bond_fdim=n_extra_bond_features\n", + ")\n", + "\n", + "mp = BondMessagePassing(d_v=featurizer.atom_fdim, d_e=featurizer.bond_fdim)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If extra atom descriptors are used, the message passing layer also needs to be told. A separate weight matrix is created and applied to the concatenated hidden representation and extra descriptors after message passing is complete. The output dimension of the message passing layer is the sum of the hidden size and number of extra atom descriptors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "328" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n_extra_atom_descriptors = 28\n", + "mp = BondMessagePassing(d_vd=n_extra_atom_descriptors)\n", + "mp.output_dim" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Customization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following hyperparameters of the message passing layer are customizable:\n", + "\n", + " - the hidden dimension during message passing, default: 300\n", + " - whether a bias term used, default: False\n", + " - the number of message passing iterations, default: 3\n", + " - whether to pass messages on undirected edges, default: False\n", + " - the dropout probability, default: 0.0 (i.e. no dropout)\n", + " - which activation function, default: ReLU" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "mp = BondMessagePassing(\n", + " d_h=600, bias=True, depth=5, undirected=True, dropout=0.5, activation=\"tanh\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The output of message passing is a torch tensor of shape # of atoms in batch x length of hidden representation." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([6, 600])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "batch_molgraph, extra_atom_descriptors, *_ = next(iter(dataloader))\n", + "hidden_atom_representations = mp(batch_molgraph, extra_atom_descriptors)\n", + "hidden_atom_representations.shape" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/docs/source/tutorial/python/models/multicomponent_mpnn_model.ipynb b/chemprop-updated/docs/source/tutorial/python/models/multicomponent_mpnn_model.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..2f142a975b184951bece7db7939ed45f41513fd4 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/models/multicomponent_mpnn_model.ipynb @@ -0,0 +1,208 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multicomponent models" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.message_passing import MulticomponentMessagePassing\n", + "from chemprop.models import MulticomponentMPNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Overview" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The basic Chemprop model is designed for a single molecule or reaction as input. A multicomponent Chemprop model organizes these basic building blocks to take multiple molecules/reactions as input. This is useful for properties that depend on multiple components like properties in solvents." 
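+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As a rough sketch of what multicomponent input data can look like, each component gets its own list of datapoints built from paired SMILES. The solute/solvent pairs and random targets below are made up purely for illustration; see the data notebooks for the full details of building multicomponent datasets and dataloaders." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from chemprop.data import MoleculeDatapoint, MoleculeDataset\n", + "\n", + "# made-up solute/solvent pairs and random targets, for illustration only\n", + "solute_smis = [\"CCO\", \"CC(=O)C\", \"c1ccccc1\"]\n", + "solvent_smis = [\"O\", \"O\", \"CS(=O)C\"]\n", + "ys = np.random.rand(len(solute_smis), 1)\n", + "\n", + "# one list of datapoints per component; in this sketch the targets are attached to the first component\n", + "solute_data = [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(solute_smis, ys)]\n", + "solvent_data = [MoleculeDatapoint.from_smi(smi) for smi in solvent_smis]\n", + "\n", + "# per-component datasets; these would typically be wrapped into a single multicomponent\n", + "# dataset (see the data notebooks) before building a dataloader\n", + "component_dsets = [MoleculeDataset(solute_data), MoleculeDataset(solvent_data)]"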
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Message passing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MulticomponentMessagePassing` organizes the single component [message passing](./message_passing.ipynb) modules for each component in the multicomponent dataset. The individual message passing modules can be unique for each component, shared between some components, or shared between all components. If all components share the same message passing module, the shared flag can be set to True. Note that it doesn't make sense for components that use different featurizers (e.g. molecules and reactions) to use the same message passing module." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn import BondMessagePassing\n", + "\n", + "mp1 = BondMessagePassing(d_h=100)\n", + "mp2 = BondMessagePassing(d_h=600)\n", + "blocks = [mp1, mp2]\n", + "mcmp = MulticomponentMessagePassing(blocks=blocks, n_components=len(blocks))\n", + "\n", + "mp = BondMessagePassing()\n", + "mcmp = MulticomponentMessagePassing(blocks=[mp], n_components=2, shared=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "During the forward pass of the model, the output of each message passing block is concatenated after aggregation as input to the predictor." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Aggregation\n", + "\n", + "A single [aggregation](./aggregation.ipynb) module is used on all message passing outputs." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn import MeanAggregation\n", + "\n", + "agg = MeanAggregation()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Predictor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The [predictor](./predictor.ipynb) needs to be told the output dimension of the message passing layer." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn import RegressionFFN\n", + "\n", + "ffn = RegressionFFN(input_dim=mcmp.output_dim)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multicomponent MPNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The submodules are composed together in a `MulticomponentMPNN` model."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MulticomponentMPNN(\n", + " (message_passing): MulticomponentMessagePassing(\n", + " (blocks): ModuleList(\n", + " (0-1): 2 x BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): Identity()\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=600, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): Identity()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mc_model = MulticomponentMPNN(mcmp, agg, ffn)\n", + "mc_model" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/docs/source/tutorial/python/models/predictor.ipynb b/chemprop-updated/docs/source/tutorial/python/models/predictor.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0f09019bd64271a59b5c6a0cb900cff3b6aef756 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/models/predictor.ipynb @@ -0,0 +1,444 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Predictors" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from chemprop.nn.predictors import (\n", + " RegressionFFN,\n", + " BinaryClassificationFFN,\n", + " MulticlassClassificationFFN,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is example output of [aggregation](./aggregation.ipynb) for input to the predictor." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "n_datapoints_in_batch = 2\n", + "hidden_dim = 300\n", + "example_aggregation_output = torch.randn(n_datapoints_in_batch, hidden_dim)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Feed forward network" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The learned representation from message passing and aggregation is a vector like that of fixed representations. While other predictors like random forest could be used to make final predictions from this representation, Chemprop prefers and implements using a feed forward network as that allows for end-to-end training. 
Three basic Chemprop FFNs differ in the prediction task they are used for. Note that multiclass classification needs to know the number of classes." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "regression_ffn = RegressionFFN()\n", + "binary_class_ffn = BinaryClassificationFFN()\n", + "multi_class_ffn = MulticlassClassificationFFN(n_classes=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Input dimension" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The default input dimension of the predictor is the same as the default dimension of the message passing hidden representation. If your message passing hidden dimension is different, or if you have additional atom or datapoint descriptors, you need to change the predictor's input dimension." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[0.2080],\n", + " [0.2787]], grad_fn=)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ffn = RegressionFFN()\n", + "ffn(example_aggregation_output)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.0877],\n", + " [-0.2629]], grad_fn=)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mp_hidden_dim = 2\n", + "n_atom_descriptors = 1\n", + "mp_output = torch.randn(n_datapoints_in_batch, mp_hidden_dim + n_atom_descriptors)\n", + "example_datapoint_descriptors = torch.randn(n_datapoints_in_batch, 12)\n", + "\n", + "input_dim = mp_output.shape[1] + example_datapoint_descriptors.shape[1]\n", + "\n", + "ffn = RegressionFFN(input_dim=input_dim)\n", + "ffn(torch.cat([mp_output, example_datapoint_descriptors], dim=1))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Output dimension" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The number of tasks defaults to 1 but can be adjusted. Predictors that need to predict multiple values per task, like multiclass classification, will automatically adjust the output dimension." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([2, 4])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ffn = RegressionFFN(n_tasks=4)\n", + "ffn(example_aggregation_output).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([2, 4, 3])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ffn = MulticlassClassificationFFN(n_tasks=4, n_classes=3)\n", + "ffn(example_aggregation_output).shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Customization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following hyperparameters of the predictor are customizable:\n", + "\n", + " - the hidden dimension between layers, default: 300\n", + " - the number of layers, default: 1\n", + " - the dropout probability, default: 0.0 (i.e. 
no dropout)\n", + " - which activation function, default: ReLU" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ 0.0121],\n", + " [-0.0760]], grad_fn=)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "custom_ffn = RegressionFFN(hidden_dim=600, n_layers=3, dropout=0.1, activation=\"tanh\")\n", + "custom_ffn(example_aggregation_output)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Intermediate hidden representations can also be extracted. Note that each predictor layer consists of an activation layer, followed by dropout, followed by a linear layer. The first predictor layer only has the linear layer." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([2, 600])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "layer = 2\n", + "custom_ffn.encode(example_aggregation_output, i=layer).shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=600, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): Tanh()\n", + " (1): Dropout(p=0.1, inplace=False)\n", + " (2): Linear(in_features=600, out_features=600, bias=True)\n", + " )\n", + " (2): Sequential(\n", + " (0): Tanh()\n", + " (1): Dropout(p=0.1, inplace=False)\n", + " (2): Linear(in_features=600, out_features=600, bias=True)\n", + " )\n", + " (3): Sequential(\n", + " (0): Tanh()\n", + " (1): Dropout(p=0.1, inplace=False)\n", + " (2): Linear(in_features=600, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): Identity()\n", + ")" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "custom_ffn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Criterion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each predictor has a criterion that is used as the [loss function](../loss_functions.ipynb) during training. The default criterion for a predictor is defined in the predictor class." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(RegressionFFN._T_default_criterion)\n", + "print(BinaryClassificationFFN._T_default_criterion)\n", + "print(MulticlassClassificationFFN._T_default_criterion)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A custom criterion can be given to the predictor." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn import MSE\n", + "\n", + "criterion = MSE(task_weights=torch.tensor([0.5, 1.0]))\n", + "ffn = RegressionFFN(n_tasks=2, criterion=criterion)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regression vs. 
classification" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In addition to using different loss functions, regression and classification predictors also differ in their tranforms of the model outputs during inference. \n", + "\n", + "Regression should use a [scaler transform](../scaling.ipynb) if target normalization is used during training.\n", + "\n", + "Classification uses a sigmoid (for binary classification) or a softmax (for multiclass) transform to keep class probability predictions between 0 and 1. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor(True)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "probs = binary_class_ffn(example_aggregation_output)\n", + "(0 < probs).all() and (probs < 1).all()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Other predictors coming soon" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Beta versions of predictors for uncertainty and spectral tasks will be finalized in v2.1." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.predictors import (\n", + " MveFFN,\n", + " EvidentialFFN,\n", + " BinaryDirichletFFN,\n", + " MulticlassDirichletFFN,\n", + " SpectralFFN,\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/docs/source/tutorial/python/saving_and_loading.ipynb b/chemprop-updated/docs/source/tutorial/python/saving_and_loading.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..9d1d8aa518bc5616a3964db75a737930542e00ff --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/saving_and_loading.ipynb @@ -0,0 +1,161 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Saving and loading models" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from chemprop.models.utils import save_model, load_model\n", + "from chemprop.models.model import MPNN\n", + "from chemprop.models.multi import MulticomponentMPNN\n", + "from chemprop import nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example buffer to save to and load from, to avoid creating new files when running this notebook. A real use case would probably save to and read from a file like `model.pt`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import io\n", + "\n", + "saved_model = io.BytesIO()\n", + "\n", + "# from pathlib import Path\n", + "# saved_model = Path(\"model.pt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Saving models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A valid model save file is a dictionary containing the hyper parameters and state dict of the model. `torch` is used to pickle the dictionary." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "model = MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), nn.RegressionFFN())\n", + "\n", + "save_model(saved_model, model)\n", + "\n", + "# model_dict = {\"hyper_parameters\": model.hparams, \"state_dict\": model.state_dict()}\n", + "# torch.save(model_dict, saved_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`lightning` will also automatically create checkpoint files during training. These `.ckpt` files are like `.pt` model files, but also contain information about training and can be used to restart training. See the `lightning` documentation for more details." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "from lightning.pytorch.callbacks import ModelCheckpoint\n", + "from lightning.pytorch import Trainer\n", + "\n", + "checkpointing = ModelCheckpoint(\n", + " dirpath=\"mycheckpoints\",\n", + " filename=\"best-{epoch}-{val_loss:.2f}\",\n", + " monitor=\"val_loss\",\n", + " mode=\"min\",\n", + " save_last=True,\n", + ")\n", + "trainer = Trainer(callbacks=[checkpointing])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loading models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MPNN` and `MulticomponentMPNN` each have a class method to load a model from either a model file `.pt` or a checkpoint file `.ckpt`. The method to load from a file works for either model files or checkpoint files, but won't load the saved training information from a checkpoint file." 
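+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As a side note, because a `.ckpt` file also stores the optimizer and trainer state, training can be resumed by passing `ckpt_path` to `lightning`'s `Trainer.fit`. The trainer settings, dataloaders, and checkpoint path in this sketch are placeholders. To load just the model weights and hyperparameters, use the class methods shown below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from lightning.pytorch import Trainer\n", + "\n", + "# placeholder loaders and path, shown only to illustrate resuming from a checkpoint\n", + "# trainer = Trainer(max_epochs=40)\n", + "# trainer.fit(model, train_loader, val_loader, ckpt_path=\"mycheckpoints/last.ckpt\")"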
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Need to set the buffer stream position to the beginning, not necessary if using a file\n", + "saved_model.seek(0)\n", + "\n", + "model = MPNN.load_from_file(saved_model)\n", + "\n", + "# Other options\n", + "# model = MPNN.load_from_checkpoint(saved_model)\n", + "# model = MulticomponentMPNN.load_from_file(saved_model)\n", + "# model = MulticomponentMPNN.load_from_checkpoint(saved_model)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/docs/source/tutorial/python/scaling.ipynb b/chemprop-updated/docs/source/tutorial/python/scaling.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..2ad64091c1602e5b4eb7a5dc589fead17a4175d5 --- /dev/null +++ b/chemprop-updated/docs/source/tutorial/python/scaling.ipynb @@ -0,0 +1,687 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Scaling inputs and outputs" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from chemprop.models import MPNN\n", + "from chemprop.nn import BondMessagePassing, NormAggregation, RegressionFFN\n", + "from chemprop.nn.transforms import ScaleTransform, UnscaleTransform, GraphTransform" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example [dataset](./data/datasets.ipynb) with extra atom and bond features, extra atom descriptors, and extra [datapoint](./data/datapoints.ipynb) descriptors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from chemprop.data import MoleculeDatapoint, MoleculeDataset\n", + "\n", + "smis = [\"CC\", \"CN\", \"CO\", \"CF\", \"CP\", \"CS\", \"CI\"]\n", + "ys = np.random.rand(len(smis), 1) * 100\n", + "\n", + "n_datapoints = len(smis)\n", + "n_atoms = 2\n", + "n_bonds = 1\n", + "n_extra_atom_features = 3\n", + "n_extra_bond_features = 4\n", + "n_extra_atom_descriptors = 5\n", + "n_extra_datapoint_descriptors = 6\n", + "\n", + "extra_atom_features = np.random.rand(n_datapoints, n_atoms, n_extra_atom_features)\n", + "extra_bond_features = np.random.rand(n_datapoints, n_bonds, n_extra_bond_features)\n", + "extra_atom_descriptors = np.random.rand(n_datapoints, n_atoms, n_extra_atom_descriptors)\n", + "extra_datapoint_descriptors = np.random.rand(n_datapoints, n_extra_datapoint_descriptors)\n", + "\n", + "datapoints = [\n", + " MoleculeDatapoint.from_smi(smi, y, x_d=x_d, V_f=V_f, E_f=E_f, V_d=V_d)\n", + " for smi, y, x_d, V_f, E_f, V_d in zip(\n", + " smis,\n", + " ys,\n", + " extra_datapoint_descriptors,\n", + " extra_atom_features,\n", + " extra_bond_features,\n", + " extra_atom_descriptors,\n", + " )\n", + "]\n", + "train_dset = MoleculeDataset(datapoints[:3])\n", + "val_dset = MoleculeDataset(datapoints[3:5])\n", + "test_dset = MoleculeDataset(datapoints[5:])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scaling targets - FFN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Scaling the target values before training can improve model performance and make training faster. The scaler for the targets should be fit to the training dataset and then applied to the validation dataset. This scaler is *not* applied to the test dataset. Instead the scaler is used to make an `UnscaleTransform` which is given to the predictor (FFN) layer and used automatically during inference. \n", + "\n", + "Note that currently the output_transform is saved both in the model's state_dict and and in the model's hyperparameters. This may be changed in the future to align with `lightning`'s recommendations. You can ignore any messages about this." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "output_scaler = train_dset.normalize_targets()\n", + "val_dset.normalize_targets(output_scaler)\n", + "# test_dset targets not scaled\n", + "\n", + "output_transform = UnscaleTransform.from_standard_scaler(output_scaler)\n", + "\n", + "ffn = RegressionFFN(output_transform=output_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scaling extra atom and bond features - Message Passing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The atom and bond features generated by Chemprop [featurizers](./featurizers/molgraph_molecule_featurizer.ipynb) are either multi-hot or on the order of 1. We recommend scaling extra atom and bond features to also be on the order of 1. Like the target scaler, these scalers are fit to the training data, applied to the validation data, and then saved to the model (in this case the message passing layer) so that they are applied automatically to the test dataset during inference." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
StandardScaler()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "StandardScaler()" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "V_f_scaler = train_dset.normalize_inputs(\"V_f\")\n", + "E_f_scaler = train_dset.normalize_inputs(\"E_f\")\n", + "\n", + "val_dset.normalize_inputs(\"V_f\", V_f_scaler)\n", + "val_dset.normalize_inputs(\"E_f\", E_f_scaler)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The scalers are used to make `ScaleTransform`s. These are combined into a `GraphTransform` which is given to the message passing module. Note that `ScaleTransform` acts on the whole feature vector, not just the extra features. The `ScaleTransform`'s mean and scale arrays are padded with enough zeros and ones so that only the extra features are actually scaled. The amount of padding required is the length of the default features of the featurizer." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers import SimpleMoleculeMolGraphFeaturizer\n", + "\n", + "featurizer = SimpleMoleculeMolGraphFeaturizer(\n", + " extra_atom_fdim=n_extra_atom_features, extra_bond_fdim=n_extra_bond_features\n", + ")\n", + "n_V_features = featurizer.atom_fdim - featurizer.extra_atom_fdim\n", + "n_E_features = featurizer.bond_fdim - featurizer.extra_bond_fdim\n", + "\n", + "V_f_transform = ScaleTransform.from_standard_scaler(V_f_scaler, pad=n_V_features)\n", + "E_f_transform = ScaleTransform.from_standard_scaler(E_f_scaler, pad=n_E_features)\n", + "\n", + "graph_transform = GraphTransform(V_f_transform, E_f_transform)\n", + "\n", + "mp = BondMessagePassing(graph_transform=graph_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you only have one of extra atom features or extra bond features, you can set the transform for the unused option to `torch.nn.Identity`." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "graph_transform = GraphTransform(V_transform=torch.nn.Identity(), E_transform=E_f_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scaling extra atom descriptors - Message Passing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The atom descriptors from message passing (before aggregation) are also likely to be on the order of 1 so extra atom descriptors should also be scaled. No padding is needed (unlike above) as this scaling is only applied to the extra atom descriptors. The `ScaleTransform` is given to the message passing module for use during inference." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "V_d_scaler = train_dset.normalize_inputs(\"V_d\")\n", + "val_dset.normalize_inputs(\"V_d\", V_d_scaler)\n", + "\n", + "V_d_transform = ScaleTransform.from_standard_scaler(V_d_scaler)\n", + "\n", + "mp = BondMessagePassing(V_d_transform=V_d_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A `GraphTransform` and `ScaleTransform` can both be given to the message passing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "mp = BondMessagePassing(graph_transform=graph_transform, V_d_transform=V_d_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scaling extra datapoint descriptors - MPNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The molecule/reaction descriptors from message passing (after aggregation) are batch normalized by default to be on the order of 1 (can be turned off, see the [model notebook](./models/basic_mpnn_model.ipynb)). Therefore we also recommended scaling the extra datapoint level descriptors. The `ScaleTransform` for this is given to the `MPNN` or `MulticomponentMPNN` module." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "X_d_scaler = train_dset.normalize_inputs(\"X_d\")\n", + "val_dset.normalize_inputs(\"X_d\", X_d_scaler)\n", + "\n", + "X_d_transform = ScaleTransform.from_standard_scaler(X_d_scaler)\n", + "\n", + "chemprop_model = MPNN(\n", + " BondMessagePassing(), NormAggregation(), RegressionFFN(), X_d_transform=X_d_transform\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/docs/source/uncertainty.nblink b/chemprop-updated/docs/source/uncertainty.nblink new file mode 100644 index 0000000000000000000000000000000000000000..dc48565d43c209b26725082a41097cb8cfd075e6 --- /dev/null +++ b/chemprop-updated/docs/source/uncertainty.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/uncertainty.ipynb" +} diff --git a/chemprop-updated/docs/source/use_featurizer_with_other_libraries.nblink b/chemprop-updated/docs/source/use_featurizer_with_other_libraries.nblink new file mode 100644 index 0000000000000000000000000000000000000000..5112dd211f8a2693b821a4df548d2e8d3d750165 --- /dev/null +++ b/chemprop-updated/docs/source/use_featurizer_with_other_libraries.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/use_featurizer_with_other_libraries.ipynb" +} diff --git a/chemprop-updated/environment.yml b/chemprop-updated/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..fd1fb6ca6355730c2c7ec83889ade7f67e5956d4 --- /dev/null +++ b/chemprop-updated/environment.yml @@ -0,0 +1,17 @@ +name: chemprop +channels: + - conda-forge +dependencies: + - python>=3.11 + - pytorch>=2.1 + - astartes + - aimsim + - configargparse + - lightning>=2.0 + - numpy + - pandas + - rdkit + - scikit-learn + - scipy + - rich + - descriptastorus diff --git a/chemprop-updated/examples/active_learning.ipynb b/chemprop-updated/examples/active_learning.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..4200f0fa0fa1fca7cc85bfc575af2a6d864a0d6d --- /dev/null +++ b/chemprop-updated/examples/active_learning.ipynb @@ -0,0 +1,843 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Active Learning\n", + "Active learning is an iterative process where a model actively selects the most informative data points to be labeled by an oracle (e.g. 
a human expert), optimizing the model's performance with fewer labeled samples. Active learning can be implemented with Chemprop through Python as demonstrated by this notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/active_learning.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import random\n", + "from typing import Tuple\n", + "\n", + "from lightning import pytorch as pl\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import torch\n", + "from torch.utils.data import DataLoader\n", + "\n", + "from chemprop import data, featurizers, models, nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load some data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = (\n", + " chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n", + ") # path to your data .csv file\n", + "df_input = pd.read_csv(input_path)\n", + "smis = df_input.loc[:, \"smiles\"].values\n", + "ys = df_input.loc[:, [\"lipo\"]].values\n", + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook we use three sets of data: a starting set of training data, a set of data to select additional training data from, and a set of data to test the model on. The set of data to select additional training data from could be unlabeled, but for this example all the data already has labels." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.mol for d in all_data] # RDKit Mol objects are used for structure-based splits\n", + "splitting_indices = data.make_split_indices(mols, \"random\", (0.1, 0.8, 0.1))\n", + "starting_data, additional_data, test_data = data.split_data_by_indices(all_data, *splitting_indices)\n", + "starting_data, additional_data, test_data = starting_data[0], additional_data[0], test_data[0]\n", + "test_loader = data.build_dataloader(data.MoleculeDataset(test_data), shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "During each iteration of active learning, the training data will be split into training and validation sets and packaged into data loaders, so we make a helper function to do this."
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def get_dataloaders(trainval_data) -> Tuple[DataLoader]:\n", + " trainval_mols = [d.mol for d in trainval_data]\n", + " train_indices, _, val_indices = data.make_split_indices(\n", + " trainval_mols, \"random\", (0.9, 0.0, 0.1)\n", + " )\n", + " train_data, val_data, _ = data.split_data_by_indices(\n", + " trainval_data, train_indices, val_indices, None\n", + " )\n", + "\n", + " train_dset = data.MoleculeDataset(train_data[0])\n", + " scaler = train_dset.normalize_targets()\n", + "\n", + " val_dset = data.MoleculeDataset(val_data[0])\n", + " val_dset.normalize_targets(scaler)\n", + "\n", + " train_loader = data.build_dataloader(train_dset)\n", + " val_loader = data.build_dataloader(val_dset, shuffle=False)\n", + " return train_loader, val_loader, scaler" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We also define a helper function to construct a chemprop model. Because this is a regression task, the targets of the training data are normalized, and the model needs the scaler that was used so that it can unnormalize its predictions." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def get_mpnn(scaler):\n", + " output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)\n", + " ffn = nn.MveFFN(output_transform=output_transform)\n", + " mpnn = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), ffn, batch_norm=False)\n", + " return mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We also need a lightning trainer to run the model." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False, enable_progress_bar=False, accelerator=\"cpu\", devices=1, max_epochs=20\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change active learning parameters here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A priority function (or acquisition function) guides the active learning process by selecting the most informative data points to label next. A good choice for such a function is the uncertainty of a model's output on each data point." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# rank datapoints based on priority, priority determined by variance\n", + "def priority_function(mpnn, datapoint):\n", + " dataset = data.MoleculeDataset([datapoint])\n", + " loader = data.build_dataloader(dataset, batch_size=1)\n", + " output = trainer.predict(mpnn, loader)\n", + " output = torch.concat(output, dim=0)\n", + " return output[..., 1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If our additional data was unlabeled, we would need a way to get the labels for the selected data points. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# the oracle takes a list of potentially unlabeled datapoints to be labeled for the next active learning iteration.\n", + "def request_labels(new_data):\n", + " # adding new data labels:\n", + " # for datapoint in new_data:\n", + " # datapoint.y = {label}\n", + " return" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Lastly, we also need to decide how many data points to add to our training set in each iteration." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# number of new datapoints added to trainval pool each iteration.\n", + "query_size = len(additional_data) // 8" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start training" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by training a model on the initial training data." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Dropping last batch of size 1 to avoid issues with batch normalization (dataset size = 1, batch_size = 64)\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/utilities/data.py:105: Total length of `DataLoader` across ranks is zero. Please make sure this was your intention.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n" + ] + } + ], + "source": [ + "train_loader, val_loader, scaler = get_dataloaders(starting_data)\n", + "mpnn = get_mpnn(scaler)\n", + "trainer.fit(mpnn, train_loader, val_loader)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can start the active learning loop. 
In each iteration, we train a model on the current training data, use the model to select the most informative data points (the ones where the model is least certain), add them to the training data, and repeat." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              1.2045652866363525     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.2045652866363525 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              0.9172996282577515     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9172996282577515 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              1.0593369007110596     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.0593369007110596 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse               1.151768445968628     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.151768445968628 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              1.2037131786346436     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.2037131786346436 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              1.1304174661636353     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.1304174661636353 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              1.0078696012496948     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.0078696012496948 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              0.9942679405212402     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9942679405212402 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "trainval_data = starting_data\n", + "results = []\n", + "\n", + "for _ in range(len(additional_data) // query_size):\n", + " # sort new datapoints by priority using priority function\n", + " priority_remaining_data = [\n", + " (priority_function(mpnn, datapoint), datapoint) for datapoint in additional_data\n", + " ]\n", + " sorted_remaining_data = [\n", + " datapoint\n", + " for unc, datapoint in sorted(priority_remaining_data, key=lambda d: d[0], reverse=True)\n", + " ]\n", + "\n", + " new_data = sorted_remaining_data[:query_size]\n", + " additional_data = additional_data[query_size:]\n", + "\n", + " request_labels(new_data)\n", + " trainval_data.extend(new_data)\n", + "\n", + " train_loader, val_loader, scaler = get_dataloaders(trainval_data)\n", + "\n", + " mpnn = get_mpnn(scaler)\n", + " trainer.fit(mpnn, train_loader, val_loader)\n", + "\n", + " result = trainer.test(mpnn, test_loader)\n", + " results.append((len(trainval_data), result[0][\"test/mse\"]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally we can view the results. The model's performance will hopefully improve with each iteration of active learning. Though this notebook is just an example. We didn't train the model for many epochs, and we used a very small dataset, so we don't expect to see the model improve. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(20, 1.2045652866363525),\n", + " (30, 0.9172996282577515),\n", + " (40, 1.0593369007110596),\n", + " (50, 1.151768445968628),\n", + " (60, 1.2037131786346436),\n", + " (70, 1.1304174661636353),\n", + " (80, 1.0078696012496948),\n", + " (90, 0.9942679405212402)]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAArwAAAK7CAYAAAAQv1z7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACTKklEQVR4nOzdd3iV9f3/8dd9TvYke0BIwt6IbBAFB8qqfq3VasGB1lrbb7W0vyraVm1t1bZWa5114SqOr1tAQGUpiDLC3oSZhJC957l/f5ycA5FhAknuM56P68p1lXPuk/POXUxefPL+vD+GaZqmAAAAAB9ls7oAAAAAoD0ReAEAAODTCLwAAADwaQReAAAA+DQCLwAAAHwagRcAAAA+jcALAAAAn0bgBQAAgE8j8AIAAMCnEXgBeIQnnnhChmFowIABZ/w5cnJydP/99ysrK+uE5+6//34ZhnEWFZ6ZOXPmyDAM7du3r8Pfu7UMw9D9999vdRke4bv3YuvWrbr//vu94v9HACci8ALwCC+99JIkacuWLVq9evUZfY6cnBw98MADJw28t9xyi1atWnU2Jfq8VatW6ZZbbrG6DI+0detWPfDAAwRewEsReAFYbs2aNdqwYYOmTJkiSXrxxRfb/D26dOmiUaNGtfnn9VT19fVqaGho1WtGjRqlLl26tFNFrdPY2Kja2lqrywDgIwi8ACznCrgPP/ywxowZozfffFNVVVUnXHf48GHdeuutSktLU1BQkFJTU3XVVVfpyJEjWrp0qYYPHy5Juummm2QYRrNfS3+3peGKK65Qenq6HA7HCe8zcuRInXvuue4/m6app59+Wuecc45CQ0MVExOjq666Snv37j3jr/mzzz7TRRddpKioKIWFhWns2LH6/PPPm12ze/du3XTTTerZs6fCwsLUuXNnTZs2TZs2bWp23dKlS2UYhl577TX95je/UefOnRUcHKzdu3frxhtvVEREhHbv3q3JkycrIiJCaWlp+s1vfnNCoPzur/Fd7RhLlizRz3/+c8XHxysuLk5XXnmlcnJymr22trZWv/nNb5ScnKywsDCdf/75Wrt2rTIyMnTjjTee9l7s27dPhmHob3/7mx588EFlZmYqODhYS5YskeT8B9EPfvADxcbGKiQkREOGDNHbb7/d7HNUVVXpt7/9rTIzMxUSEqLY2FgNGzZMc+fOdV8zfvx4jR8//oT3v/HGG5WRkXHK+ubMmaMf/ehHkqQJEya4/27NmTNHkrR+/XpNnTpViYmJCg4OVmpqqqZMmaJDhw6d9usG0HEIvAAsVV1drblz52r48OEaMGCAZs6cqfLycr3zzjvNrjt8+LCGDx+u999/X7NmzdKCBQv0+OOPKzo6WsXFxTr33HP18ssvS5J+//vfa9WqVaf9Ff3MmTN14MABffHFF80e3759u7755hvddNNN7sd+9rOf6c4779TFF1+sDz74QE8//bS2bNmiMWPG6MiRI63+ml9//XVNnDhRUVFReuWVV/T2228rNjZWl156abPQm5OTo7i4OD388MP69NNP9dRTTykgIEAjR47Ujh07Tvi8s2fP1oEDB/Tss8/q448/VmJioiTnau8PfvADXXTRRfrwww81c+ZMPfbYY3rkkUdaVO8tt9yiwMBA/fe//9Xf/vY3LV26VNOnT292zU033aTHH39cN910kz788EP98Ic/1P/8z/+opKSkxffliSee0BdffKF//OMfWrBggfr06aMlS5Zo7NixKikp0bPPPqsPP/xQ55xzjq655hp34JSkWbNm6ZlnntGvfvUrffrpp3rttdf0ox/9SIWFhS1+/1OZMmWK/vrXv0qSnnrqKfffrSlTpqiyslKXXHKJjhw5oqeeekqLFy/W448/rq5du6q8vPys3xtAGzEBwEKvvvqqKcl89tlnTdM0zfLycjMiIsIcN25cs+tmzpxpBgYGmlu3bj3l5/r2229NSebLL798wnP33Xefefy3vPr6ejMpKcm87rrrml33u9/9zgwKCjILCgpM0zTNVatWmZLMRx99tNl1Bw8eNENDQ83f/e53p/36Xn75ZVOSmZ2dbZqmaVZWVpqxsbHmtGnTml3X2NhoDh482BwxYsQpP1dDQ4NZV1dn9uzZ0/z1r3/tfnzJkiWmJPP8888/4TU33HCDKcl8++23mz0+efJks3fv3s0ek2Ted999J9R+++23N7vub3/7mynJzM3NNU3TNLds2WJKMu+6665m182dO9eUZN5www2n/JpM0zSzs7NNSWb37t3Nurq6Zs/16dPHHDJkiFlfX9/s8alTp5opKSlmY2OjaZqmOWDAAPOKK6447ftccMEF5gUXXHDC4zfccIOZnp7e7LHv3ot33nnHlGQuWbKk2XVr1qwxJZkffPDBad8bgLVY4QVgqRdffFGhoaH68Y9/LEmKiIjQj370I61YsUK7du1yX7dgwQJNmDBBffv2bZP3DQgI0PTp0/Xee++ptLRUkrNv9LXXXtPll1+uuLg4SdInn3wiwzA0ffp0NTQ0uD+Sk5M1ePBgLV26tFXvu3LlShUVFemGG25o9vkcDocuu+wyffvtt6qsrJQkNTQ06K9//av69eunoKAgBQQEKCgoSLt27dK2bdtO+Nw//OEPT/qehmFo2rRpzR4bNGiQ9u/f36Kaf/CDH5zwWknu1y9btkySdPXVVze77qqrrlJAQECL3sP1PoGBge4/7969W9u3b9dPfvITSWp2vyZPnqzc3Fz3SveIESO0YMEC3X333Vq6dKmqq6tb/L5no0ePHoqJidFdd92lZ599Vlu3bu2Q9wXQOgReAJbZvXu3li9frilTpsg0TZWUlKikpERXXXWVpGOTGyTp6NGjbb6haubMmaqpqdGbb74pSVq4cKFyc3ObtTMcOXJEpmkqKSlJgY
GBzT6+/vprFRQUtOo9XS0QV1111Qmf75FHHpFpmioqKpLk/DX9H/7wB11xxRX6+OOPtXr1an377bcaPHjwSQNdSkrKSd8zLCxMISEhzR4LDg5WTU1Ni2p2hf/jXyvJXYOrbSApKanZdQEBASe89nS+W7/rXv32t7894V7dfvvtkuS+/0888YTuuusuffDBB5owYYJiY2N1xRVXNPtHU3uIjo7WsmXLdM455+iee+5R//79lZqaqvvuu0/19fXt+t4AWq7l//QGgDb20ksvyTRN/d///Z/+7//+74TnX3nlFT344IOy2+1KSEho801A/fr104gRI/Tyyy/rZz/7mV5++WWlpqZq4sSJ7mvi4+NlGIZWrFjhDnrHO9ljpxMfHy9J+ve//33KqRGu4Pj666/r+uuvd/ePuhQUFKhTp04nvM6KOcPSsUB85MgRde7c2f14Q0NDq3pov1u/617Nnj1bV1555Ulf07t3b0lSeHi4HnjgAT3wwAM6cuSIe7V32rRp2r59uyQpJCTEvZp/vNb+o+W7Bg4cqDfffFOmaWrjxo2aM2eO/vSnPyk0NFR33333WX1uAG2DwAvAEo2NjXrllVfUvXt3vfDCCyc8/8knn+jRRx/VggULNHXqVE2aNEmvvfaaduzY4Q453/XdlceWuOmmm/Tzn/9cX375pT7++GPNmjVLdrvd/fzUqVP18MMP6/Dhwyf8yv5MjB07Vp06ddLWrVv1y1/+8rTXGoZxQqCeN2+eDh8+rB49epx1LW3l/PPPlyS99dZbzaZb/N///V+rR6Mdr3fv3urZs6c2bNhwQug/naSkJN14443asGGDHn/8cVVVVSksLEwZGRl65513VFtb676vhYWFWrlypaKiok77OVvyd8swDA0ePFiPPfaY5syZo3Xr1rW4ZgDti8ALwBILFixQTk6OHnnkkZOOihowYICefPJJvfjii5o6dar+9Kc/acGCBTr//PN1zz33aODAgSopKdGnn36qWbNmqU+fPurevbtCQ0P1xhtvqG/fvoqIiFBqaqpSU1NPWce1116rWbNm6dprr1Vtbe0JI7TGjh2rW2+9VTfddJPWrFmj888/X+Hh4crNzdWXX36pgQMH6uc//3mLv+6IiAj9+9//1g033KCioiJdddVVSkxM1NGjR7VhwwYdPXpUzzzzjCRn2J4zZ4769OmjQYMGae3atfr73//uMbNyXfr3769rr71Wjz76qOx2uy688EJt2bJFjz76qKKjo2WznXn33HPPPadJkybp0ksv1Y033qjOnTurqKhI27Zt07p169zTPEaOHKmpU6dq0KBBiomJ0bZt2/Taa69p9OjRCgsLkyTNmDFDzz33nKZPn66f/vSnKiws1N/+9rfvDbuS3CcA/uc//1FkZKRCQkKUmZmpVatW6emnn9YVV1yhbt26yTRNvffeeyopKdEll1xyxl83gDZm5Y45AP7riiuuMIOCgsz8/PxTXvPjH//YDAgIMPPy8kzTdE5GmDlzppmcnGwGBgaaqamp5tVXX20eOXLE/Zq5c+eaffr0MQMDA5vttP/ulIbjXXfddaYkc+zYsaes5aWXXjJHjhxphoeHm6GhoWb37t3N66+/3lyzZs1pv87vTmlwWbZsmTllyhQzNjbWDAwMNDt37mxOmTLFfOedd9zXFBcXmzfffLOZmJhohoWFmeedd565YsWKE6YNuKY0HP9alxtuuMEMDw8/4fGT3Q+dYkrDt99+2+w61/sdP7GgpqbGnDVrlpmYmGiGhISYo0aNMletWmVGR0c3myhxMq4pDX//+99P+vyGDRvMq6++2kxMTDQDAwPN5ORk88ILL3RP9jBN07z77rvNYcOGmTExMWZwcLDZrVs389e//rV72obLK6+8Yvbt29cMCQkx+/XrZ7711lstmtJgmqb5+OOPm5mZmabdbndPA9m+fbt57bXXmt27dzdDQ0PN6Ohoc8SIEeacOXNO+zUD6FiGaZqmNVEbAODLVq5cqbFjx+qNN97QddddZ3U5APwYgRcAcNYWL16sVatWaejQoQoNDdWGDRv08MMPKzo6Whs3bjxhSgQAdCR6eAEAZy0qKkqLFi3S448/rvLycsXHx2vSpEl66KGHCLsALMcKLwAAAHwaB08AAADApxF4AQAA4NMIvAAAAPBpbFo7CYfDoZycHEVGRlp2VCcAAABOzTRNlZeXKzU19XsPuCHwnkROTo7S0tKsLgMAAADf4+DBg997AiWB9yQiIyMlOW9gS46cBAAAQMcqKytTWlqaO7edDoH3JFxtDFFRUQReAAAAD9aS9lM2rQEAAMCnEXgBAADg0wi8AAAA8GkEXgAAAPg0Ai8AAAB8GoEXAAAAPo3ACwAAAJ9G4AUAAIBPI/ACAADApxF4AQAA4NMIvAAAAPBpBF4AAAD4NAIvAAAAfBqBFwAAAD6NwAsAAACfRuAFAACATyPwAgAAwKcReAEAAODTCLwAAADwaQReAAAA+DQCLwAAAHwagRcAAAA+jcALAAAAn0bg9QB7jlbo4w052nWk3OpSAAAAfA6B1wM8tWS3/nfuei3ckmd1KQAAAD6HwOsBMuPCJUnZBVUWVwIA1jhYVKU/fLBZe45WWF0KAB8UYHUBkDLinYF3X2GlxZUAQMcrr6nXjS9/oz1HK7XhUIk+uH2sbDbD6rIA+BBWeD1ApivwFhB4AfgXh8PUb97eoD1Hnd//Nh4q1QdZhy2uCoCvIfB6gPS4MElSYWWdymrqLa4GADrO00t3a9HWIwqy23TFOamSpL99ukPVdY0WVwbAlxB4PUBkSKDiI4IkSfvp4wXgJ5bsyNeji3dKkv50eX89/MNB6twpVHllNXp+xV6LqwPgSywNvMuXL9e0adOUmpoqwzD0wQcfnPb69957T5dccokSEhIUFRWl0aNHa+HChSdc9+6776pfv34KDg5Wv3799P7777fTV9B2Mlwb1+jjBeAH9hVU6o6562Wa0nUju+rHI7oqJNCuuyf1kSQ9s3SPjpTVWFwlAF9haeCtrKzU4MGD9eSTT7bo+uXLl+uSSy7R/PnztXbtWk2YMEHTpk3T+vXr3desWrVK11xzjWbMmKENGzZoxowZuvrqq7V69er2+jLaRAZ9vAD8RGVtg3722lqV1TRoSNdOum9aP/dzUwel6NyunVRd36h/LNxhYZUAfIlhmqZpdRGSZBiG3n//fV1xxRWtel3//v11zTXX6I9//KMk6ZprrlFZWZkWLFjgvuayyy5TTEyM5s6d26LPWVZWpujoaJWWlioqKqpV9Zypp5bs1t8X7tCVQzrrn9ec0yHvCQAdzTRN/XLues3bmKuEyGB98r/nKSkqpNk16w4U68qnV8owpI9/eZ4GdI62qFoAnqw1ec2re3gdDofKy8sVGxvrfmzVqlWaOHFis+suvfRSrVy58pSfp7a2VmVlZc0+OpqrpYHRZAB82X+W79W8jbkKsBl65ifnnhB2JencrjH6weBUmab0l3nb5CHrMgC8mFcH3kcffVSVlZW6+uqr3Y/l5eUpKSmp2XVJSUnKyzv1KWYPP
fSQoqOj3R9paWntVvOpuCY17Ctk0xoA3/TlrgI98ul2SdJ90/ppWEbsKa/93WW9FRRg06q9hfpsW35HlQjAR3lt4J07d67uv/9+vfXWW0pMTGz2nGE0H1humuYJjx1v9uzZKi0tdX8cPHiwXWo+HVcPb1FlnUqrGU0GwLccLKrSL+euk8OUfjS0i6aPSj/t9V1iwnTLeZmSpL/O36a6BkdHlAnAR3ll4H3rrbd088036+2339bFF1/c7Lnk5OQTVnPz8/NPWPU9XnBwsKKiopp9dLSI4AAlRAZLYuMaAN9SXdeon722ViVV9RrUJVp/vmLAaRchXG6f0EPxEUHKLqjU61/v74BKAfgqrwu8c+fO1Y033qj//ve/mjJlygnPjx49WosXL2722KJFizRmzJiOKvGMZdLHC8DHmKape97fpK25ZYoLD9Kz04cqJNDeotdGBAfoNxN7S5L+9fkulVTVtWepAHyYpYG3oqJCWVlZysrKkiRlZ2crKytLBw4ckORsNbj++uvd18+dO1fXX3+9Hn30UY0aNUp5eXnKy8tTaWmp+5o77rhDixYt0iOPPKLt27frkUce0WeffaY777yzI7+0M5IR39THy+ETAHzEnJX79P76w7LbDD153blK7RTaqtdfPSxNfZIjVVpdr399vqudqgTg6ywNvGvWrNGQIUM0ZMgQSdKsWbM0ZMgQ94ix3Nxcd/iVpOeee04NDQ36xS9+oZSUFPfHHXfc4b5mzJgxevPNN/Xyyy9r0KBBmjNnjt566y2NHDmyY7+4M+CexcsKLwAf8PXeQj04b5sk6Z7JfTW6e1yrP4fdZujeKX0lSa+t2q+9RyvatEYA/sFj5vB6Eivm8ErS/E25uv2NdTonrZM++MXYDntfAGhrOSXVmvbvL1VYWacrzknVY9ec06K+3VOZOedbfbE9X5f0S9Lz1w9rw0oBeCu/mcPra5jFC8AX1NQ36uevr1VhZZ36pUTpoSsHnVXYlaR7JveR3WZo8dYjWrmnoI0qBeAvCLwexNXDW1JVz+YMAF7JNE398cPN2nCoVJ3CAvXcjKEKDWrZJrXT6ZEYqZ+M7CpJevCTbWp08MtJAC1H4PUgYUEBSopqGk3GARQAvNAbqw/o7TWHZDOkf187RGmxYW32ue+8uJciQwK0NbdM76471GafF4DvI/B6GHdbA7N4AXiZtfuL9MDHWyRJv7usj8b1TGjTzx8bHqRfXdhTkvSPhTtUWdvQpp8fgO8i8HoYV+DNJvAC8CJHymp02+vrVN9oasrAFP3s/G7t8j7Xj0lX19gw5ZfX6rnle9vlPQD4HgKvh2E0GQBvU9fg0O1vrNPR8lr1SorQ3646+01qpxIcYNfsSX0kSf9Zvke5pdXt8j4AfAuB18Nkug+fIPAC8A5/+mSL1u4vVmRIgP4zY5jCgwPa9f0uG5CsERmxqql36O+f7mjX9wLgGwi8Hsa1wptdUClGJAPwdG9/e1Cvf31AhiH968fnuL+HtSfDMPT7qc7DKN5bf1gbD5W0+3sC8G4EXg+THuv8YVFW06CSqnqLqwGAU8s6WKLff7BZkjTr4l66sE9Sh733oC6ddOWQzpKcY8pYIABwOgReDxMaZFdyVIgkKZs+XgAe6mh5rW57ba3qGh26pF+SfjGhR4fX8P8u662QQJu+2VekTzfndfj7A/AeBF4PlEEfLwAPVt/o0C/+u055ZTXqlhCuf149WDZb+2xSO52U6FDden53SdJDC7artqGxw2sA4B0IvB4oM55ZvAA811/nb9M32UWKCHZuUosMCbSslp+d302JkcE6UFSlV1fut6wOAJ6NwOuB3LN4OW0NgId5f/0hvfzVPknSo1cPVo/ECEvrCQ8O0G8v7S1JeuKLXSqsqLW0HgCeicDrgVy7nPfTwwvAg2w+XKq7390kSfrfC3vo0v7JFlfk9MNzu6hfSpTKaxr0r893WV0OAA9E4PVAx5+2xs5jAJ6gqLJOP3ttrWobHJrQO0F3XtzL6pLc7LZjY8reWH1Au/PLLa4IgKch8Hqg9DjnprXymgYVVdZZXA0Af9fQ6ND/zl2nwyXVyogL0+M/HiK7BZvUTmdM93hd0i9JjQ5Tf52/3epyAHgYAq8HCgm0KzXaOZqMI4YBWO3vC3foq92FCguy67kZwxQdat0mtdOZPamPAmyGvtierxW7jlpdDgAPQuD1UMdOXGPjGgDrfLwhR88t3ytJ+vtVg9U7OdLiik6tW0KEZoxOl+Q8jKLRQUsYACcCr4di4xoAq23PK9Pv/m+jJOlnF3TTlEEpFlf0/e64qKeiQwO140i53l5z0OpyAHgIAq+Hyjxu4xoAdLTSqnrd+upaVdc3alzPeP3u0j5Wl9QincKCdMdFPSVJjy7aoYraBosrAuAJCLweyrVxjR5eAB2t0WHqjrfW60BRlbrEhOoJD9ykdjrTR6UrMz5cBRV1enrJbqvLAeABCLwe6thpa1WMJgPQoR5bvFNLdxxVSKBNz80YqpjwIKtLapWgAJvumewcU/bCl9k6VMxeCMDfEXg9VFpsmAxDqqhtUEEFo8kAdIxPN+fpyaZV0YevHKT+qdEWV3RmLu6bqNHd4lTX4NDfPt1hdTkALEbg9VDO0WShkti4BqBj7M4v12/ezpIkzRybqSuGdLa2oLNgGIbundJXhiF9tCFH6w4UW10SAAsReD1YZjwb1wB0jLKaet362lpV1jVqVLdYzZ7sHZvUTmdA52hddW4XSdKDn2ylPQzwYwReD8bGNQAdweEwNeutDdp7tFIp0SF68rpzFWj3jR8Pv720t8KC7Fp3oESfbMy1uhwAFvGN72g+6viNawDQXv79xW59tu2IggJsenb6UMVHBFtdUptJigrRbRd0lyQ9vGC7auobLa4IgBUIvB4sg1m8ANrZ59uO6PHPd0qSHrxigAandbK2oHbw03HdlBwVosMl1Xr5q31WlwPAAgReD+Y6bW1fYSW9ZwDaXHZBpe58K0umKc0Yla6rh6VZXVK7CA2y63eX9ZYkPbVkt46W11pcEYCORuD1YGmxobIZUlVdo45W8A0aQNupqG3Qra+uUXlNg4alx+gPU/tZXVK7uuKczhrUJVoVtQ167LOdVpcDoIMReD1YcIBdqZ2co8no4wXQVkzT1O/+b4N25VcoMTJYT//kXAUF+PaPA5vN0O+nOEP9m98c0I68cosrAtCRfPs7nA84tnGNPl4AbePZZXs1f1OeAu2Gnpk+VIlRIVaX1CFGZMZq0oBkOUzpwXmMKQP8CYHXw7k3rjGaDEAbWLbzqP62cLsk6f4f9NfQ9BiLK+pYd0/qoyC7TSt2FWjpzqNWlwOggxB4PVwGK7wA2siBwir9au56mab04+Fpum5EV6tL6nDpceG6cWyGJOkv87apodFhbUEAOgSB18NlxrsOn6CHF8CZq6pr0K2vrVFpdb0Gp3XSA5f3l2EYVpdliV9M6KGYsEDtzq/Q3G8PWl0OgA5A4PVw6U0tDfsZTQbgDJmmqbvf3aTteeWKjwjSs9PPVXCA3eqyLBMdGqhfX9JLkvTY4p0q
ra63uCIA7Y3A6+HSYsLco8nymR0J4Ay8+GW2PtqQowCboaeuO1cp0aFWl2S5a0d0VfeEcBVV1unpJbutLgdAOyPwerigAJu6xDjbGjhxDUBrrdxToIcWODep/X5KX43sFmdxRZ4h0G5zjyl7+at9OkDbGODTCLxegI1rAM7E4ZJq/fK/69XoMHXlkM66YUyG1SV5lPG9EzSuZ7zqGh165NPtVpcDoB0ReL1AZhwb1wC0Tk19o257ba2KKuvUPzVKf71yoN9uUjsVwzB075S+shnSvE25+nZfkdUlAWgnBF4vwAovgNYwTVP3vr9Zmw6XKiYsUM/NGKqQQP/dpHY6fZKjdM1w53i2Bz/ZKoeDzcGALyLwegHX4RP7OHwCQAu89vV+vbvukGyG9NR157r3AeDkZl3SS+FBdm04VKqPNuRYXQ6AdkDg9QLuFd7CSlYfAJzWN9lF+tPHWyVJsyf11Zge8RZX5PkSIoN1+4QekqRHPt2u6rpGiysC0NYIvF6gS0yo7DZDNfUOHSmvsbocAB4qr7RGt7+xTg0OU9MGp+qWcZlWl+Q1bj4vU507hSq3tEYvrNhrdTkA2hiB1wsE2m1Ki3HOzdxXwMY1ACeqbWjUba+vVUFFrfokR+qRH7JJrTVCAu26a1IfSdIzy/Yov4zFBcCXEHi9xPFtDQDwXfd/tFVZB0sUFRKg52YMVVhQgNUleZ1pg1I0pGsnVdU16tFFO60uB0AbIvB6CffGNSY1APiOud8c0NxvDsgwpCeuHeI+khytYxiG+zCKt9ce1JacUosrAtBWCLxeIiOO09YAnGjdgWLd9+EWSdJvJ/bW+N6JFlfk3Yamx2jqoBSZpvSXedtkmmwUBnwBgddL0NIA4Lvyy2v089fXqq7Rocv6J+v28d2tLskn3HVZHwUF2LRyT6E+35ZvdTkA2gCB10tkNgXe/YVVjCYDoLoGh37xxjodKatVj8QI/ePqwWxSayNpsWG6+TznhIu/zt+m+kaHxRUBOFsEXi/RuVOoAmyGahscymP3MOD3/jJvq77dV6zI4AD9Z8ZQRQSzSa0t3T6+u+LCg7S3oFKvf73f6nIAnCUCr5cIsNuUFuvs42XjGuDf/m/tIb2yyhnCHrvmHHVLiLC4It8TGRKoWRN7SZIe/2yXSqrqLK4IwNkg8HoR98Y1+ngBv7XpUKnueX+TJOmOi3rq4n5JFlfku64ZlqbeSZEqra7Xv7/YbXU5AM4CgdeLuDeuscIL+KXCilr97LU1qmtw6KI+ibrjop5Wl+TTAuw23TulryTp1VX7mJIDeDECrxdxbVzL5rQ1wO80NDr0y/+uV05pjbrFh+uxH58jm41Nau3t/F4JGt87QfWNph6av83qcgCcIQKvF3EdPrGflgbA7zy8YLtW7S1UeJBdz80YqqiQQKtL8hv3Tu4ru83Qoq1HtGpPodXlADgDBF4v4g68RYwmA/zJh1mH9cKX2ZKkR68erJ5JkRZX5F96JkXquhFdJUkPztvK91/ACxF4vUhqpxAF2g3VNTiUU1ptdTkAOsDWnDLd9e5GSc5RWZcNSLG4Iv9058U9FRkcoC05ZXpv/WGrywHQSgReL9J8NBl9vICvK6mq089eX6OaeofO75Wg30zsbXVJfisuIli/vLCHJOnvC7erqq7B4ooAtAaB18tkNrU1MJoM8G2NDlP/O3e9DhZVq2tsmJ748Tmys0nNUjeOzVBabKiOlNXquWV7rS4HQCsQeL2MazTZfsbjAD7tH4t2aMWuAoUGOjepdQoLsrokvxccYNfsSc4xZc8t36O8Uk69BLwFgdfLuA6f2McKL+Cz5m/K1TNL90iSHrlqkPqmRFlcEVwmDUjWsPQY1dQ79LeF260uB0ALEXi9TIZ7Fi+BF/BFO4+U67fvbJAk/XRcpn4wONXiinA8wzD0h6n9JEnvrTusjYdKrC0IQIsQeL2MazTZwaJqNTIaB/AppdX1+tlra1VV16gx3eN012V9rC4JJzE4rZP+Z0hnSdKD87bJNPleDHg6Aq+XSe0UqiC7TXWNDuWUMJoM8BUOh6lfv5Wl7IJKde4UqievO1cBdr5Fe6r/d2lvBQfY9E12kRZuOWJ1OQC+B99NvYzdZqgrfbyAz3n88136Ynu+ggNsem7GUMWGs0nNk6V2CtWt53eTJD20YJtqGxotrgjA6RB4vZCrrWEffbyAT1i89Yie+HyXJOmv/zNQAzpHW1wRWuK2C7orITJY+wur9Nqq/VaXA+A0CLxeyDWpIZvDJwCvt+dohX79VpYk6cYxGfrh0C7WFoQWCw8O0P9rOgzkX5/vUlFlncUVATgVAq8Xck1qoKUB8G7lNfW69dU1qqht0IiMWN07pa/VJaGVfji0i/qmRKm8psG9Sg/A8xB4vVBmPC0NgLdzOEz99p0N2nO0UslRIXrqJ+cqkE1qXsduM/T7pn+ovPb1fu3Or7C4IgAnw3dXL+Ra4T1YXKWGRofF1QA4E88s26OFW44oyG7TM9PPVUJksNUl4QyN7RGvi/smqtFh6qH526wuB8BJEHi9UEpUiIICbKpvNJVTwtGWgLdZsiNf/1i0Q5L0p8v7a0jXGIsrwtmaPbmvAmyGPt+ery93FVhdDoDvIPB6IZvNUHps08Y1+ngBr7K/sFJ3zF0v05SuG9lVPx7R1eqS0Aa6J0Ro+qh0SdKD87ZyMBDgYQi8XiqDPl7A61TWNujWV9eqrKZBQ7p20n3T+lldEtrQHRf1VHRooLbnleudNQetLgfAcQi8Xsq1cS2bwAt4BdM09bt3N2rHkXIlRAbr2elDFRxgt7ostKGY8CD96qKekqR/LNqpitoGiysC4ELg9VKuwyf209IAeIXnV+zVvI25CrAZevon5yopKsTqktAOZoxKV0ZcmAoqavXs0j1WlwOgCYHXS2XEu44X5vAJwNN9uatADy/YLkm6b1o/Dc+ItbgitJegAJtmT3aOKXt+xV4dLqm2uCIAEoHXa7lWeA8WMZoM8GQHi6r0v3PXyWFKVw3t4t7YBN81sV+SRmbGqrbBob99ut3qcgCIwOu1kqNCFBxgU4PD1KFiVhAAT1RT36jbXl+r4qp6DeoSrQevGCDDMKwuC+3MMAz9YWo/GYb0YVaO1h8otrokwO8ReL2UzWa4V3kZTQZ4HtM0Nfu9TdqSU6a48CA9O32oQgLZpOYvBnSO1g/P7SJJenDeNpkmY8oAKxF4vZi7j5dJDYDHmbNyn95ff1h2m6EnrztXqZ1CrS4JHey3E3srNNCutfuLNW9TrtXlAH6NwOvFXLN497NxDfAoX+8t1IPznEfM3jO5r0Z3j7O4IlghOTpEP7ugmyTp4QXbVVPfaHFFgP8i8Hoxd0sDK7yAx8gpqdYv3linRoepy89J1cyxGVaXBAvden43JUeF6FBxteas3Gd1OYDfIvB6MVfg3UcPL+ARauob9fPX16qwsk59U6L08JWD2KTm58KCAvT/Lu0tSXrqi90qqKi1uCLAPxF4vZjrtLVDxdWqZzQZYCnTNPXHDzdrw6FSdQoL1H9mDFVoEJvUIP3PkM4a0Dl
K5bUNemzxTqvLAfwSgdeLJUUFKzTQrkaHqYNF9PECVnpj9QG9veaQbIb072uHKC02zOqS4CFsNkN/mNJPkjT3mwPaeaTc4ooA/2Np4F2+fLmmTZum1NRUGYahDz744LTX5+bm6rrrrlPv3r1ls9l05513nnDNnDlzZBjGCR81NTXt80VYyDAMpcc5f6iycQ2wTn55jf70yVZJ0u8u66NxPRMsrgieZmS3OF3WP1kOU/pL04ZGAB3H0sBbWVmpwYMH68knn2zR9bW1tUpISNC9996rwYMHn/K6qKgo5ebmNvsICfHNc+tdbQ1sXAOs8/qq/aprcOictE762fndrC4HHuruSX0UaDe0bOdRLd2Rb3U5gF8JsPLNJ02apEmTJrX4+oyMDP3rX/+SJL300kunvM4wDCUnJ591fd4gnY1rgKWq6xr12tf7JTl35LNJDaeSER+uG0Zn6IUvs/WXedt0Xo94BdjpLAQ6gk/+l1ZRUaH09HR16dJFU6dO1fr16097fW1trcrKypp9eIvMpsMnWOEFrPHe+kMqrqpXl5hQTeyXZHU58HD/e2FPxYQFald+hd789qDV5QB+w+cCb58+fTRnzhx99NFHmjt3rkJCQjR27Fjt2rXrlK956KGHFB0d7f5IS0vrwIrPDqPJAOs4HKZe/DJbkjRzbCardfhe0WGBuvPiXpKkxxbvVFlNvcUVAf7B5747jxo1StOnT9fgwYM1btw4vf322+rVq5f+/e9/n/I1s2fPVmlpqfvj4EHv+Ve3q4f3cHG16hoYTQZ0pCU78rX3aKUiQwJ09XDv+YcyrHXdyK7qlhCuwso6Pb1kj9XlAH7B5wLvd9lsNg0fPvy0K7zBwcGKiopq9uEtEiKDFRZkl8OUDhYzqQHoSC+scK7uXjeiqyKCLd0SAS8SaLfp3sl9JUkvfZnNWEmgA/h84DVNU1lZWUpJSbG6lHbhHE3W1NZAHy/QYTYfLtWqvYWy2wzdMCbD6nLgZS7sk6jzesSrrtGhhz/dbnU5gM+zNPBWVFQoKytLWVlZkqTs7GxlZWXpwIEDkpytBtdff32z17iur6io0NGjR5WVlaWtW7e6n3/ggQe0cOFC7d27V1lZWbr55puVlZWl2267rcO+ro7GxjWg47l6d6cMTFFqp1CLq4G3MQxD907pK8OQ5m3M1dr9RVaXBPg0S38Ht2bNGk2YMMH951mzZkmSbrjhBs2ZM0e5ubnu8OsyZMgQ9/9eu3at/vvf/yo9PV379u2TJJWUlOjWW29VXl6eoqOjNWTIEC1fvlwjRoxo/y/IImxcAzpWXmmNPt6QI0m6ZVymxdXAW/VNidI1w9L05rcH9adPtun9n4+RzcZYO6A9WBp4x48fL9M0T/n8nDlzTnjsdNdL0mOPPabHHnvsbEvzKhlNG9c4bQ3oGHNW7lODw9SIzFgN6tLJ6nLgxWZN7KWPN+Row8ESfbwxR5ef09nqkgCf5PM9vP6A09aAjlNZ26D/rnYeNPHTcZyqhrOTGBmi2yf0kCQ9smC7auobLa4I8E0EXh+QHufs4c0pqVZtA98sgfb0zpqDKqtpUEZcmC7qk2h1OfABN5+XqdToEOWU1rh7wwG0LQKvD0iICFa4azQZ422AdtPoMPXSV/skOUMK/ZZoCyGBdt01qY8k6eklu5VfXmNxRYDvIfD6AMMw3H282QUEXqC9LN56RAeKqtQpLFA/HNrF6nLgQ6YNStXgtE6qrGvUPxfttLocwOcQeH3EsY1r9PEC7eWFFXslST8Z2VVhQRw0gbZjsxn641TnYRRvrTmorTllFlcE+BYCr4/IjGPjGtCe1h8o1pr9xQq0G7phdIbV5cAHDU2P1ZRBKTJN6S/zt37vVCIALUfg9RGujWvM4gXaxwtNm4l+MLizEqNCLK4Gvuruy/ooyG7TV7sL9cX2fKvLAXwGgddHuEaT7aOHF2hzB4uqtGBTriTnZjWgvaTFhumm8zIkSX+Zv031jQ5rCwJ8BIHXR7h6eHNKq5njCLSxV1buk8OUzusRr36pUVaXAx/3iwk9FBcepL1HK/Xf1Qe+/wUAvheB10fEhQcpMjhApikdYDQZ0GbKaur15rcHJUk3c4wwOkBUSKB+fUkvSdLjn+1UaVW9xRUB3o/A6yOOH022j41rQJt5+9uDqqhtUI/ECF3QM8HqcuAnfjw8TT0TI1RcVa9/f7HL6nIAr0fg9SHuwMvGNaBNNDQ69HLTQRO3cNAEOlCA3aZ7pzjHlL2yah8LGcBZIvD6kIymSQ0cPgG0jQWb83S4pFpx4UG6Ykhnq8uBnxnfO1EX9EpQfaOphxdst7ocwKsReH1IRhwtDUBbMU3TfdDEjNHpCgm0W1wR/NG9U/rKZkifbsnT6r2FVpcDeC0Crw+hpQFoO2v2F2vDoVIFBdg0fVS61eXAT/VKitS1I7pKkh6ct00OB4dRAGeCwOtDXLN4c0trGE0GnCXX6u6VQzorPiLY4mrgz359SS9FBgdo0+FSvb/+sNXlAF6JwOtDYsICFRUSIEnaX0gfL3Cm9hVUatHWI5I4aALWi48I1i8u7CFJ+vvCHaqqa7C4IsD7EHh9yPGjybLp4wXO2MtfZcs0pfG9E9QzKdLqcgDdOCZDXWJClVdWo/8s32t1OYDXIfD6GPfGNfp4gTNSWlWvt9cckiT9dFw3i6sBnEIC7bp7Uh9J0nPL9iqvtMbiigDvQuD1MRw+AZydN77Zr+r6RvVJjtSY7nFWlwO4TRmYoqHpMaqub9Q/Fu2wuhzAqxB4fUxmvHMWLyu8QOvVNTj0ysp9kqRbxnWTYXDQBDyHYRj6fdNhFO+uO6TNh0strgjwHgReH3NsFi+b1oDW+mRjjo6U1SoxMlg/GJxqdTnACYZ0jdHl56TKNKU/f7JVpsmYMqAlCLw+xhV488pqVF3HaDKgpZwHTWRLkm4Yk6GgAL49wjP97rI+Cg6waXV2kXuaCIDT4zu6j4kJD1J0aKAk2hqA1li1t1Bbc8sUGmjXT0Z2tboc4JQ6dwp1b6h8aP421TU4LK4I8HwEXh/ExjWg9Vyru1cN7aJOYUEWVwOc3m3juys+Ilj7Cqv02tf7rS4H8HgEXh+UGefauEYfL9ASu/Mr9MX2fBmGNJODJuAFIoID9NuJvSRJ//psp4or6yyuCPBsBF4fxAov0Dovfulc3b24b5L7iG7A0/1oWJr6JEeqrKZB//p8l9XlAB6NwOuDXBvXsunhBb5XYUWt3lvnPGjiFlZ34UXsNkO/n9JPkvT61/u152iFxRUBnovA64NY4QVa7o3VB1Tb4NCgLtEakRlrdTlAq5zXM14X9UlUg8PUQ/O3W10O4LEIvD4os2mFN7+8VpW1DRZXA3iumvpGvbpqnyTp5vMyOWgCXmn25L6y2wx9tu2IVu4usLocwCMReH1QdFigYsIYTQZ8n4+yclRQUaeU6BBNHphidTnAGemRGKHpTaP0/jxvmxwODqMAvovA66NcbQ37mdQAnJRpmnrhy72SpJvGZijQzrdDeK87L+6liOAAbcst09oDxVaXA3gcvsP7KFdbQz
Z9vMBJLd9VoJ1HKhQeZNc1wzloAt4tJjxIE/snSZLmbcy1uBrA8xB4fVR6HBvXgNN5YYVzdfea4V3dpxMC3mzqIGdbzvxNuWqkrQFohsDrozLiXYdPEHiB79qeV6YVuwpkM5ztDIAvOK9HgqJCApRfXqs1+4qsLgfwKAReH+Uanp9dQA8v8F0vNh0jfNmAZKXFhllcDdA2ggJsurR/siTpE9oagGYIvD7KtWmtoKJWFYwmA9zyy2v0YVaOJOmWcd0srgZoW1Oa2hoWbKatATgegddHRYUEKi48SBJ9vMDxXlu1X3WNDp3btZPO7RpjdTlAmxrbI16dwgJVUFGn1dmFVpcDeAwCrw9Lj6OPFzhedV2jXv96vyTpp6zuwgcF2m26jLYG4AQEXh/GEcNAc++uO6TiqnqlxYZqYlMoAHyNq63h0815amh0WFwN4BkIvD7s2CxeNq4BDoepl750bla7aUym7DaOEYZvGt0tTrHhQSqqrNPXe5nWAEgEXp927LQ1VniBL7bna29BpSJDAnT18DSrywHaTYDdpssGOH+DMW9TjsXVAJ6BwOvDXKPJ6OEF5D5G+LoRXRURHGBxNUD7mjrQNa0hT/W0NQAEXl/m2rRWUFGn8pp6i6sBrLP5cKm+3lukAJuhGzloAn5gRGas4iOCVFJVr5V7mNYAEHh9WGRIoOIjXKPJ6OOF/3IdIzxlUIpSokMtrgZofwF2myYNcK7yzttIWwNA4PVxGa6Na7Q1wE/llla7xzPdch6jyOA/jp/WUNdAWwP8G4HXx7k3rjGaDH5qzsp9anCYGpkZq4Fdoq0uB+gwwzNilRAZrLKaBn21u8DqcgBLEXh9nGvjGiu88EeVtQ367+oDkjhGGP7HbjM0eQCHUAASgdfnuU9bY4UXfuidNQdVXtOgzPhwXdQn0epygA43dXCqJGnR1jzVNjRaXA1gHQKvj3P18O4rZNMa/Eujw9RLX+2TJM08L1M2DpqAHxraNUZJUcEqr2nQip20NcB/EXh9nKuHt6iyTqXVjCaD/1i8NU8HiqrUKSxQV53bxepyAEvYbIYmN83knbeJtgb4LwKvj4sIDlBCZLAkTlyDf3l+hfMY4ekj0xUaZLe4GsA6Uwc52xoWbz2imnraGuCfCLx+INM1mow+XviJdQeKtXZ/sQLthq4fnW51OYClhqR1Ump0iCpqG7Rs51GrywEsQeD1A8c2rtHHC//w4pfO1d0fDO6sxKgQi6sBrNWsrYFpDfBTBF4/4Orj3UdLA/zAwaIqLWjqVbxlXKbF1QCewXUIxWfbaGuAfyLw+gH3LF5aGuAH5qzcJ4cpndcjXn1ToqwuB/AI56R1UudOoaqqa9TSHflWlwN0OAKvHzg2mozAC99WVlOvt749KInVXeB4hmFoatMq78e0NcAPEXj9QEa8s4e3pKpeJVV1FlcDtJ+3vjmoitoG9UyM0AW9EqwuB/AorraGL7blq6quweJqgI5F4PUDYUEBSopyjibjAAr4qoZGh17+yrlZ7ebzMmUYHDQBHG9g52h1jQ1TdX2jlmxnWgP8C4HXT6S72hro44WPmr85TzmlNYoLD9IVQzpbXQ7gcQzDcK/yfrIxx+JqgI5F4PUTzOKFLzNNUy+s2CtJmjE6XSGBHDQBnMyUpvFkX2zPV2UtbQ3wHwReP8FoMviyNfuLtfFQqYICbJoxioMmgFPpnxqljLgw1TY49Pl2pjXAfxB4/URm08Y1enjhi55f7lzd/eG5nRUXEWxxNYDnck5rcB41PI+2BvgRAq+fcK/w0tIAH7OvoFKLtx2R5NysBuD0XH28S3YcVXlNvcXVAB2DwOsn0mOdgbe0ul7FlYwmg+946atsmaY0oXeCeiRGWl0O4PH6JEeqW0K46hoc+nwbbQ3wDwRePxEaZFdyVIgkKZs+XviIkqo6vbPmkCTplnHdLK4G8A6GYWjqQNe0Bg6hgH8g8PoR1wEUtDXAV/z3mwOqrm9U35QojekeZ3U5gNeYOtjZx7t851GVVtPWAN9H4PUjme5JDWxcg/era3DolZX7JEm3cNAE0Cq9kiLVMzFCdY0Ofbb1iNXlAO2OwOtHMjh8Aj7kk405OlJWq8TIYE1rWq0C0HKuzWvzNtHWAN9H4PUj7tPW6OGFlzNNU8+vcB4jfMOYDAUF8K0MaK2pTYF3xa6jKq2irQG+jZ8SfsTV0pBdUCnTNC2uBjhzq/YUaltumUID7frJyK5WlwN4pR6JkeqTHKn6RlMLt+ZZXQ7Qrgi8fiQ9zrlprbymQUWMJoMXe+FL5+ruj4Z1UaewIIurAbyX66jheUxrgI8j8PqRkEC7UqOdo8nYuAZvtTu/XF9sz5dhSDeN5aAJ4Gy4+ni/2l3AjHb4NAKvn+HENXi7F7/cJ0m6uG+Su00HwJnplhChfilRanCYWriFtgb4LgKvn2HjGrxZYUWt3lvnPGjipxw0AbQJpjXAHxB4/Uxm0+ET2azwwgu9/vUB1TY4NKhLtIZnxFhdDuATXNMaVu4pVGFFrcXVAO2DwOtnMljhhZeqqW/Ua1/vk+Q8RpiDJoC2kR4XroGdo9XoMLVwC4dQwDcReP2Mq+dxf0EVo8ngVT7MOqyCijqlRodo0oBkq8sBfIqrreGTjTkWVwK0DwKvn0mLDZNhSOW1DSpkRy68hGmaeqHpoIkbx2Yo0M63LqAtucaTfb23UEfLaWuA7+Gnhp9xjiYLlcSkBniPZTuPald+hcKD7PrxCA6aANpaWmyYBneJlsOUPmVaA3wQgdcPZbBxDV7mxaaDJq4Z3lVRIYEWVwP4pqmDUiVJn2ygrQG+h8Drh9i4Bm+yPa9MK3YVyGZIN43NsLocwGdNGujsjf9mX5Hyy2osrgZoWwReP5TpPnyC09bg+Vy9u5MGpCgtNsziagDf1SUmTEO6dpJpSgs209YA30Lg9UOs8MJb5JfV6MOsw5Kkm8dxjDDQ3lxtDfM2cggFfAuB1w8df7wwo8ngyV5dtV/1jaaGpsfo3K4cNAG0t8lNbQ3f7i9SXiltDfAdBF4/lBYbKpshVdY16iin6sBDVdc16vXV+yVJt5zH6i7QEVKiQzUsPUamKc3nqGH4EAKvHwoOsCu1k2s0GX288Ezvrjukkqp6pcWGamJ/DpoAOorrEIp5BF74EEsD7/LlyzVt2jSlpqbKMAx98MEHp70+NzdX1113nXr37i2bzaY777zzpNe9++676tevn4KDg9WvXz+9//77bV+8l8s8rq0B8DQOh6mXmkaRzRybKbuNY4SBjjJ5YIoMQ1q7v1g5JdVWlwO0CUsDb2VlpQYPHqwnn3yyRdfX1tYqISFB9957rwYPHnzSa1atWqVrrrlGM2bM0IYNGzRjxgxdffXVWr16dVuW7vXYuAZP9sX2fO0tqFRkSIB+NCzN6nIAv5IUFaLhGbGSaGuA77A08E6aNEkPPvigrrzyyhZdn5GRoX/961+6/vrrFR0dfdJrHn/8cV1yySWaPXu2+vTpo9mzZ+uiiy7S448/3oaVez/3xjUCLzzQ8yv2SpKuG9lVEcEBFlcD+J+pTW0Nn
zCtAT7C53p4V61apYkTJzZ77NJLL9XKlStP+Zra2lqVlZU1+/B1GXGu09bo4YVn2XSoVKuzixRgM3TjmAyrywH80mUDkmUzpKyDJTpYxM8JeD+fC7x5eXlKSkpq9lhSUpLy8k49RPuhhx5SdHS0+yMtzfd/hepa4d1fyGgyeJYXv3Su7k4ZlKKU6FCLqwH8U2JkiEZmxkmirQG+wecCryQZRvMNLqZpnvDY8WbPnq3S0lL3x8GDB9u7RMulxYTJZkhVdY3KL2c0GTxDbmm1+1eot5zXzeJqAP/GtAb4Ep8LvMnJySes5ubn55+w6nu84OBgRUVFNfvwdUEBNnWJcbY1MKkBnmLOyn1qcJgamRmrgV1O3qcPoGO42ho2HirVgULaGuDdfC7wjh49WosXL2722KJFizRmzBiLKvJcbFyDJ6mobdB/Vx+QJP10HKu7gNXiI4I1pnu8JOmTTTkWVwOcHUsDb0VFhbKyspSVlSVJys7OVlZWlg4ccP7Qmz17tq6//vpmr3FdX1FRoaNHjyorK0tbt251P3/HHXdo0aJFeuSRR7R9+3Y98sgj+uyzz045s9efsXENnuSdNQdVXtOgzPhwXdgn0epyAOi4tgamNcDLWRp416xZoyFDhmjIkCGSpFmzZmnIkCH64x//KMl50IQr/Lq4rl+7dq3++9//asiQIZo8ebL7+TFjxujNN9/Uyy+/rEGDBmnOnDl66623NHLkyI77wryEexYvLQ2wWKPD1EtfNR00cV6mbBw0AXiES/sny24ztCWnTNn8rIAXs3TA5fjx4087IWDOnDknPNaSiQJXXXWVrrrqqrMpzS9k0tIAD7FoS54OFlWrU1igrjq3i9XlAGgSGx6ksT3itXznUc3flKtfTOhhdUnAGfG5Hl603PE9vIwmg5VeaDpGePrIdIUG2S2uBsDxpg50tjV8vIE+XngvAq8f6xITKrvNUE29Q0fKGE0Ga6w7UKy1+4sVZLfp+jHpVpcD4Dsm9k9SgM3Q9rxy7c6vsLoc4IwQeP1YoN2mtBjnYH96s2CVF1c4V3d/cE6qEiNDLK4GwHd1CgvSeT2d0xo4hALeisDr59Lj6OOFdQ4WVWnBZucP0JvPy7S4GgCnMnVQqiTpk420NcA7EXj9nHvjGiu8sMDLX+2Tw5TG9YxX3xTfP/AF8FaX9EtSoN3QziMV2nmk3OpygFYj8Pq5Y7N4CbzoWGU19XrrW+fYQVZ3Ac8WHRqo83smSGImL7wTgdfPuSY17OfYSHSwN785oMq6RvVMjNAFvRKsLgfA95g6uOkQik25TPaB1yHw+rnjZ/E6HHwDQ8eob3Rozlf7JEm3jMuUYXDQBODpLu6bpKAAm3bnV2gHbQ3wMgReP9e5U6gCbIZqGxzKK6uxuhz4iQWb85RTWqP4iCBdfk5nq8sB0AKRIYHu38bQ1gBvQ+D1cwF2m9JinX28bFxDRzBNUy+s2CtJmjEqQyGBHDQBeIupg5raGjbS1gDvQuDFsY1rjCZDB/h2X7E2HipVcIBN00d1tbocAK1wUd8kBQfYtLegUltzy6wuB2gxAi/YuIYO9XzT6u6V53ZRXESwxdUAaI2I4ABN6J0oibYGeBcCL9wb1xhNhvaWXVCpz7YdkSTdfF6GtcUAOCNTBjGtAd6HwItjp60ReNHOXv4qW6YpTeidoB6JkVaXA+AMXNQ3USGBNu0vrNKWHNoa4B0IvFBmU+DdX1TFaDK0m5KqOr2z5pAk6afjullcDYAzFRYUoIv6JEmSPuaoYXgJAi+U2ilEgXZDdQ0O5ZRWW10OfNQbqw+our5RfVOiNLp7nNXlADgLU5jWAC9D4EWz0WRsXEN7qGtw6JWV+yRJP+WgCcDrTeidqLAguw4VV2vjoVKrywG+F4EXko61NbBxDe3h4w05yi+vVWJksKYOSrW6HABnKTTIrov6OtsaPqGtAV6AwAtJbFxD+zFNUy98mS1JumFMhoIC+LYD+IIpA2lrgPfgJw8kSZnxTaetcfgE2tjKPYXallum0EC7fjKSgyYAXzG+d4LCg+zKKa3R+oMlVpcDnBaBF5KOHT5BSwPamusY4R8N66JOYUEWVwOgrYQE2nVJP2dbA4dQwNMReCFJymhqaThYVK1GRpOhjezOL9eSHUdlGNLMsZlWlwOgjU1p6smftzGXsZbwaAReSJJSO4UqyG5TXaNDOSWMJkPbeLGpd/eSvknu3yIA8B3jesYrMjhAeWU1Wneg2OpygFMi8EKSZLcZ6hpHHy/aTmFFrd5dd1iSdAsHTQA+6fi2hk9oa4AHI/DCLcMVeOnjRRt47ev9qmtwaHCXaA3PiLG6HADtZOpg57SG+ZtyaYmDxyLwwi3DPYuXwydwdmrqG/Xaqv2SpJvHdeOgCcCHndcjQZEhAcovr9WafUVWlwOcFIEXbq4ey/20NOAsfbD+sAor65QaHaJJA5KtLgdAOwoKsOnS/s7/zudtoq0BnonAC7dM12gyAi/OwvEHTdw0NlOBdr7NAL5u6iBXW0MebQ3wSPwkgptrhfdgUZUaGh0WVwNvtWznUe3Or1BEcICuGZFmdTkAOsDYHvGKDg1UQUWtVmcXWl0OcAICL9xSokIUFGBTfaOpnJIaq8uBl3phhXN195rhaYoKCbS4GgAdIdBu02WutgamNcADEXjhZrMZSo91TmqgrQFnYltumb7cXSCbId04JsPqcgB0oClNbQ2fbs7jt4TwOAReNONqa2A0Gc6Ea3V30oAUpTX94wmAfxjTPU4xYYEqrKzT13uZ1gDPQuBFM66Naxw+gdbKL6vRRxtcB01wjDDgbwLsNl02wLnKO29TjsXVAM0ReNGMaxYvK7xorVdX7Vd9o6mh6TEa0pWDJgB/NPW4toZ62hrgQQi8aMZ92lohh0+g5arqGvT6audBEz9ldRfwWyMzYxUfEaTiqnqt2sO0BngOAi+aYTQZzsS76w6rpKpeabGhuqQfB00A/srZ1uD8HvDJRtoa4DkIvGgmOSpEwQE2NThMHSqutroceAGHw9RLTQdNzBybKbuNY4QBfzZlYKokaeGWI6prYOEEnoHAi2ZsNuNYHy8b19ACn2/PV3ZBpSJDAnT1MA6aAPzdiMxYJUQGq7S6Xl/tKbC6HEASgRcnkRHf1MfLxjW0wAsr9kqSrhvZVeHBARZXA8Bqdpuhya62hg0cQgHPQODFCY6t8LJxDae36VCpVmcXKcBmcNAEALcpg5xtDYu25qm2odHiagACL07CtXEtmxVefI8XvnSu7k4dlKKU6FCLqwHgKYalxygpKljlNQ36chdtDbAegRcnoIcXLZFTUq15G52/rrxlXDeLqwHgSWw2Q5MHOmfyfrKRtgZYj8CLE7hOWztUXM3gcJzSKyv3qcFhalS3WA3oHG11OQA8jOsQisVbj6imnrYGWIvAixMkRQUrJNCmRkaT4RQqahv0328OSJJuOY/VXQAnGpIWo5ToEFXUNmj5zqNWlwM/R+DFCQzD4IhhnNbb3x5UeU2DusWH68I+iVaXA8ADHd/WMG8TbQ2wFoEXJ+UKvGxcw3c1Oky9
9FXTQRPnZcrGQRMATsHV1vAZbQ2wGIEXJ+Wa1MDGNXzXoi15OlRcrZiwQP3w3C5WlwPAg52T1kmdO4Wqsq5RS3fkW10O/BiBFyeV6Tp8glm8+I7nmw6amD4qXaFBdourAeDJDMPQlEFMa4D1CLw4KXp4cTJr9xdr3YESBdltmjE63epyAHiBKU19vJ9vy1dVXYPF1cBfEXhxUsdGk1WproHRZHB6semgicvPSVViZIjF1QDwBoO6RCstNlTV9Y1asp1pDbAGgRcnlRAZrLAguxymdLCYtgZIB4uq9OnmPEnSzeMyLa4GgLcwDENTBjqPGp63KcfiauCvCLw4KcMwlE5bA47z8lf75DClcT3j1Sc5yupyAHgR17SGL7bnq7KWtgZ0PAIvTomNa3Apra7XW982HTTBMcIAWql/apQy4sJUU+/Q59uZ1oCOR+DFKbFxDS5vfXtAlXWN6pUUofN7xltdDgAvc/y0hnkbaWtAxyPw4pSYxQtJqm906OWv9klyHiNsGBw0AaD1XH28S3YcVQVtDehgBF6cEqetQZLmb8pVbmmN4iOC9INzUq0uB4CX6psSqW7x4aprcOizrUesLgd+hsCLU8po6uHNKalWbQNHQvoj0zT14pfOY4RnjMpQSCAHTQA4M4ZhuDevcQgFOhqBF6eUEBGscNdosiI2rvmjb7KLtPFQqYIDbJo+qqvV5QDwclMGOX9LtHznUZXV1FtcDfxJqwLv3/72N1VXV7v/vHz5ctXW1rr/XF5erttvv73tqoOlDMM41sdbQOD1Ry80re5eeW4XxUUEW1wNAG/XKylCPRIjVNdIWwM6VqsC7+zZs1VeXu7+89SpU3X48GH3n6uqqvTcc8+1XXWwHBvX/Fd2QaU+2+b8gXTzeRw0AeDs0dYAq7Qq8Jqmedo/w/dkxDn7eNm45n9e+jJbpild2CdRPRIjrC4HgI+YMtAZeFfsOqrSKtoa0DHo4cVpuWfxssLrV0qq6vTO2oOSpFtY3QXQhnomRap3UqTqG00t2ppndTnwEwRenFYmPbx+6Y3VB1RT71C/lCiN7h5ndTkAfMwU2hrQwQJa+4IXXnhBERHOX282NDRozpw5io93nrx0fH8vfIOrhzentFo19Y2MpfIDtQ2NmrNynyTplnGZHDQBoM1NGZSify7eqa92F6i4sk4x4UFWlwQf16rA27VrVz3//PPuPycnJ+u111474Rr4jrjwIEUGB6i8tkEHi6rUMynS6pLQzj7ekKuj5bVKigrW1EEcNAGg7XVPiFDflChtyy3Toq15umY42QHtq1WBd9++fe1UBjyVYRhKjw/T5sNlyi6oJPD6ONM09cKKvZKkG8ZkKCiAricA7WPqoBRtyy3TJxtzCbxod/w0w/di45r/WLmnUNvzyhUaaNd1I/gBBKD9uKY1rNxTqMKK2u+5Gjg7rQq8q1ev1oIFC5o99uqrryozM1OJiYm69dZbmx1EAd/g2riWzcY1n/d80+ru1cO6qFMYPXUA2k9GfLgGdI5So8PUwi0cQoH21arAe//992vjxo3uP2/atEk333yzLr74Yt199936+OOP9dBDD7V5kbCWa4V3Pyu8Pm3XkXIt3XFUhiHdNJZRZADa35SBzn0C8zblWFwJfF2rAm9WVpYuuugi95/ffPNNjRw5Us8//7xmzZqlJ554Qm+//XabFwlrHTtemMDry15sOkb4kr5J7v/PAaA9udoaVu0p1NFyfkOM9tOqwFtcXKykpCT3n5ctW6bLLrvM/efhw4fr4MGDbVcdPEKmezRZjWrqGy2uBu2hoKJW7613HhP+0/O7WVwNAH/RNS5Mg7tEy2FKn27hEAq0n1YF3qSkJGVnO1eB6urqtG7dOo0ePdr9fHl5uQIDA9u2QlguJixQkSHOgR77C+nj9UVvfXtQdQ0ODe4SrWHpMVaXA8CPuA6hmLeRtga0n1YF3ssuu0x33323VqxYodmzZyssLEzjxo1zP79x40Z17969zYuEtQzDOG7jGm0NvmjxVueGkR+P6MpBEwA61OSmtobV2UXKL6+xuBr4qlYF3gcffFB2u10XXHCBnn/+ef3nP/9RUNCxndwvvfSSJk6c2OZFwnpsXPNdhRW12nCoRJJ0YZ9Ea4sB4He6xIRpSNdOMk1pwSbaGtA+WnXwREJCglasWKHS0lJFRETIbm9+zOw777yjyEgOJvBF7o1rBF6fs2znUZmm1C8lSklRIVaXA8APTRmYovUHSjRvY65uGJNhdTnwQa0KvDNnzmzRdS+99NIZFQPPlRkfJomWBl+0ZMdRSazuArDO5IEpenDeNn27v0h5pTVKjuYf32hbrQq8c+bMUXp6uoYMGSLTNNurJnigdNdpaxw+4VMaGh1atiNfkjShT4LF1QDwV6mdQjU0PUZr9xdr/qZczTyPWeBoW60KvLfddpvefPNN7d27VzNnztT06dMVGxvbXrXBg2Q2Bd68shpV1zUqNMj+Pa+AN1h/sERlNQ3qFBaoc9KYzgDAOlMHpWjt/mLNI/CiHbRq09rTTz+t3Nxc3XXXXfr444+Vlpamq6++WgsXLmTF18fFhAcpOtQ5co4+Xt/xxXbn6u4FvRJktzGdAYB1Jg1IkWFIa/cXK6ek2upy4GNaFXglKTg4WNdee60WL16srVu3qn///rr99tuVnp6uioqK9qgRHsK1cY1JDb5jSVPgndCb/l0A1kqODtHwdOdvjedvyrW4GviaVgfe4xmGIcMwZJqmHA5HW9UED5UZ59q4Rh+vL8gpqdb2vHIZhnOFFwCsNnWwcybvJxsJvGhbrQ68tbW1mjt3ri655BL17t1bmzZt0pNPPqkDBw4oIiKiPWqEhzi2cY0VXl+wtGk6w5C0TooJD/qeqwGg/V02IFmGIWUdLNHBIhZX0HZaFXhvv/12paSk6JFHHtHUqVN16NAhvfPOO5o8ebJstrNaLIYXcJ+2RkuDT1jSNJ2BcWQAPEViZIhGZjrbGhZsZpUXbadVUxqeffZZde3aVZmZmVq2bJmWLVt20uvee++9NikOnsV9+AQrvF6vtqFRX+0ukCSNp38XgAeZMihVX+8t0icbc3Xr+d2tLgc+olWB9/rrr5dhsJPbX7lGk+WX16qqrkFhQa366wMP8k12karqGpUYGaz+qVFWlwMAbpMGJOu+Dzdr46FSHSisUtem/SPA2Wj1wRPwX9FhgYoJC1RxVb32FVSpH0HJa31x3HQG/hELwJPERwRrdPc4fbW7UPM25ern41nlxdmztPF2+fLlmjZtmlJTU2UYhj744IPvfc2yZcs0dOhQhYSEqFu3bnr22WebPT9nzhz39IjjP2pqatrpq/Av7o1r9PF6NdeGNU5XA+CJpgxMlSTN25RjcSXwFZYG3srKSg0ePFhPPvlki67Pzs7W5MmTNW7cOK1fv1733HOPfvWrX+ndd99tdl1UVJRyc3ObfYSEcC53W3BvXKOP12tlF1Qqu6BSgXZDY3vEW10OAJzgsgHJstsMbT5cxr4RtAlLmzAnTZqkSZMmtfh616a5xx9/XJLUt29frVmzRv/4xz/
0wx/+0H2dYRhKTk5u63IhKYPRZF7PddjE8IxYRYYEWlwNAJwoNjxIY7rHacWuAs3blKtfTOhhdUnwcl41S2zVqlWaOHFis8cuvfRSrVmzRvX19e7HKioqlJ6eri5dumjq1Klav379aT9vbW2tysrKmn3g5DLinZsH9hcyH9FbMY4MgDeYOohDKNB2vCrw5uXlKSkpqdljSUlJamhoUEGBc8RSnz59NGfOHH300UeaO3euQkJCNHbsWO3ateuUn/ehhx5SdHS0+yMtLa1dvw5vxixe71ZZ26DVe4skMY4MgGe7tH+yAmyGtuWWac/RCqvLgZfzqsAr6YQd5aZpNnt81KhRmj59ugYPHqxx48bp7bffVq9evfTvf//7lJ9z9uzZKi0tdX8cPHiw/b4AL+fatHa0vFYVtQ0WV4PWWrmnUHWNDnWNDVP3hHCrywGAU+oUFqTzejr3GcxjlRdnyasCb3JysvLy8po9lp+fr4CAAMXFxZ30NTabTcOHDz/tCm9wcLCioqKafeDkokMDFdt0DC19vN7n2DiyBMaRAfB4UwY62xoIvDhbXhV4R48ercWLFzd7bNGiRRo2bJgCA0+++cY0TWVlZSklJaUjSvQLGU1DwBlN5l1M09TSpv7dCfTvAvACE/slK9BuaMeRcu06Um51OfBilgbeiooKZWVlKSsrS5Jz7FhWVpYOHDggydlqcP3117uvv+2227R//37NmjVL27Zt00svvaQXX3xRv/3tb93XPPDAA1q4cKH27t2rrKws3XzzzcrKytJtt93WoV+bL3MdMczGNe+yPa9cuaU1Cgm0aVS3k/9GBAA8SXRYoM7v6ZwXzuY1nA1LA++aNWs0ZMgQDRkyRJI0a9YsDRkyRH/84x8lSbm5ue7wK0mZmZmaP3++li5dqnPOOUd//vOf9cQTTzQbSVZSUqJbb71Vffv21cSJE3X48GEtX75cI0aM6Ngvzoe5jhhmFq93cU1nGNM9XiGBdourAYCWmdI0rWHeplz3vh2gtSydwzt+/PjT/uU92VHGF1xwgdatW3fK1zz22GN67LHH2qI8nIJrhZceXu/imr9LOwMAb3JxvyQF2W3anV+hnUcq1Ds50uqS4IW8qocXniGD44W9TmlVvdbuL5bk3LAGAN4iKiRQ5/dytTVw1DDODIEXreY6fKKgok7lNfXfczU8wbJdR+UwpV5JEeoSE2Z1OQDQKtMGH5vWQFsDzgSBF60WGRKo+AjnaDI2rnmHpe5xZLQzAPA+F/VNUlCATXsLKrUtl2kNaD0CL85IBhvXvIbDYWrpzqOS6N8F4J0iggPc7VjzNtHWgNYj8OKMsHHNe2w4VKKiyjpFhgRoaHqM1eUAwBmZOihVknM8GW0NaC0CL86I6/CJbDauebwlO5yru+f3TFCgnf/kAXinC/skKiTQpv2FVdqSU2Z1OfAy/PTDGWGF13u4xpGNZzoDAC8WHhygC5vasjiEAq1F4MUZOTaajE1rniy/vEabDpdKksazYQ2Al5sy0NXWkENbA1qFwIsz4lrhLaqsU2k1o8k81dKmdoZBXaKVEBlscTUAcHYu7JOo0EC7DhVXa+OhUqvLgRch8OKMRAQHuAPUfvp4PdbSHa52BlZ3AXi/0CC7Lurr/H42bxNtDWg5Ai/OmHvjGn28Hqm+0aEVOwskyd33BgDebuogDqFA6xF4ccbcfbwF9PF6ojX7ilVe26C48CAN6hxtdTkA0CbG905UeJBdh0uqtf5gidXlwEsQeHHG3JMaaGnwSEua2hku6J0gm82wuBoAaBshgXZd3C9JknOVF2gJAi/OWCaB16Mt4ThhAD5qykBnW8P8TblyOGhrwPcj8OKMHWtpIPB6moNFVdqVXyG7zdD5PZm/C8C3nN8rQZHBAcotrdG6A8VWlwMvQODFGcuId25aK66qV2kVo8k8iWs6w9CuMYoOC7S4GgBoWyGBdl3S1NbAIRRoCQIvzlhYUIASm0aTccSwZ3EdJzyB6QwAfNSUQbQ1oOUIvDgrHDHseWrqG7Vyj3Mc2YQ+tDMA8E3n9YxXZEiA8strtWY/bQ04PQIvzkpmHBvXPM2qvYWqqXcoJTpEvZMirS4HANpFcIBdl/ZPluQ8ahg4HQIvzgorvJ7HPZ2hT6IMg3FkAHzXsbaGPDXS1oDTIPDirGQ2bVzLLuTwCU9gmqa+YBwZAD8xtnu8okMDVVBRq2+yi6wuBx6MwIuzks5oMo+y52iFDhVXK8hu09gecVaXAwDtKijApkv7u6Y10NaAUyPw4qy4ZvGWVteruLLO4mqwZLtzOsPIbrEKCwqwuBoAaH9TB6VKkj7dnKeGRofF1cBTEXhxVkKD7EqOCpHExjVP4DpO+ELGkQHwE6O7xykmLFCFlXVaTVsDToHAi7PmOoCCwGut8pp6dw8b/bsA/EWg3abLBrimNXAIBU6OwIuzltk0qSG7gI1rVvpqd4EaHKa6xYe7p2cAgD841taQq3raGnASBF6cNTaueQbXdIbxrO4C8DMjM2MVFx6k4qp6rdpTaHU58EAEXpy1DA6fsJxpmscdJ8zpagD8S8BxbQ3zaGvASRB4cdaOtTRUyjQZ/G2FLTllOlpeq7Agu0ZkxlpdDgB0ONchFJ9uyVNdA20NaI7Ai7OWHufctFZe06DiqnqLq/FPrtPVxvaIV3CA3eJqAKDjjcyMU3xEsEqr6/XVngKry4GHIfDirIUE2pUa7RxNlk0fryW+YBwZAD9ntxmaPJC2BpwcgRdtgo1r1imqrFPWwRJJ0vje9O8C8F9TBjrbGhbS1oDvIPCiTbjGYLFxreMt33lUpin1TYlSSnSo1eUAgGWGZcQqMTJY5TUNWrHrqNXlwIMQeNEmMpsOn6CloeO5xpFNYHUXgJ9ztjU4V3lpa8DxCLxoE67RZPsLOXyiIzU6TC3b6VzFoH8XAKSpTdMaFm89opr6Rourgacg8KJNuEaT7WM0WYdaf6BYpdX1ig4N1DlpnawuBwAsd27XGCVHhai8tkHLd9LWACcCL9pEWmyYDEMqr21QYWWd1eX4jSVN0xnO75WgADv/OQOAzWa4Z/LO20RbA5z4CYk24RxN5twwxaSGjvPFdlc7A/27AODiCryf0daAJgRetJkMNq51qLzSGm3LLZNhSOf3JPACgMuQtE7q3ClUlXWNWrqDtgYQeNGG2LjWsVztDOekdVJcRLDF1QCA5zCMY4dQfLIxx+Jq4AkIvGgzro1r2czi7RBL3OPImM4AAN81dVCqJOnzbfmqrqOtwd8ReNFmMjhtrcPUNjTqq93Os+IZRwYAJxrUJVpdYkJVXd/o/o0Y/BeBF23G1cPLaLL29212sSrrGpUQGax+KVFWlwMAHscwjk1roK0BBF60mbTYMNkMqbKuUUcraq0ux6e5Vism9E6QzWZYXA0AeKZpTW0NX2zPV2Vtg8XVwEoEXrSZ4AC7Ujs5R5Oxca190b8LAN+vf2qU0uPCVFPvcB/DDv9E4EWbcm9co4+33ewrqNTegkoF2AyN7RlvdTkA4LEMw9CUgU2HUGzkEAp/RuBFm2LjWvtztT
MMz4hVVEigxdUAgGdz9fEu2ZGvCtoa/BaBF20qPa5p4xqjydrNkqYh6hM4XQ0Avle/lCh1iw9XbYNDn287YnU5sAiBF23qWEsDPbztoaquQV/vLZTEODIAaInm0xpoa/BXBF60qYx412lrjCZrDyt3F6quwaEuMaHqnhBhdTkA4BVcgXfZjqMqq6m3uBpYgcCLNpUW4xxNVlXXqKPljCZra67+3Qv7JMowGEcGAC3ROylSPRIjVNfo0GdbaWvwRwRetKmgAJu6xDj7eJnU0LZM02QcGQCcAaY1gMCLNsfGtfax80iFckprFBxg0+jucVaXAwBexdXWsHzXUZVW09bgbwi8aHNsXGsfrqHpY7rHKSTQbnE1AOBdeiVFqldShOobTS3akmd1OehgBF60OWbxtg/3ccJMZwCAMzK16ajheZtoa/A3BF60OdcKLy0Nbae0ql5r9xdLon8XAM7U5KY+3i93Faikqs7iatCRCLxoc8dGk1UxmqyNrNh9VI0OUz0SI5QWG2Z1OQDglXokRqhPcqQaHKYW0tbgVwi8aHNdYkJltxmqrm/UkTJGk7UFV/8uh00AwNmZNtjZ1sAhFP6FwIs2F2i3qUtMqCRGk7UFh8PUsqbjhMf35jhhADgbrraGlXsKVVRJW4O/IPCiXbg3rtHHe9Y2HS5VYWWdIoIDNDwj1upyAMCrZcaHq39qlBppa/ArBF60CzautR1XO8O4nvEKtPOfLACcLddM3k825lhcCToKPz3RLjJch0/Q0nDWljKODADa1NSBzj7eVXsKVVDBXhN/QOBFu3BNatjH4RNn5Wh5rTYcKpUkje9F/y4AtIWucWEa1CVaDlP6dDNtDf6AwIt2cXwPr8PBaLIztWync7PagM5RSowKsbgaAPAdUwbS1uBPCLxoF11iQhVgM1Tb4FBeWY3V5XitJa5xZBw2AQBtytXHuzq7SPnl/JzydQRetIsAu819QAIb185MfaNDy3c1jSOjfxcA2lSXmDCdk9ZJJm0NfoHAi3ZzbOMafbxnYu3+YpXXNCg2PEiDu3SyuhwA8DlTXdMaNnAIha8j8KLdZDCa7KwsaZrOcEGvBNlthsXVAIDvcR1C8e3+IuWV0tbgywi8aDeujWuctnZmXP27jCMDgPaR2ilUQ9NjZJrSgs2s8voyAi/azbHRZATe1jpUXKWdRypkM6Tze8ZbXQ4A+CzXtIZ5Gwm8vozAi3aT2bTCu7+oitFkrbR0h3Oz2tD0GHUKC7K4GgDwXZMHpsgwpDX7i5VTUm11OWgnBF60m9ROIQq0G6prcCiX0WSt4mpnGM84MgBoV8nRIRqeHitJmr+JVV5fReBFu2k2moy2hharqW/UV3sKJEkTCLwA0O5cM3nnEXh9FoEX7YqNa6339d5C1dQ7lBwVor4pkVaXAwA+b9KAZBmGtP5AiQ4VM0rTFxF40a7cRwwTeFvM1b87oU+CDINxZADQ3hKjQjQiw9nWMPu9TVq89Yhq6hstrgptKcDqAuDbMuNdp63xL+aWME1TX7jGkdHOAAAd5vrRGVqdXaQVuwq0YleBIoIDdFHfRE0akKLxvRMUEmi3ukScBQIv2hWHT7TO3oJKHSiqUpDdprE9GEcGAB1lyqAUpXQao4835GjBpjzlldXow6wcfZiVo7Aguy7sk6jJA1M0oXeiQoMIv96GwIt25WppOFBYpUaHyYlh38M1nWFkt1iFB/OfJwB0pHO7xujcrjH6w5R+Wn+wRAs25WrB5jwdLqnWJxtz9cnGXIUG2jWhT4ImDUjRhX0S+V7tJfh/Ce0qtVOoguw21TU6lFNS7Z7agJNzHSfMODIAsI7NZmhoeoyGpsfo3il9teFQqRZsytW8Tbk6VFyt+ZvyNH9TnoIDbBrfO0GTBzrDb2RIoNWl4xQIvGhXdpuhtNhQ7TlaqX2FlQTe06iobdA32UWSpAs5ThgAPIJhGDonrZPOSeukuyf10ebDZZq/OVfzN+Vqf2GVFm45ooVbjigowKbzeyZo8sBkXdQ3SdGhhF9PQuBFu8uMD3cG3oJKjeuZYHU5HuvLXQWqbzSVERemzKbeZwCA5zAMQwO7RGtgl2j97tLe2ppbpgWb8jR/U672FlTqs21H9Nm2Iwq0GxrXM0GTBiTrkn5JnJjpAQi8aHfu0WRMajitpbQzAIDXMAxD/VOj1T81Wr+Z2Es7j1Ro3qZcLdiUq135Ffpie76+2J6vAJuhMT3iNWVgsi7pl6zYcMKvFQi8aHfuSQ3M4j0l0zTd/bu0MwCAdzEMQ72TI9U7OVKzLumlXUfKNX9TnhZsztX2vHIt33lUy3ce1T3vb9aY7nGaNCBFE/snKT4i2OrS/QaBF+3O9ev5bEaTndLW3DIdKatVaKBdIzJjrS4HAHAWeiZF6o6kSN1xcU/tOVqhBZtyNX9Tnrbmlrnn/P7+g00a1S1Okwam6NL+SUqMDLG6bJ9G4EW7S49zblQ7WFSlhkaHAuwc8PddrnFkY3vEM9wcAHxI94QI/fLCnvrlhT21r6BS8zfnasGmPG06XKqVewq1ck+h/vjhZg3PiNWUgSm6bECykqIIv22NwIt2lxodqqAAm+oaHMopqVHXOCY1fNeS444TBgD4poz4cN0+voduH99DB4uqNH9TruZvztOGgyX6JrtI32QX6f6Pt2ho1xhNbgq/qZ1CrS7bJxB40e5sNkPpsWHalV+hfYWVBN7vKK6s0/oDxZI4ThgA/EVabJh+dkF3/eyC7jpUXKVPNzunPaw7UKI1+4u1Zn+x/vTJVg3p2sm98tslhp+fZ8rS3y0vX75c06ZNU2pqqgzD0AcffPC9r1m2bJmGDh2qkJAQdevWTc8+++wJ17z77rvq16+fgoOD1a9fP73//vvtUD1agyOGT235rqNymFKf5Ej+JQ8AfqhLTJhuGddN790+VqtmX6j7pvXT8IwYGYa0/kCJHpy3Tec9skSXP/mlnl22RweYetRqlgbeyspKDR48WE8++WSLrs/OztbkyZM1btw4rV+/Xvfcc49+9atf6d1333Vfs2rVKl1zzTWaMWOGNmzYoBkzZujqq6/W6tWr2+vLQAu4N64xqeEErv7dCUxnAAC/lxIdqpvGZuqd28bo69kX6U+X99fIzFgZhrThUKkeXrBd5/99iab+e4WeWrKbCUgtZJimaVpdhOQc6fH+++/riiuuOOU1d911lz766CNt27bN/dhtt92mDRs2aNWqVZKka665RmVlZVqwYIH7mssuu0wxMTGaO3dui2opKytTdHS0SktLFRUVdWZfEJp5Y/V+3fv+Zk3onaCXbxphdTkeo9FhatiDi1VcVa+3fzaaCQ0AgJPKL6/Roi1HNH9Trr7eWyjHcemtb0qUJg9I1uRBKeqeEGFdkR2sNXnNq3p4V61apYkTJzZ77NJLL9WLL76o+vp6BQYGatWqVfr1r399wjWPP/74KT9vbW2tamtr3X8uKytr07ohZXL4xEllHSxRcVW9okICdG7XTlaXAwDwUImRIZo+Kl3TR6WrsKJWi7Y6w+/KPYXallumbbllenTxTvVOitSkg
cmaPDBFvZIirS7bY3hV4M3Ly1NSUlKzx5KSktTQ0KCCggKlpKSc8pq8vLxTft6HHnpIDzzwQLvUDCdXDy+jyZpztTOc3yuBewIAaJG4iGBdO6Krrh3RVcWVdVq89YjmbcrVV7sLtONIuXYcKdfjn+1Sj8QI98pv76RIGYZhdemW8arAK+mE/7NcHRnHP36ya073f/Ls2bM1a9Ys95/LysqUlpbWFuWiSXJUiIIDbKptcOhwSbXSm1Z8/Z3rdDWmMwAAzkRMeJCuHp6mq4enqbSqXou3OVd+V+w6qt35FXrii9164ovd6hYfrkkDkzVpQIr6p0b5Xfj1qsCbnJx8wkptfn6+AgICFBcXd9prvrvqe7zg4GAFB3O8X3uy2QxlxIVrx5FyZRdUEnglHSmr0ZacMhmGdEFv5u8CAM5OdFigrhraRVcN7aKymnp9vu2I5m/K07KdR7W3oFJPLdmjp5bsUXpcmCYNSNHkgcka2DnaL8KvVwXe0aNH6+OPP2722KJFizRs2DAFBga6r1m8eHGzPt5FixZpzJgxHVorTpQeF6YdR8qdO0p7W12N9ZY2re4O6tKJ89QBAG0qKiRQ/zOki/5nSBeV19Tri+35WrApT0t25Gt/YZWeXbZHzy7boy4xoZo8MEWTBiTrnLROPht+LQ28FRUV2r17t/vP2dnZysrKUmxsrLp27arZs2fr8OHDevXVVyU5JzI8+eSTmjVrln76059q1apVevHFF5tNX7jjjjt0/vnn65FHHtHll1+uDz/8UJ999pm+/PLLDv/60FxmPBvXjvdFU//uhbQzAADaUWRIoC4/p7MuP6ezKmsbtGSHM/x+sT1fh4qr9Z/le/Wf5XuVGh2iSQOdK79D0mJks/lO+LU08K5Zs0YTJkxw/9nVR3vDDTdozpw5ys3N1YEDB9zPZ2Zmav78+fr1r3+tp556SqmpqXriiSf0wx/+0H3NmDFj9Oabb+r3v/+9/vCHP6h79+566623NHLkyI77wnBSHD5xTF2DQ1/uKpDEccIAgI4THhygqYNSNXVQqqrqGrRsx1HN35ynz7cdUU5pjV78Mlsvfpmt5KgQXTbAOe1haHqM7F4efj1mDq8nYQ5v+1i1p1DXPv+1MuLCtPT/Tfj+F/iwlbsLdN0LqxUfEaxv7rnIp/4VDQDwPjX1jVq286gWbMrVZ9vyVVHb4H4uITJYkwY4N7yNyIz1mPDrs3N44d1cLQ0Hi6tV3+hQoB+P4XK1M4zvnUDYBQBYLiTQrkv7J+vS/smqqW/Ul7sKNH9zrhZvPaKj5bV6ddV+vbpqv+IjgnRpf+fK78jMWK8ZqUngRYdJjAxWSKBNNfUOHSqudgdgf8Q4MgCApwoJtOvifkm6uF+S6hoc+mp3geZvytWirUdUUFGnN1Yf0BurDygmLNAdfkd3j/PohSwCLzqMazTZ9jznpAZ/DbwHCqu052il7DZD43rFW10OAACnFBRg04Q+iZrQJ1F/bXRo5Z5CLdiUq4Vb8lRcVa83vz2oN789qOjQQE3sl6TJg1I0tnu8ggI8K/x6VjXweRlxbFxzre4OS49RVEigxdUAANAygXabLuiVoId/OEjf3nuxXr95pK4b2VVx4UEqra7XO2sP6aaXv9Xa/cVWl3oCVnjRodyTGgr8N/C6x5H1oZ0BAOCdAuw2ndczXuf1jNefLx+gb7KLNH9TrtbsL9aIzFiryzsBgRcdKjM+TJKU7aezeKvrGrVqb6EkaQKBFwDgA+w2Q6O7x2l09zirSzklWhrQodwtDX66wrtyT4HqGhzq3ClUPRMjrC4HAAC/QOBFh3K1NBwqrlJdg8PiajqeezpDnwSfPb4RAABPQ+BFh0qMDFZYkF0OUzpY7F9tDaZpasn2o5Lo3wUAoCMReNGhDMNQelNbw34/m9SwK79Ch0uqFRRg0+hujCMDAKCjEHjR4dwb1wr8a4V3SdN0htHd4hQaZLe4GgAA/AeBFx3OXzeuMY4MAABrEHjR4fzx8ImymnqtaRrEzXHCAAB0LAIvOpxrUkO2H63wrthZoEaHqe4J4eoaF2Z1OQAA+BUCLzpcRlMPb05JtWobGi2upmO4x5GxugsAQIcj8KLDJUQEK9w1mqyo2upy2p3DYWrpDvp3AQCwCoEXHc4wDHdbgz9sXNucU6qCijpFBAdoWIbnnS8OAICvI/DCEv60cc01neG8HvEKCuA/OQAAOho/fWGJDPcsXt8PvEt2OE9Xm9AnweJKAADwTwReWCLDfdqabx8+UVBRq42HSiRJ49mwBgCAJQi8sESmn4wmW7bjqExT6p8apaSoEKvLAQDALxF4YQnXprWc0mrV1PvuaDLGkQEAYD0CLywRFx6kiOAAmaZ0sMg32xoaGh1avtPVv0vgBQDAKgReWMI5msy3N66tO1CispoGxYQF6py0TlaXAwCA3yLwwjK+vnHNNY7sgl4JstsMi6sBAMB/EXhhGffGNR+dxes6XY12BgAArEXghWXch0/4YEvD4ZJqbc8rl82Qzu/J/F0AAKxE4IVlfPl4Ydfq7pCuMYoJD7K4GgAA/BuBF5bJiHNuWssprfG50WRLtjunM1xIOwMAAJYj8MIyseFBigwJkORbG9dq6hv11e4CSdL43rQzAABgNQIvLGMYhnvj2j4f2rj2TXaRqusblRQVrH4pUVaXAwCA3yPwwlK+uHHNNY5sQu9EGQbjyAAAsBqBF5bK8MEVXteGtfEcJwwAgEcg8MJSro1rvnLa2t6jFdpXWKVAu6HzesZbXQ4AABCBFxY7NprMNzatLdnhnM4wIjNWEcEBFlcDAAAkAi8sltnUw5tXVqPqOu8fTbbkuP5dAADgGQi8sFRMeJCiQwMlSfuLvLutobK2QauzCyVxnDAAAJ6EwAvL+cqJa1/tLlB9o6n0uDB1a/qaAACA9Qi8sNyxjWve3ce7ZAfjyAAA8EQEXljOF2bxmqbpPk6Y09UAAPAsBF5YzhdOW9uWW668shqFBNo0qluc1eUAAIDjEHhhOV84fMLVzjC2e7xCAu0WVwMAAI5H4IXlXKPJjpTVqqquweJqzox7HBnTGQAA8DgEXlguOixQncKco8m88QCKkqo6rTtQLInACwCAJyLwwiO4N655YVvDsp1H5TCl3kmR6twp1OpyAADAdxB44RG8eePa0qbjhMf3YToDAACeiMALj+Cto8kaHaaW7XQG3gs5ThgAAI9E4IVHyIh3Hj7hbT28Gw6VqKiyTpEhATo3PcbqcgAAwEkQeOERXCu82V7W0rC0aTrD+T0TFGjnPycAADwRP6HhEVyzeI+W16qi1ntGk32xg3FkAAB4OgIvPEJ0aKBiw4MkSfu9ZJU3v6xGmw+XSZIu6MWGNQAAPBWBFx4jI867+nhd0xkGd4lWQmSwxdUAAIBTIfDCY3jbEcOu44THM50BAACPRuCFx3AdMZztBaPJ6hocWrGrQJJ0If27AAB4NAIvPEZ6vPfM4l2zv0gVtQ2KjwjSwM7RVpcDAABOg8ALj5HpRccLu/p3L+iVKJvNsLgaAABwOgReeAzX4RMFFXUqr6m3
uJrT+2K7axwZ0xkAAPB0BF54jMiQQMVHuEaTee6khoNFVdqdXyG7zdC4ngReAAA8HYEXHiXDCzauuaYzDE2PUXRooMXVAACA70PghUdJj/P8jWtLXO0MjCMDAMArEHjhUTKb+nizPXTjWnVdo1buKZTEODIAALwFgRcexXX4hKf28H69t1C1DQ6lRoeoV1KE1eUAAIAWIPDCo2R4eEvDsekMiTIMxpEBAOANCLzwKK4V3sLKOpV52Ggy0zTdG9bo3wUAwHsQeOFRIoIDFB8RLMnzVnl351foUHG1ggJsGtMjzupyAABACxF44XHcG9c8LPC6VndHdYtTWFCAxdUAAICWIvDC47j6eD1t49qS7c7jhCf05rAJAAC8CYEXHsfVx+tJLQ1lNfX6dl+RJPp3AQDwNgReeJzMpsDrSbN4v9pVoAaHqW7x4e5ADgAAvAOBFx4nPc7Zw+tJK7zHjyMDAADehcALj+Pq4S2uqldplfWjyRwOU0t3uvp3CbwAAHgbAi88TnhwgBIjm0aTeUBbw5acMh0tr1V4kF3DM2OsLgcAALQSgRceyb1xzQMCr2sc2dge8QoOsFtcDQAAaC0CLzxSZlNbgyfM4nX1715I/y4AAF6JwAuP5CmjyQorarXhUIkkaTz9uwAAeCUCLzxSRtOkhmyLD59YvuuoTFPqmxKl5OgQS2sBAABnhsALj+Ra4d1vcQ/vF02nq13Yh9PVAADwVgReeCTXaLKSqnqVVNVZUkNDo0PLGUcGAIDXI/DCI4UG2ZUc5WwhsGrj2vqDJSqtrlensEAN6co4MgAAvBWBFx4rI77pxDWL2hqWNE1nOL9nguw2w5IaAADA2SPwwmNluEeTWbNxjXFkAAD4BgIvPJaVo8lyS6u1Pa9chiGd34sNawAAeDMCLzyWa4XXikkNS5qmMwxJ66TY8KAOf38AANB2CLzwWJnxx05bM02zQ9/bdZww0xkAAPB+BF54rPSmwyfKahpUXFXfYe9b29Cor3YXSJIm0L8LAIDXI/DCY4UE2pUS3fGjyb7JLlJVXaMSI4PVPzWqw94XAAC0D8sD79NPP63MzEyFhIRo6NChWrFixWmvf+qpp9S3b1+Fhoaqd+/eevXVV5s9P2fOHBmGccJHTU1Ne34ZaCeuPt6O3Ljm6t8d3ztBhsE4MgAAvF2AlW/+1ltv6c4779TTTz+tsWPH6rnnntOkSZO0detWde3a9YTrn3nmGc2ePVvPP/+8hg8frm+++UY//elPFRMTo2nTprmvi4qK0o4dO5q9NiQkpN2/HrS9jPhwrdpb2KEb11z9u4wjAwDAN1gaeP/5z3/q5ptv1i233CJJevzxx7Vw4UI988wzeuihh064/rXXXtPPfvYzXXPNNZKkbt266euvv9YjjzzSLPAahqHk5OSO+SLQrjKbDp/ILuyYWbzZBZXKLqhUoN3Q2B7xHfKeAACgfVnW0lBXV6e1a9dq4sSJzR6fOHGiVq5cedLX1NbWnrBSGxoaqm+++Ub19cc2NVVUVCg9PV1dunTR1KlTtX79+tPWUltbq7KysmYf8Awd3dLgOl1teEasIkMCO+Q9AQBA+7Is8BYUFKixsVFJSUnNHk9KSlJeXt5JX3PppZfqhRde0Nq1a2WaptasWaOXXnpJ9fX1Kihw7qrv06eP5syZo48++khz585VSEiIxo4dq127dp2yloceekjR0dHuj7S0tLb7QnFWjj98oiNGkzGODAAA32P5prXvbgoyTfOUG4X+8Ic/aNKkSRo1apQCAwN1+eWX68Ybb5Qk2e12SdKoUaM0ffp0DR48WOPGjdPbb7+tXr166d///vcpa5g9e7ZKS0vdHwcPHmybLw5nrWtsmAxDKq9tUGFlXbu+V2Vtg1bvLZLEODIAAHyJZYE3Pj5edrv9hNXc/Pz8E1Z9XUJDQ/XSSy+pqqpK+/bt04EDB5SRkaHIyEjFx5+839Jms2n48OGnXeENDg5WVFRUsw94hpBAu1KjQyW1/4lrK/cUqq7RobTYUHVPCG/X9wIAAB3HssAbFBSkoUOHavHixc0eX7x4scaMGXPa1wYGBqpLly6y2+168803NXXqVNlsJ/9STNNUVlaWUlJS2qx2dKwM18a1gvbduHZ8OwPjyAAA8B2WTmmYNWuWZsyYoWHDhmn06NH6z3/+owMHDui2226T5Gw1OHz4sHvW7s6dO/XNN99o5MiRKi4u1j//+U9t3rxZr7zyivtzPvDAAxo1apR69uypsrIyPfHEE8rKytJTTz1lydeIs5cRF66vdhe268Y10zTdG9ZoZwAAwLdYGnivueYaFRYW6k9/+pNyc3M1YMAAzZ8/X+np6ZKk3NxcHThwwH19Y2OjHn30Ue3YsUOBgYGaMGGCVq5cqYyMDPc1JSUluvXWW5WXl6fo6GgNGTJEy5cv14gRIzr6y0MbyWzauJbdji0NO46UK7e0RiGBNo3uFtdu7wMAADqeYXbE1ncvU1ZWpujoaJWWltLP6wEWbz2in766Rv1TozTvV+Pa5T2eXrpbf/t0hy7sk6iXbhzeLu8BAADaTmvymuVTGoDv4zp8Yn9hVbuNJlvadJzwhN4J7fL5AQCAdQi88HhpsWGyGVJFbYMKKtp+NFlpVb3WHiiWJI1n/i4AAD6HwAuPFxxgV2on52iyfe3Qx7t811E1Okz1TIxQWmxYm39+AABgLQIvvIJ741o7TGpwTWe4kOkMAAD4JAIvvEJ6nHPlta1HkzkcppbudPbv0s4AAIBvIvDCK2TEOVd427qlYePhUhVV1ikyOEDDMmLa9HMDAADPQOCFV3C1NOxr49PWvmhqZxjXK16Bdv5zAADAF/ETHl4hI/7YCm9bjiZb2nScMO0MAAD4LgIvvEJajHM0WVVdo46W17bJ58wvr9HGQ6WSpPHM3wUAwGcReOEVggJs6hzjHE3WVpMalu1wblYb2DlaiZEhbfI5AQCA5yHwwmu09ca1JU3tDBMYRwYAgE8j8MJruDeuFZ79xrX6RodW7CyQxHHCAAD4OgIvvIZ7hbcNWhrW7CtWeW2D4sKDNLhLp7P+fAAAwHMReOE12vK0Ndd0hgt6JchmM8768wEAAM9F4IXXcJ22tr+w6qxHk7nm746nfxcAAJ9H4IXXSIsNk91mqLq+UUfKznw02cGiKu3Kr5DNkC7oSf8uAAC+jsALrxFot6lL02iys5nUsHSncxzZ0PQYRYcFtkltAADAcxF44VXaYuPaku2MIwMAwJ8QeOFV3BvXznCFt6a+USv3uMaREXgBAPAHBF54lYymjWtnusK7am+hauodSokOUZ/kyLYsDQAAeCgCL7xKuuvwiYIzO3xiqWs6Q+9EGQbjyAAA8AcEXniVzKYe3v1FlXI4WjeazDRNfdE0f/dC+ncBAPAbBF54lS4xoQqwGaqpd+hIeU2rXrvnaKUOFlUryG7TmO5x7VQhAADwNAReeJUAu01psc4+3taeuOaazjCyW6zCgwPavDYAAOCZCLzwOsc2rrWuj3dJUzsD0xkAAPAvBF54nXTXLN5WjCYrr6nXt/uKJDF/FwAAf0P
ghdfJjG/94RNf7S5QfaOpzPhw9+sBAIB/IPDC62TEt36Fd8l253HC43sntEtNAADAcxF44XXco8kKq1o0msw0TXf/LuPIAADwPwReeJ3UTiEKtBuqbXAot+z7R5NtySlTfnmtwoLsGpEZ2wEVAgAAT0LghdcJsNuUFtPyI4Zd48jG9ohXcIC9XWsDAACeh8ALr+Tq423JLF7GkQEA4N8IvPBKGe4+3tMH3qLKOq0/WCKJDWsAAPgrAi+8Uma867S10x8+sXznUZmm1Cc5UqmdQjuiNAAA4GEIvPBKLR1N5m5nYDoDAAB+i8ALr+RqaThQWKXGU4wma3SYWrbTOX+XcWQAAPgvAi+8UmqnUAXZbaprdCinpPqk12QdLFZJVb2iQwM1JK1TxxYIAAA8BoEXXsluM5QW6+zJ3V948j7eL5rGkZ3fK0EBdv6qAwDgr0gB8FqZrtFkp+jjdR0nPIHpDAAA+DUCL7yWq4/3ZIdP5JXWaGtumQxDuqAXgRcAAH9G4IXXSo8/deBd2jSdYXCXToqLCO7QugAAgGch8MJrZcaduqXB1b/LdAYAAEDghdfKaDp84mBR89FktQ2N+mp3gSSOEwYAAAReeLHU6FAFBdhU32g2G022Zl+xKusaFR8RrP6pURZWCAAAPAGBF17LZjOUHus6YvhYW4OrnWFC7wTZbIYltQEAAM9B4IVXO9kRwxwnDAAAjkfghVfLiGu+wru/sFJ7j1YqwGbovJ7xVpYGAAA8BIEXXs21wus6bW1JUzvDsIwYRYUEWlYXAADwHAReeLXM7xw+8cUO5+lqjCMDAAAuBF54NdcK74GiKpXX1OvrvYWSGEcGAACOIfDCqyVHhSg4wKYGh6l31hxSXYNDXWJC1SMxwurSAACAhyDwwqvZbIbSmzauzVm5T5JzddcwGEcGAACcCLzwehlxx9oaJGlCnwQrywEAAB6GwAuvl9nUxytJwQE2je7GODIAAHAMgRdeL+O4wDu6e5xCg+wWVgMAADwNgRdez9XSIDGODAAAnIjAC693fEsD48gAAMB3BVhdAHC2kqND9NuJvWS32ZQWG2Z1OQAAwMMQeOETfnlhT6tLAAAAHoqWBgAAAPg0Ai8AAAB8GoEXAAAAPo3ACwAAAJ9G4AUAAIBPI/ACAADApxF4AQAA4NMIvAAAAPBpBF4AAAD4NAIvAAAAfBqBFwAAAD6NwAsAAACfRuAFAACATyPwAgAAwKcReAEAAODTCLwAAADwaQReAAAA+DQCLwAAAHwagRcAAAA+jcALAAAAn0bgBQAAgE8j8AIAAMCnEXgBAADg0wi8AAAA8GkEXgAAAPi0AKsL8ESmaUqSysrKLK4EAAAAJ+PKaa7cdjoE3pMoLy+XJKWlpVlcCQAAAE6nvLxc0dHRp73GMFsSi/2Mw+FQTk6OIiMjZRhGu79fWVmZ0tLSdPDgQUVFRbX7+/ki7uHZ4f6dPe7h2eMenh3u39njHp6djr5/pmmqvLxcqampstlO36XLCu9J2Gw2denSpcPfNyoqiv/AzhL38Oxw/84e9/DscQ/PDvfv7HEPz05H3r/vW9l1YdMaAAAAfBqBFwAAAD6NwOsBgoODdd999yk4ONjqUrwW9/DscP/OHvfw7HEPzw737+xxD8+OJ98/Nq0BAADAp7HCCwAAAJ9G4AUAAIBPI/ACAADApxF4AQAA4NMIvB3ooYce0vDhwxUZGanExERdccUV2rFjR7NrTNPU/fffr9TUVIWGhmr8+PHasmWLRRV7lmeeeUaDBg1yD7QePXq0FixY4H6ee9c6Dz30kAzD0J133ul+jHt4evfff78Mw2j2kZyc7H6e+9cyhw8f1vTp0xUXF6ewsDCdc845Wrt2rft57uOpZWRknPB30DAM/eIXv5DEvWuJhoYG/f73v1dmZqZCQ0PVrVs3/elPf5LD4XBfw308vfLyct15551KT09XaGioxowZo2+//db9vEfePxMd5tJLLzVffvllc/PmzWZWVpY5ZcoUs2vXrmZFRYX7mocfftiMjIw03333XXPTpk3mNddcY6akpJhlZWUWVu4ZPvroI3PevHnmjh07zB07dpj33HOPGRgYaG7evNk0Te5da3zzzTdmRkaGOWjQIPOOO+5wP849PL377rvP7N+/v5mbm+v+yM/Pdz/P/ft+RUVFZnp6unnjjTeaq1evNrOzs83PPvvM3L17t/sa7uOp5efnN/v7t3jxYlOSuWTJEtM0uXct8eCDD5pxcXHmJ598YmZnZ5vvvPOOGRERYT7++OPua7iPp3f11Veb/fr1M5ctW2bu2rXLvO+++8yoqCjz0KFDpml65v0j8FooPz/flGQuW7bMNE3TdDgcZnJysvnwww+7r6mpqTGjo6PNZ5991qoyPVpMTIz5wgsvcO9aoby83OzZs6e5ePFi84ILLnAHXu7h97vvvvvMwYMHn/Q57l/L3HXXXeZ55513yue5j61zxx13mN27dzcdDgf3roWmTJlizpw5s9ljV155pTl9+nTTNPk7+H2qqqpMu91ufvLJJ80eHzx4sHnvvfd67P2jpcFCpaWlkqTY2FhJUnZ2tvLy8jRx4kT3NcHBwbrgggu0cuVKS2r0VI2NjXrzzTdVWVmp0aNHc+9a4Re/+IWmTJmiiy++uNnj3MOW2bVrl1JTU5WZmakf//jH2rt3ryTuX0t99NFHGjZsmH70ox8pMTFRQ4YM0fPPP+9+nvvYcnV1dXr99dc1c+ZMGYbBvWuh8847T59//rl27twpSdqwYYO+/PJLTZ48WRJ/B79PQ0ODGhsbFRIS0uzx0NBQffnllx57/wi8FjFNU7NmzdJ5552nAQMGSJLy8vIkSUlJSc2uTUpKcj/n7zZt2qSIiAgFBwfrtttu0/vvv69+/fpx71rozTff1Lp16/TQQw+d8Bz38PuNHDlSr776qhYuXKjnn39eeXl5GjNmjAoLC7l/LbR3714988wz6tmzpxYuXKjbbrtNv/rVr/Tqq69K4u9ha3zwwQcqKSnRjTfeKIl711J33XWXrr32WvXp00eBgYEaMmSI7rzzTl177bWSuI/fJzIyUqNHj9af//xn5eTkqLGxUa+//rpWr16t3Nxcj71/AZa9s5/75S9/qY0bN+rLL7884TnDMJr92TTNEx7zV71791ZWVpZKSkr07rvv6oYbbtCyZcvcz3PvTu3gwYO64447tGjRohP+ZX487uGpTZo0yf2/Bw4cqNGjR6t79+565ZVXNGrUKEncv+/jcDg0bNgw/fWvf5UkDRkyRFu2bNEzzzyj66+/3n0d9/H7vfjii5o0aZJSU1ObPc69O7233npLr7/+uv773/+qf//+ysrK0p133qnU1FTdcMMN7uu4j6f22muvaebMmercubPsdrvOPfdcXXfddVq3bp37Gk+7f6zwWuB///d/9dFHH2nJkiXq0qWL+3HXbu/v/gsoPz//hH8p+augoCD16NFDw4YN00MPPaTBgwfrX//6F/euBdauXav8/HwNHTpUAQEBCggI0LJly/TEE08oICDAfZ+4hy0XHh6ugQMHat
euXfwdbKGUlBT169ev2WN9+/bVgQMHJPF9sKX279+vzz77TLfccov7Me5dy/y///f/dPfdd+vHP/6xBg4cqBkzZujXv/61+zdf3Mfv1717dy1btkwVFRU6ePCgvvnmG9XX1yszM9Nj7x+BtwOZpqlf/vKXeu+99/TFF18oMzOz2fOuvyiLFy92P1ZXV6dly5ZpzJgxHV2uVzBNU7W1tdy7Frjooou0adMmZWVluT+GDRumn/zkJ8rKylK3bt24h61UW1urbdu2KSUlhb+DLTR27NgTxjHu3LlT6enpkvg+2FIvv/yyEhMTNWXKFPdj3LuWqaqqks3WPP7Y7Xb3WDLuY8uFh4crJSVFxcXFWrhwoS6//HLPvX8WbZbzSz//+c/N6Ohoc+nSpc3GylRVVbmvefjhh83o6GjzvffeMzdt2mRee+21lo/y8BSzZ882ly9fbmZnZ5sbN24077nnHtNms5mLFi0yTZN7dyaOn9JgmtzD7/Ob3/zGXLp0qbl3717z66+/NqdOnWpGRkaa+/btM02T+9cS33zzjRkQEGD+5S9/MXft2mW+8cYbZlhYmPn666+7r+E+nl5jY6PZtWtX86677jrhOe7d97vhhhvMzp07u8eSvffee2Z8fLz5u9/9zn0N9/H0Pv30U3PBggXm3r17zUWLFpmDBw82R4wYYdbV1Zmm6Zn3j8DbgSSd9OPll192X+NwOMz77rvPTE5ONoODg83zzz/f3LRpk3VFe5CZM2ea6enpZlBQkJmQkGBedNFF7rBrmty7M/HdwMs9PD3XLMnAwEAzNTXVvPLKK80tW7a4n+f+tczHH39sDhgwwAwODjb79Olj/uc//2n2PPfx9BYuXGhKMnfs2HHCc9y771dWVmbecccdZteuXc2QkBCzW7du5r333mvW1ta6r+E+nt5bb71lduvWzQwKCjKTk5PNX/ziF2ZJSYn7eU+8f4ZpmqZ168sAAABA+6KHFwAAAD6NwAsAAACfRuAFAACATyPwAgAAwKcReAEAAODTCLwAAADwaQReAAAA+DQCLwAAAHwagReA3xg/frzuvPNOq8vQ/fffr3POOcf95xtvvFFXXHHFaV/TVrV7yj1oC3PmzFGnTp3O+PWGYeiDDz5os3oAeC4CLwCPYxjGaT9uvPHGM/q87733nv785z+3+nXV1dUKCwvTX/7yF0VHR6uqquqEa2pqatSpUyf985//bPXn/9e//qU5c+a0+nWns3TpUhmGoZKSkmaPn+k98EW5ubmaNGmS1WUA6AAEXgAeJzc31/3x+OOPKyoqqtlj//rXv5pdX19f36LPGxsbq8jIyFbXs3jxYqWlpenWW29VdXW13n333ROueffdd1VVVaUZM2a0+vNHR0ef1Upla5zpPfBFycnJCg4OtroMAB2AwAvA4yQnJ7s/oqOjZRiG+8+uldS3335b48ePV0hIiF5//XUVFhbq2muvVZcuXRQWFqaBAwdq7ty5zT7vd3+dn5GRob/+9a+aOXOmIiMj1bVrV/3nP/85oZ4PP/xQP/jBD5SQkKBp06bppZdeOuGal156yX3NXXfdpV69eiksLEzdunXTH/7wh9OG8u+2NFRWVur6669XRESEUlJS9Oijj57wmtdff13Dhg1TZGSkkpOTdd111yk/P1+StG/fPk2Y8P/bu/+YqOs/gONPRPnVBx1iwo0pmAZBTRwHcygLdCrKHOfMgTUK6marRjommBtBBGuSopQ/52qD1D8QdSSBW6xEKg5NHTQNFHLQ+Yf9gbIlmmNw7+8f7D7rowdcal/z9npst/H+ce/36/2GsRfve3O3BICgoCDDqfj9ezAwMMAbb7xBUFAQAQEBrFq1ip6eHr3deW3g22+/JTo6Gk3TWLlyJTdu3BhzPc7T5cbGRmJjY/Hz82PhwoVcunTJ0O/EiRO8+OKL+Pr6EhER8cA6J4ptIkNDQ+Tm5mIymfDz8yMiIoJt27bp7X+/0lBSUuLy1QTnybtSiu3bt/Pcc8/h7+9PbGwsx48fdzsWIcSTJQmvEOKp9MEHH7Bx40a6urpITU3l3r17mM1mGhoauHz5Mm+//Tavv/46586dG3ecnTt3Eh8fT3t7O++99x7vvvsuV65c0dsdDgcNDQ1YLBYArFYrLS0t9Pb26n36+vpobm7GarUCEBgYSHV1NZ2dnXz++ed88cUXVFZWur22goICmpubqauro6mpiTNnznDx4kVDn6GhIcrKyvjll1/4+uuv6e3t1ZPaWbNm6afQV69edXkq7pSTk8OFCxeor6+nra0NpRRpaWmGBP3u3btUVFRw+PBhfvjhB+x2O/n5+W6to6KigvPnzzNz5kzS09P1cS9evEhGRgbr16/n0qVLlJSUUFRUZLja4U5s49m9ezf19fXU1tZy9epVjhw5QkREhMu++fn5hlcRKioqCAgIID4+HoAPP/yQqqoqDhw4wK+//kpeXh5ZWVm0tLS4FYsQ4glTQgjxH1ZVVaWmTZuml3t7exWgPvvsswmfm5aWpjZv3qyXk5OT1aZNm/RyeHi4ysrK0ssOh0PNnDlTHThwQK9rbW1VM2bMUCMjI0oppYaHh1VYWJgqLi7W+xQXF6uwsDA1PDzsMo7t27crs9mslz/66CMVGxurl7Ozs5XFYlFKKXX79m3l4+Ojampq9PabN28qf39/Q+z3+/nnnxWgbt++rZRSqrm5WQFqYGDA0O/ve9Dd3a0A1draqrf39/crf39/VVtbq5Qa3X9A/fbbb3qfffv2qZCQkDFjcc7tag1Hjx5VSin12muvqeXLlxueV1BQoGJiYv5RbH//2bjf+++/r5YuXaocDofLdkDV1dU9UN/W1qb8/Pz0WAcHB5Wfn5+y2WyGflarVb366qtjzi+E+O+QE14hxFPJefLmNDIywieffML8+fMJDg5G0zSampqw2+3jjjN//nz9a+fVCefVABi9zrB69WomTRr9dent7U12djbV1dU4HA6UUnz11Vfk5OTg7e0NwPHjx0lKSiI0NBRN0ygqKpowDqdr164xNDREYmKiXjd9+nSioqIM/drb27FYLISHhxMYGEhKSgqA2/MAdHV1MXnyZBYuXKjXBQcHExUVRVdXl14XEBDA3Llz9bLJZDLs0VhcrcE5bldXF4sXLzb0X7x4MT09PYyMjLgd23hycnLo6OggKiqKjRs30tTUNOFz7HY7a9asIT8/n4yMDAA6Ozu5d+8ey5cvR9M0/XHo0CGuXbvmVixCiCdLEl4hxFPpmWeeMZR37txJZWUlW7Zs4fTp03R0dJCamsrQ0NC440yZMsVQ9vLywuFw6OX6+nr9OoPTW2+9xfXr1zl9+jTff/89drudN998E4CzZ8+yfv16Vq1aRUNDA+3t7RQWFk4Yh5NSasI+d+7cYcWKFWiaxpEjRzh//jx1dXUAbs8z3lxKKby8vPSyqz1yJ05XnOPeP8f98bgb23ji4uLo7e2lrKyMv/76i4yMDNatWzdm/zt37pCenk5iYiKlpaV6vfPnobGxkY6ODv3R2dkp93iFeEpMftIBCCHE4/Djjz9isVjIy
soCRpOUnp4eoqOjH3rMnp4e+vr6WLFihaF+7ty5JCcnU1VVhVKKlJQU/QS0tbWV8PBwCgsL9f6///6723POmzePKVOmcPbsWWbPng2M/vNWd3c3ycnJAFy5coX+/n7Ky8uZNWsWABcuXDCM4+PjA4yefI8lJiaG4eFhzp07x6JFiwC4efMm3d3dj7RvTq7W8MILL+hz//TTT4b+NpuNyMhIvL29H1tsU6dOJTMzk8zMTNatW8fKlSu5desW06dPN/RTSpGVlYXD4eDw4cOGpDomJgZfX1/sdrv+PRBCPF0k4RVCeIR58+Zx4sQJbDYbQUFB7Nq1iz/++OOREreTJ0+ybNkyAgICHmizWq1s2LABgC+//NIQh91up6amhoSEBBobG/XTV3domobVaqWgoIDg4GBCQkIoLCzUr1QAzJ49Gx8fH/bs2cM777zD5cuXH3hv3fDwcLy8vGhoaCAtLQ1/f380TTP0ef7557FYLGzYsIGDBw8SGBjI1q1bCQsLe+BU+2GUlpYa1jBjxgz93Sg2b95MQkICZWVlZGZm0tbWxt69e9m/f/9ji62yshKTycSCBQuYNGkSx44dIzQ01OVbwJWUlPDdd9/R1NTE4OAgg4ODwOhbxgUGBpKfn09eXh4Oh4OkpCT+/PNPbDYbmqaRnZ39yHslhPh3yZUGIYRHKCoqIi4ujtTUVFJSUggNDZ3w08smcvLkyTGTq1deeQVfX198fX1Zu3atXm+xWMjLyyM3N5cFCxZgs9koKir6R/Pu2LGDl19+mfT0dJYtW0ZSUhJms1lvf/bZZ6murubYsWPExMRQXl5ORUWFYYywsDA+/vhjtm7dSkhICLm5uS7nqqqqwmw2s3r1ahITE1FKcerUqQeuMTyM8vJyNm3ahNls5saNG9TX1+snz3FxcdTW1lJTU8NLL71EcXExpaWlhg8VedTYNE3j008/JT4+noSEBPr6+jh16pThjwenlpYWBgcHWbRoESaTSX8cPXoUgLKyMoqLi9m2bRvR0dGkpqbyzTffMGfOnEfeJyHEv89LPexFLCGE8GD9/f2YTCauX79OaGjokw7nqXLmzBmWLFnCwMDA/+0DNYQQYjxywiuEEC7cunWLXbt2SbIrhBAeQO7wCiGEC5GRkURGRj7pMIQQQjwGcqVBCCGEEEJ4NLnSIIQQQgghPJokvEIIIYQQwqNJwiuEEEIIITyaJLxCCCGEEMKjScIrhBBCCCE8miS8QgghhBDCo0nCK4QQQgghPJokvEIIIYQQwqP9D3apBfFvB4ZUAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = plt.figure(figsize=(8, 8))\n", + "ax = fig.add_subplot(1, 1, 1)\n", + "ax.set_title(\"Active learning results\")\n", + "ax.set_xlabel(\"Train/Validation pool size\")\n", + "ax.set_ylabel(\"MSE\")\n", + "\n", + "ax.plot([a[0] for a in results], [a[1] for a in results])\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/examples/convert_v1_to_v2.ipynb b/chemprop-updated/examples/convert_v1_to_v2.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..227cc49efa93b4e530321013cea7121ba830f2e0 --- /dev/null +++ b/chemprop-updated/examples/convert_v1_to_v2.ipynb @@ -0,0 +1,495 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convert v1 to v2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/convert_v1_to_v2.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from pprint import pprint\n", + "from pathlib import Path\n", + "\n", + "from chemprop.utils.v1_to_v2 import convert_model_dict_v1_to_v2\n", + "from chemprop.models.model import MPNN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change model paths here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "model_v1_input_path = chemprop_dir / \"tests/data/example_model_v1_regression_mol.pt\" # path to v1 model .pt file\n", + "model_v2_output_path = Path.cwd() / \"converted_model.ckpt\" # path to save the converted model .ckpt file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load v1 model .pt file" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "model_v1_dict = torch.load(model_v1_input_path, weights_only=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['args',\n", + " 'state_dict',\n", + " 'data_scaler',\n", + " 'features_scaler',\n", + " 'atom_descriptor_scaler',\n", + " 'bond_descriptor_scaler',\n", + " 'atom_bond_scaler']\n" + ] + } + ], + "source": [ + "# Here are all the keys that is stored in v1 model\n", + "pprint(list(model_v1_dict.keys()))" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'activation': 'ReLU',\n", + " 'adding_bond_types': True,\n", + " 'adding_h': False,\n", + " 'aggregation': 'mean',\n", + " 'aggregation_norm': 100,\n", + " 'atom_constraints': [],\n", + " 'atom_descriptor_scaling': True,\n", + " 'atom_descriptors': None,\n", + " 'atom_descriptors_path': None,\n", + " 'atom_descriptors_size': 0,\n", + " 'atom_features_size': 0,\n", + " 'atom_messages': False,\n", + " 'atom_targets': [],\n", + " 'batch_size': 50,\n", + " 'bias': False,\n", + " 'bias_solvent': False,\n", + " 'bond_constraints': [],\n", + " 'bond_descriptor_scaling': True,\n", + " 'bond_descriptors': None,\n", + " 'bond_descriptors_path': None,\n", + " 'bond_descriptors_size': 0,\n", + " 'bond_features_size': 0,\n", + " 'bond_targets': [],\n", + " 'cache_cutoff': 10000,\n", + " 'checkpoint_dir': None,\n", + " 'checkpoint_frzn': None,\n", + " 'checkpoint_path': None,\n", + " 'checkpoint_paths': None,\n", + " 'class_balance': False,\n", + " 'config_path': None,\n", + " 'constraints_path': None,\n", + " 'crossval_index_dir': None,\n", + " 'crossval_index_file': None,\n", + " 'crossval_index_sets': None,\n", + " 'cuda': False,\n", + " 'data_path': '/Users/hwpang/Software/chemprop/tests/data/regression.csv',\n", + " 'data_weights_path': None,\n", + " 'dataset_type': 'regression',\n", + " 'depth': 3,\n", + " 'depth_solvent': 3,\n", + " 'device': device(type='cpu'),\n", + " 'dropout': 0.0,\n", + " 'empty_cache': False,\n", + " 'ensemble_size': 1,\n", + " 'epochs': 1,\n", + " 'evidential_regularization': 0,\n", + " 'explicit_h': False,\n", + " 'extra_metrics': [],\n", + " 'features_generator': None,\n", + " 'features_only': False,\n", + " 'features_path': None,\n", + " 'features_scaling': True,\n", + " 'features_size': None,\n", + " 'ffn_hidden_size': 300,\n", + " 'ffn_num_layers': 2,\n", + " 'final_lr': 0.0001,\n", + " 'folds_file': None,\n", + " 'freeze_first_only': False,\n", + " 'frzn_ffn_layers': 0,\n", + " 'gpu': None,\n", + " 'grad_clip': None,\n", + " 'hidden_size': 300,\n", + " 'hidden_size_solvent': 300,\n", + " 'ignore_columns': None,\n", + " 'init_lr': 0.0001,\n", + " 'is_atom_bond_targets': False,\n", + " 'keeping_atom_map': False,\n", + " 'log_frequency': 10,\n", + " 'loss_function': 'mse',\n", + " 'max_data_size': None,\n", + " 'max_lr': 0.001,\n", + " 'metric': 'rmse',\n", + " 'metrics': ['rmse'],\n", + " 'minimize_score': True,\n", + " 'mpn_shared': False,\n", + " 'multiclass_num_classes': 3,\n", + " 'no_adding_bond_types': False,\n", + " 'no_atom_descriptor_scaling': False,\n", + " 'no_bond_descriptor_scaling': False,\n", + " 'no_cache_mol': False,\n", + " 'no_cuda': False,\n", + " 'no_features_scaling': False,\n", + " 'no_shared_atom_bond_ffn': False,\n", + " 'num_folds': 1,\n", + " 'num_lrs': 1,\n", + " 'num_tasks': 1,\n", + " 'num_workers': 8,\n", + " 'number_of_molecules': 1,\n", + " 'overwrite_default_atom_features': False,\n", + " 'overwrite_default_bond_features': False,\n", + " 'phase_features_path': None,\n", + " 'pytorch_seed': 0,\n", + " 'quiet': False,\n", + " 'reaction': False,\n", + " 'reaction_mode': 'reac_diff',\n", + " 'reaction_solvent': False,\n", + " 'resume_experiment': False,\n", + " 'save_dir': '/Users/hwpang/Software/test_chemprop_v1_to_v2/fold_0',\n", + " 'save_preds': False,\n", + " 'save_smiles_splits': True,\n", + " 'seed': 0,\n", + " 'separate_test_atom_descriptors_path': None,\n", + " 
'separate_test_bond_descriptors_path': None,\n", + " 'separate_test_constraints_path': None,\n", + " 'separate_test_features_path': None,\n", + " 'separate_test_path': None,\n", + " 'separate_test_phase_features_path': None,\n", + " 'separate_val_atom_descriptors_path': None,\n", + " 'separate_val_bond_descriptors_path': None,\n", + " 'separate_val_constraints_path': None,\n", + " 'separate_val_features_path': None,\n", + " 'separate_val_path': None,\n", + " 'separate_val_phase_features_path': None,\n", + " 'shared_atom_bond_ffn': True,\n", + " 'show_individual_scores': False,\n", + " 'smiles_columns': ['smiles'],\n", + " 'spectra_activation': 'exp',\n", + " 'spectra_phase_mask': None,\n", + " 'spectra_phase_mask_path': None,\n", + " 'spectra_target_floor': 1e-08,\n", + " 'split_key_molecule': 0,\n", + " 'split_sizes': [0.8, 0.1, 0.1],\n", + " 'split_type': 'random',\n", + " 'target_columns': None,\n", + " 'target_weights': None,\n", + " 'task_names': ['logSolubility'],\n", + " 'test': False,\n", + " 'test_fold_index': None,\n", + " 'train_data_size': 400,\n", + " 'undirected': False,\n", + " 'use_input_features': False,\n", + " 'val_fold_index': None,\n", + " 'warmup_epochs': 2.0,\n", + " 'weights_ffn_num_layers': 2}\n" + ] + } + ], + "source": [ + "# Here are the input arguments that is stored in v1 model\n", + "pprint(model_v1_dict['args'].__dict__)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['encoder.encoder.0.cached_zero_vector',\n", + " 'encoder.encoder.0.W_i.weight',\n", + " 'encoder.encoder.0.W_h.weight',\n", + " 'encoder.encoder.0.W_o.weight',\n", + " 'encoder.encoder.0.W_o.bias',\n", + " 'readout.1.weight',\n", + " 'readout.1.bias',\n", + " 'readout.4.weight',\n", + " 'readout.4.bias']\n" + ] + } + ], + "source": [ + "# Here are the state_dict that is stored in v1 model\n", + "pprint(list(model_v1_dict['state_dict'].keys()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convert loaded v1 model dictionary into v2 model dictionary" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "model_v2_dict = convert_model_dict_v1_to_v2(model_v1_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['epoch',\n", + " 'global_step',\n", + " 'pytorch-lightning_version',\n", + " 'state_dict',\n", + " 'loops',\n", + " 'callbacks',\n", + " 'optimizer_states',\n", + " 'lr_schedulers',\n", + " 'hparams_name',\n", + " 'hyper_parameters']\n" + ] + } + ], + "source": [ + "# Here are all the keys in the converted model\n", + "pprint(list(model_v2_dict.keys()))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['message_passing.W_i.weight',\n", + " 'message_passing.W_h.weight',\n", + " 'message_passing.W_o.weight',\n", + " 'message_passing.W_o.bias',\n", + " 'predictor.ffn.0.0.weight',\n", + " 'predictor.ffn.0.0.bias',\n", + " 'predictor.ffn.1.2.weight',\n", + " 'predictor.ffn.1.2.bias',\n", + " 'predictor.output_transform.mean',\n", + " 'predictor.output_transform.scale',\n", + " 'predictor.criterion.task_weights']\n" + ] + } + ], + "source": [ + "# Here are all the keys in the converted state_dict\n", + "pprint(list(model_v2_dict['state_dict'].keys()))" + ] + }, + { + "cell_type": 
"code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['batch_norm',\n", + " 'metrics',\n", + " 'warmup_epochs',\n", + " 'init_lr',\n", + " 'max_lr',\n", + " 'final_lr',\n", + " 'message_passing',\n", + " 'agg',\n", + " 'predictor']\n" + ] + } + ], + "source": [ + "# Here are all the keys in the converted hyper_parameters\n", + "pprint(list(model_v2_dict['hyper_parameters'].keys()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Save" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(model_v2_dict, model_v2_output_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load converted model" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "mpnn = MPNN.load_from_checkpoint(model_v2_output_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=147, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=433, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): Identity()\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0): RMSE(task_weights=[[1.0]])\n", + " (1): MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# now visually check the converted model is what is expected\n", + "mpnn" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/examples/extra_features_descriptors.ipynb b/chemprop-updated/examples/extra_features_descriptors.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..1dcdfaf4af8c59446b932f1dddd8f6edae5f8955 --- /dev/null +++ b/chemprop-updated/examples/extra_features_descriptors.ipynb @@ -0,0 +1,1101 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using extra features and descriptors\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook demonstrates how to use extra features and descriptors in addition to the default Chemprop featurizers.\n", + "\n", + "* Extra atom and bond features are used in addition to those calculated by Chemprop 
internally. \n", + "* Extra atom descriptors get incorporated into the atom descriptors from message passing via a learned linear transformation. \n", + "* Extra bond descriptors are not currently supported because the bond descriptors from message passing are not used for molecular property prediction. \n", + "* Extra molecule features can be used as extra datapoint descriptors, which are concatenated to the output of the aggregation layer before the final prediction layer." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/extra_features_descriptors.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loading packages and data" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "\n", + "from lightning import pytorch as pl\n", + "from rdkit import Chem\n", + "\n", + "from chemprop import data, featurizers, models, nn, utils" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n", + "smiles_column = \"smiles\"\n", + "target_columns = [\"lipo\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting extra features and descriptors" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `rdkit.Chem.Mol` representation of molecules is needed as input to many featurizers. Chemprop provides a helpful wrapper to rdkit to make these from SMILES." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [utils.make_mol(smi, keep_h=False, add_h=False) for smi in smis]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extra atom features, atom descriptors, bond features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extra atom and bond features frequently come from QM calculations. The calculation results can be saved to a file and then loaded in a notebook using pandas or numpy. The loaded atom or bond features can be a list of numpy arrays where each numpy array of features corresponds to a single molecule in the dataset. Each row in an array corresponds to a different atom or bond in the same order of atoms or bonds in the `rdkit.Chem.Mol` objects. \n", + "\n", + "The atom features could also be used as extra atom descriptors." 
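+ "\n",
+ "Once these arrays are loaded (as in the cells below), a quick sanity check like the illustrative sketch here (not part of the original workflow) can confirm that each array lines up with its molecule's atom and bond counts:\n",
+ "\n",
+ "```python\n",
+ "for mol, V_f, E_f in zip(mols, extra_atom_featuress, extra_bond_featuress):\n",
+ "    # one row per atom/bond, in the same order as mol.GetAtoms() / mol.GetBonds()\n",
+ "    assert V_f.shape[0] == mol.GetNumAtoms()\n",
+ "    assert E_f.shape[0] == mol.GetNumBonds()\n",
+ "```"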
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# This code is just a placeholder for the actual QM calculation\n", + "\n", + "\n", + "def QM_calculation(mol):\n", + " n_extra_atom_feats = 10\n", + " n_extra_bond_feats = 4\n", + " extra_atom_features = np.array([np.random.randn(n_extra_atom_feats) for a in mol.GetAtoms()])\n", + " extra_bond_features = np.array([np.random.randn(n_extra_bond_feats) for a in mol.GetBonds()])\n", + " return extra_atom_features, extra_bond_features\n", + "\n", + "\n", + "extra_atom_featuress = []\n", + "extra_bond_featuress = []\n", + "\n", + "for mol in mols:\n", + " extra_atom_features, extra_bond_features = QM_calculation(mol)\n", + " extra_atom_featuress.append(extra_atom_features)\n", + " extra_bond_featuress.append(extra_bond_features)\n", + "\n", + "# Save to a file\n", + "np.savez(\"atom_features.npz\", *extra_atom_featuress)\n", + "np.savez(\"bond_features.npz\", *extra_bond_featuress)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "extra_atom_featuress = np.load(\"atom_features.npz\")\n", + "extra_atom_featuress = [extra_atom_featuress[f\"arr_{i}\"] for i in range(len(extra_atom_featuress))]\n", + "\n", + "extra_atom_descriptorss = extra_atom_featuress\n", + "\n", + "extra_bond_featuress = np.load(\"bond_features.npz\")\n", + "extra_bond_featuress = [extra_bond_featuress[f\"arr_{i}\"] for i in range(len(extra_bond_featuress))]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also get extra atom and bond features from other sources." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "atom_radii = {1: 0.79, 5: 1.2, 6: 0.91, 7: 0.75, 8: 0.65, 9: 0.57, 16: 1.1, 17: 0.97, 35: 1.1}\n", + "\n", + "extra_atom_featuress = [\n", + " np.vstack([np.array([[atom_radii[a.GetAtomicNum()]] for a in mol.GetAtoms()])]) for mol in mols\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extra molecule features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A QM calculation could also be used to get extra molecule features. Extra molecule features are different from extra atom and bond features in that they are stored in a single numpy array where each row corresponds to a single molecule in the dataset, instead of a list of numpy arrays." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def QM_calculation(mol):\n", + " n_extra_mol_feats = 7\n", + " return np.random.randn(n_extra_mol_feats)\n", + "\n", + "\n", + "extra_mol_features = np.array([QM_calculation(mol) for mol in mols])\n", + "\n", + "np.savez(\"mol_features.npz\", extra_mol_features)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "extra_mol_features = np.load(\"mol_features.npz\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The extra molecule features can also be calculated using built-in Chemprop featurizers or featurizers from other packages." 
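+ "\n",
+ "For example, a small set of molecule-level features can be assembled directly from RDKit descriptors (a minimal sketch; the particular descriptors chosen here are arbitrary):\n",
+ "\n",
+ "```python\n",
+ "from rdkit.Chem import Descriptors\n",
+ "\n",
+ "# one row per molecule, one column per descriptor\n",
+ "extra_mol_features = np.array(\n",
+ "    [[Descriptors.MolWt(mol), Descriptors.MolLogP(mol), Descriptors.TPSA(mol)] for mol in mols]\n",
+ ")\n",
+ "```"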
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "molecule_featurizer = featurizers.MorganBinaryFeaturizer()\n", + "\n", + "extra_mol_features = np.array([molecule_featurizer(mol) for mol in mols])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# First install other package\n", + "# !pip install descriptastorus\n", + "\n", + "# from descriptastorus.descriptors import rdNormalizedDescriptors\n", + "# generator = rdNormalizedDescriptors.RDKit2DNormalized()\n", + "# extra_mol_features = np.array([generator.process(smi)[1:] for smi in smis])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The molecule featurizers available in Chemprop are registered in `MoleculeFeaturizerRegristry`." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "morgan_binary\n", + "morgan_count\n", + "rdkit_2d\n", + "v1_rdkit_2d\n", + "v1_rdkit_2d_normalized\n" + ] + } + ], + "source": [ + "for MoleculeFeaturizer in featurizers.MoleculeFeaturizerRegistry.keys():\n", + " print(MoleculeFeaturizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your model takes multiple components as input, you can use extra molecule features for each component as extra datapoint descriptors. Simply concatentate the extra molecule features together before passing them to the datapoints." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "extra_mol_features_comp1 = np.random.rand(len(mols), 5)\n", + "extra_mol_features_comp2 = np.random.rand(len(mols), 5)\n", + "\n", + "extra_datapoint_descriptors = np.concatenate(\n", + " [extra_mol_features_comp1, extra_mol_features_comp2], axis=1\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Making datapoints, datasets, and dataloaders" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you have all the extra features and descriptors your model will use, you can make the datapoints." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "datapoints = [\n", + " data.MoleculeDatapoint(mol, y, V_f=V_f, E_f=E_f, V_d=V_d, x_d=X_d)\n", + " for mol, y, V_f, E_f, V_d, X_d in zip(\n", + " mols,\n", + " ys,\n", + " extra_atom_featuress,\n", + " extra_bond_featuress,\n", + " extra_atom_descriptorss,\n", + " extra_datapoint_descriptors,\n", + " )\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After splitting the data, the datasets are made. To make a dataset, you need a `MolGraph` featurizer, which needs to be told the size of extra atom and bond features. 
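+ "\n",
+ "The featurizer then reports the combined feature widths that the model will need later (a brief sketch; the extra sizes of 10 and 4 are just placeholder values):\n",
+ "\n",
+ "```python\n",
+ "f = featurizers.SimpleMoleculeMolGraphFeaturizer(extra_atom_fdim=10, extra_bond_fdim=4)\n",
+ "# atom_fdim / bond_fdim include both the default Chemprop features and the extra ones\n",
+ "print(f.atom_fdim, f.extra_atom_fdim)\n",
+ "print(f.bond_fdim, f.extra_bond_fdim)\n",
+ "```"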
" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "n_extra_atom_feats = extra_atom_featuress[0].shape[1]\n", + "n_extra_bond_feats = extra_bond_featuress[0].shape[1]\n", + "\n", + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer(\n", + " extra_atom_fdim=n_extra_atom_feats, extra_bond_fdim=n_extra_bond_feats\n", + ")\n", + "\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " datapoints, train_indices, val_indices, test_indices\n", + ")\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0], featurizer)\n", + "val_dset = data.MoleculeDataset(val_data[0], featurizer)\n", + "test_dset = data.MoleculeDataset(test_data[0], featurizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Often scaling the extra features and descriptors improves model performance. The scalers for the extra features and descriptors should be fit to the training dataset, applied to the validation dataset, and then given to the model to apply to the test dataset at prediction time. This is the same as for scaling target values to improve model performance." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
StandardScaler()
" + ], + "text/plain": [ + "StandardScaler()" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "targets_scaler = train_dset.normalize_targets()\n", + "extra_atom_features_scaler = train_dset.normalize_inputs(\"V_f\")\n", + "extra_bond_features_scaler = train_dset.normalize_inputs(\"E_f\")\n", + "extra_atom_descriptors_scaler = train_dset.normalize_inputs(\"V_d\")\n", + "extra_datapoint_descriptors_scaler = train_dset.normalize_inputs(\"X_d\")\n", + "\n", + "val_dset.normalize_targets(targets_scaler)\n", + "val_dset.normalize_inputs(\"V_f\", extra_atom_features_scaler)\n", + "val_dset.normalize_inputs(\"E_f\", extra_bond_features_scaler)\n", + "val_dset.normalize_inputs(\"V_d\", extra_atom_descriptors_scaler)\n", + "val_dset.normalize_inputs(\"X_d\", extra_datapoint_descriptors_scaler)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Featurize the train and val datasets to save computation time.\n", + "train_dset.cache = True\n", + "val_dset.cache = True\n", + "\n", + "train_loader = data.build_dataloader(train_dset)\n", + "val_loader = data.build_dataloader(val_dset, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Making the model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The message passing layer needs to know the total size of atom and bond features (i.e. the sum of the sizes of the Chemprop atom and bond features and the extra atom and bond features). The `MolGraph` featurizer collects this information. The message passing layer also needs to know the number of extra atom descriptors.\n", + "\n", + "The extra atom and bond features scalers are combined into a graph transform which is given to the message passing layer to use at prediction time. To avoid scaling the atom and bond features from the internal Chemprop featurizers, the graph transform uses a pad equal to the length of features from the Chemprop internal atom and bond featurizers. This information is stored in the `MolGraph` featurizer.\n", + "\n", + "The extra atom descriptor scaler are also converted to a transform and given to the message passing layer to use at prediction time." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "n_V_features = featurizer.atom_fdim - featurizer.extra_atom_fdim\n", + "n_E_features = featurizer.bond_fdim - featurizer.extra_bond_fdim\n", + "\n", + "V_f_transform = nn.ScaleTransform.from_standard_scaler(extra_atom_features_scaler, pad=n_V_features)\n", + "E_f_transform = nn.ScaleTransform.from_standard_scaler(extra_bond_features_scaler, pad=n_E_features)\n", + "\n", + "graph_transform = nn.GraphTransform(V_f_transform, E_f_transform)\n", + "\n", + "V_d_transform = nn.ScaleTransform.from_standard_scaler(extra_atom_descriptors_scaler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_extra_atom_descs = extra_atom_descriptorss[0].shape[1]\n", + "\n", + "mp = nn.BondMessagePassing(\n", + " d_v=featurizer.atom_fdim,\n", + " d_e=featurizer.bond_fdim,\n", + " d_vd=n_extra_atom_descs,\n", + " graph_transform=graph_transform,\n", + " V_d_transform=V_d_transform,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The predictor layer needs to know the size of the its input, including any extra datapoint descriptors. " + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "ffn_input_dim = mp.output_dim + extra_datapoint_descriptors.shape[1]\n", + "\n", + "output_transform = nn.UnscaleTransform.from_standard_scaler(targets_scaler)\n", + "ffn = nn.RegressionFFN(input_dim=ffn_input_dim, output_transform=output_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The overall model is given the transform from the extra datapoint descriptors scaler." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X_d_transform = nn.ScaleTransform.from_standard_scaler(extra_datapoint_descriptors_scaler)\n", + "\n", + "chemprop_model = models.MPNN(mp, nn.NormAggregation(), ffn, X_d_transform=X_d_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training and prediction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The rest of the training and prediction are the same as other Chemprop workflows." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False, enable_checkpointing=False, enable_progress_bar=True, max_epochs=5\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 325 K | train\n", + "1 | agg | NormAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | RegressionFFN | 96.6 K | train\n", + "4 | X_d_transform | ScaleTransform | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "422 K Trainable params\n", + "0 Non-trainable params\n", + "422 K Total params\n", + "1.690 Total estimated model params size (MB)\n", + "27 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/mse 0.9464761018753052 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9464761018753052 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(chemprop_model, test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/examples/hpopting.ipynb b/chemprop-updated/examples/hpopting.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..768b48b5bad6e051cb9ce7cfafb2d7fd8728e67f --- /dev/null +++ b/chemprop-updated/examples/hpopting.ipynb @@ -0,0 +1,1467 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Running hyperparameter optimization on Chemprop model using RayTune" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/hpopting.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install \".[hpopt]\"\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import packages" + ] + }, + { + 
"cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2024-10-22 09:03:28,414\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", + "2024-10-22 09:03:28,801\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", + "2024-10-22 09:03:29,333\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "from lightning import pytorch as pl\n", + "import ray\n", + "from ray import tune\n", + "from ray.train import CheckpointConfig, RunConfig, ScalingConfig\n", + "from ray.train.lightning import (RayDDPStrategy, RayLightningEnvironment,\n", + " RayTrainReportCallback, prepare_trainer)\n", + "from ray.train.torch import TorchTrainer\n", + "from ray.tune.search.hyperopt import HyperOptSearch\n", + "from ray.tune.search.optuna import OptunaSearch\n", + "from ray.tune.schedulers import FIFOScheduler\n", + "\n", + "from chemprop import data, featurizers, models, nn" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\" # path to your data .csv file\n", + "num_workers = 0 # number of workers for dataloader. 0 means using main process for data loading\n", + "smiles_column = 'smiles' # name of the column containing SMILES strings\n", + "target_columns = ['lipo'] # list of names of the columns containing targets\n", + "\n", + "hpopt_save_dir = Path.cwd() / \"hpopt\" # directory to save hyperopt results\n", + "hpopt_save_dir.mkdir(exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smileslipo
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc143.54
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...-1.18
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl3.69
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...3.37
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...3.10
.........
95CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...2.20
96CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...2.04
97CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...4.49
98COc1ccc(Cc2c(N)n[nH]c2N)cc10.20
99CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...2.00
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Make data points, splits, and datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.mol for d in all_data] # RDkit Mol objects are use for structure based splits\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0], featurizer)\n", + "scaler = train_dset.normalize_targets()\n", + "\n", + "val_dset = data.MoleculeDataset(val_data[0], featurizer)\n", + "val_dset.normalize_targets(scaler)\n", + "\n", + "test_dset = data.MoleculeDataset(test_data[0], featurizer)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Define helper function to train the model" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def train_model(config, train_dset, val_dset, num_workers, scaler):\n", + "\n", + " # config is a dictionary containing hyperparameters used for the trial\n", + " depth = int(config[\"depth\"])\n", + " ffn_hidden_dim = int(config[\"ffn_hidden_dim\"])\n", + " ffn_num_layers = int(config[\"ffn_num_layers\"])\n", + " message_hidden_dim = int(config[\"message_hidden_dim\"])\n", + "\n", + " train_loader = data.build_dataloader(train_dset, num_workers=num_workers, shuffle=True)\n", + " val_loader = data.build_dataloader(val_dset, num_workers=num_workers, shuffle=False)\n", + "\n", + " mp = nn.BondMessagePassing(d_h=message_hidden_dim, depth=depth)\n", + " agg = nn.MeanAggregation()\n", + " output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)\n", + " ffn = nn.RegressionFFN(output_transform=output_transform, input_dim=message_hidden_dim, hidden_dim=ffn_hidden_dim, n_layers=ffn_num_layers)\n", + " batch_norm = True\n", + " metric_list = [nn.metrics.RMSE(), nn.metrics.MAE()]\n", + " 
model = models.MPNN(mp, agg, ffn, batch_norm, metric_list)\n", + "\n", + " trainer = pl.Trainer(\n", + " accelerator=\"auto\",\n", + " devices=1,\n", + " max_epochs=20, # number of epochs to train for\n", + " # below are needed for Ray and Lightning integration\n", + " strategy=RayDDPStrategy(),\n", + " callbacks=[RayTrainReportCallback()],\n", + " plugins=[RayLightningEnvironment()],\n", + " )\n", + "\n", + " trainer = prepare_trainer(trainer)\n", + " trainer.fit(model, train_loader, val_loader)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define parameter search space" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "search_space = {\n", + " \"depth\": tune.qrandint(lower=2, upper=6, q=1),\n", + " \"ffn_hidden_dim\": tune.qrandint(lower=300, upper=2400, q=100),\n", + " \"ffn_num_layers\": tune.qrandint(lower=1, upper=3, q=1),\n", + " \"message_hidden_dim\": tune.qrandint(lower=300, upper=2400, q=100),\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
\n", + "

Tune Status

\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Current time:2024-10-22 09:05:01
Running for: 00:01:23.70
Memory: 10.9/15.3 GiB
\n", + "
\n", + "
\n", + "
\n", + "

System Info

\n", + " Using FIFO scheduling algorithm.
Logical resource usage: 2.0/12 CPUs, 0/0 GPUs\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "

Trial Status

\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc train_loop_config/de\n", + "pth train_loop_config/ff\n", + "n_hidden_dim train_loop_config/ff\n", + "n_num_layers train_loop_config/me\n", + "ssage_hidden_dim iter total time (s) train_loss train_loss_step val/rmse
TorchTrainer_f1a6e41aTERMINATED172.31.231.162:24873220002500 20 49.8815 0.0990423 0.168217 0.861368
TorchTrainer_d775c15dTERMINATED172.31.231.162:24953222002400 20 56.6533 0.069695 0.119898 0.90258
\n", + "
\n", + "
\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Setting up process group for: env:// [rank=0, world_size=1]\n", + "\u001b[36m(TorchTrainer pid=24873)\u001b[0m Started distributed worker processes: \n", + "\u001b[36m(TorchTrainer pid=24873)\u001b[0m - (ip=172.31.231.162, pid=24952) world_rank=0, local_rank=0, node_rank=0\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m GPU available: False, used: False\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m TPU available: False, using: 0 TPU cores\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000001)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1: 100%|██████████| 2/2 [00:01<00:00, 1.90it/s, v_num=0, train_loss_step=0.406, val_loss=0.904, train_loss_epoch=0.869]\n", + "Epoch 2: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000002)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 2: 100%|██████████| 2/2 [00:01<00:00, 1.29it/s, v_num=0, train_loss_step=1.290, val_loss=0.842, train_loss_epoch=1.210]\n", + "Epoch 3: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 3: 100%|██████████| 2/2 [00:01<00:00, 1.62it/s, v_num=0, train_loss_step=0.749, val_loss=0.912, train_loss_epoch=0.861]\n", + "Epoch 4: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 
Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000004)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 4: 100%|██████████| 2/2 [00:01<00:00, 1.31it/s, v_num=0, train_loss_step=0.578, val_loss=0.912, train_loss_epoch=0.792]\n", + "Epoch 5: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000005)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 5: 100%|██████████| 2/2 [00:01<00:00, 1.59it/s, v_num=0, train_loss_step=0.751, val_loss=0.887, train_loss_epoch=0.618]\n", + "Epoch 6: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 6: 100%|██████████| 2/2 [00:01<00:00, 1.53it/s, v_num=0, train_loss_step=0.569, val_loss=0.876, train_loss_epoch=0.450]\n", + "Epoch 7: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:15,207\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 7: 50%|█████ | 1/2 [00:00<00:00, 2.28it/s, v_num=0, train_loss_step=0.339, val_loss=0.876, train_loss_epoch=0.450]\u001b[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)\u001b[0m\n", + "Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 3.75it/s, v_num=0, train_loss_step=0.335, val_loss=0.854, train_loss_epoch=1.010]\u001b[32m [repeated 3x across cluster]\u001b[0m\n", + "Validation: | | 0/? 
[00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 2.01it/s, v_num=0, train_loss_step=0.335, val_loss=0.893, train_loss_epoch=0.703]\u001b[32m [repeated 3x across cluster]\u001b[0m\n", + "Epoch 2: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:17,399\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000009)\u001b[32m [repeated 6x across cluster]\u001b[0m\n", + "2024-10-22 09:04:17,944\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:18,760\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:19,250\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. 
A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:20,250\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 11: 50%|█████ | 1/2 [00:00<00:00, 1.25it/s, v_num=0, train_loss_step=0.175, val_loss=0.897, train_loss_epoch=0.258]\u001b[32m [repeated 8x across cluster]\u001b[0m\n", + "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.79it/s, v_num=0, train_loss_step=0.312, val_loss=0.897, train_loss_epoch=0.258]\u001b[32m [repeated 7x across cluster]\u001b[0m\n", + "Validation: | | 0/? [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m \u001b[32m [repeated 11x across cluster]\u001b[0m\n", + "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 7.84it/s]\u001b[A\u001b[32m [repeated 7x across cluster]\u001b[0m\n", + "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.56it/s, v_num=0, train_loss_step=0.312, val_loss=0.869, train_loss_epoch=0.258]\u001b[32m [repeated 7x across cluster]\u001b[0m\n", + "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.27it/s, v_num=0, train_loss_step=0.312, val_loss=0.869, train_loss_epoch=0.203]\u001b[32m [repeated 7x across cluster]\u001b[0m\n", + "Epoch 12: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:22,323\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. 
A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:22,766\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000013)\u001b[32m [repeated 8x across cluster]\u001b[0m\n", + "2024-10-22 09:04:24,404\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:25,524\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 14: 50%|█████ | 1/2 [00:01<00:01, 0.88it/s, v_num=0, train_loss_step=0.131, val_loss=0.841, train_loss_epoch=0.141] \u001b[32m [repeated 6x across cluster]\u001b[0m\n", + "Epoch 7: 100%|██████████| 2/2 [00:01<00:00, 1.13it/s, v_num=0, train_loss_step=0.368, val_loss=0.836, train_loss_epoch=0.399]\u001b[32m [repeated 5x across cluster]\u001b[0m\n", + "Validation: | | 0/? [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:28,260\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. 
A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000015)\u001b[32m [repeated 4x across cluster]\u001b[0m\n", + "2024-10-22 09:04:30,172\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 9: 50%|█████ | 1/2 [00:01<00:01, 0.72it/s, v_num=0, train_loss_step=0.216, val_loss=0.889, train_loss_epoch=0.254]\u001b[32m [repeated 3x across cluster]\u001b[0m\n", + "Epoch 9: 100%|██████████| 2/2 [00:01<00:00, 1.04it/s, v_num=0, train_loss_step=0.322, val_loss=0.889, train_loss_epoch=0.254]\u001b[32m [repeated 4x across cluster]\u001b[0m\n", + "Validation: | | 0/? [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m \u001b[32m [repeated 9x across cluster]\u001b[0m\n", + "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 4.73it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n", + "Epoch 9: 100%|██████████| 2/2 [00:02<00:00, 0.90it/s, v_num=0, train_loss_step=0.322, val_loss=0.910, train_loss_epoch=0.254]\u001b[32m [repeated 4x across cluster]\u001b[0m\n", + "Epoch 9: 100%|██████████| 2/2 [00:02<00:00, 0.70it/s, v_num=0, train_loss_step=0.322, val_loss=0.910, train_loss_epoch=0.237]\u001b[32m [repeated 4x across cluster]\u001b[0m\n", + "Epoch 16: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:33,534\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. 
A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:34,844\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/d775c15d/checkpoint_000011)\u001b[32m [repeated 5x across cluster]\u001b[0m\n", + "2024-10-22 09:04:35,472\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 18: 50%|█████ | 1/2 [00:01<00:01, 0.98it/s, v_num=0, train_loss_step=0.0962, val_loss=0.781, train_loss_epoch=0.116]\u001b[32m [repeated 5x across cluster]\u001b[0m\n", + "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.91it/s, v_num=0, train_loss_step=0.263, val_loss=0.889, train_loss_epoch=0.219]\u001b[32m [repeated 3x across cluster]\u001b[0m\n", + "Validation: | | 0/? [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:38,006\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. 
A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000019)\u001b[32m [repeated 3x across cluster]\u001b[0m\n", + "2024-10-22 09:04:40,708\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:41,380\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m `Trainer.fit` stopped: `max_epochs=20` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 13: 50%|█████ | 1/2 [00:00<00:00, 1.17it/s, v_num=0, train_loss_step=0.118, val_loss=0.849, train_loss_epoch=0.122]\u001b[32m [repeated 3x across cluster]\u001b[0m\n", + "Epoch 13: 100%|██████████| 2/2 [00:01<00:00, 1.62it/s, v_num=0, train_loss_step=0.0846, val_loss=0.849, train_loss_epoch=0.122]\u001b[32m [repeated 4x across cluster]\u001b[0m\n", + "Validation: | | 0/? [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 15: 50%|█████ | 1/2 [00:01<00:01, 0.64it/s, v_num=0, train_loss_step=0.0923, val_loss=0.839, train_loss_epoch=0.0974]\u001b[32m [repeated 2x across cluster]\u001b[0m\n", + "Epoch 15: 100%|██████████| 2/2 [00:02<00:00, 0.94it/s, v_num=0, train_loss_step=0.0867, val_loss=0.839, train_loss_epoch=0.0974]\u001b[32m [repeated 2x across cluster]\u001b[0m\n", + "Validation: | | 0/? 
[00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 15: 100%|██████████| 2/2 [00:03<00:00, 0.54it/s, v_num=0, train_loss_step=0.0867, val_loss=0.837, train_loss_epoch=0.0912]\n", + "Epoch 16: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 16: 100%|██████████| 2/2 [00:04<00:00, 0.41it/s, v_num=0, train_loss_step=0.0703, val_loss=0.837, train_loss_epoch=0.0774]\n", + "Epoch 17: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 17: 100%|██████████| 2/2 [00:01<00:00, 1.01it/s, v_num=0, train_loss_step=0.156, val_loss=0.836, train_loss_epoch=0.0882]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 18: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 18: 100%|██████████| 2/2 [00:01<00:00, 1.32it/s, v_num=0, train_loss_step=0.064, val_loss=0.830, train_loss_epoch=0.0675]\n", + "Epoch 19: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 19: 100%|██████████| 2/2 [00:01<00:00, 1.55it/s, v_num=0, train_loss_step=0.120, val_loss=0.815, train_loss_epoch=0.0697]\n", + "Epoch 19: 100%|██████████| 2/2 [00:01<00:00, 1.13it/s, v_num=0, train_loss_step=0.120, val_loss=0.815, train_loss_epoch=0.0697]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m `Trainer.fit` stopped: `max_epochs=20` reached.\n", + "2024-10-22 09:05:01,809\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple 
times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:05:01,823\tINFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37' in 0.0305s.\n", + "2024-10-22 09:05:01,873\tINFO tune.py:1048 -- Total run time: 83.87 seconds (83.66 seconds for the tuning loop).\n" + ] + } + ], + "source": [ + "ray.init()\n", + "\n", + "scheduler = FIFOScheduler()\n", + "\n", + "# Scaling config controls the resources used by Ray\n", + "scaling_config = ScalingConfig(\n", + " num_workers=1,\n", + " use_gpu=False, # change to True if you want to use GPU\n", + ")\n", + "\n", + "# Checkpoint config controls the checkpointing behavior of Ray\n", + "checkpoint_config = CheckpointConfig(\n", + " num_to_keep=1, # number of checkpoints to keep\n", + " checkpoint_score_attribute=\"val_loss\", # Save the checkpoint based on this metric\n", + " checkpoint_score_order=\"min\", # Save the checkpoint with the lowest metric value\n", + ")\n", + "\n", + "run_config = RunConfig(\n", + " checkpoint_config=checkpoint_config,\n", + " storage_path=hpopt_save_dir / \"ray_results\", # directory to save the results\n", + ")\n", + "\n", + "ray_trainer = TorchTrainer(\n", + " lambda config: train_model(\n", + " config, train_dset, val_dset, num_workers, scaler\n", + " ),\n", + " scaling_config=scaling_config,\n", + " run_config=run_config,\n", + ")\n", + "\n", + "search_alg = HyperOptSearch(\n", + " n_initial_points=1, # number of random evaluations before tree parzen estimators\n", + " random_state_seed=42,\n", + ")\n", + "\n", + "# OptunaSearch is another search algorithm that can be used\n", + "# search_alg = OptunaSearch() \n", + "\n", + "tune_config = tune.TuneConfig(\n", + " metric=\"val_loss\",\n", + " mode=\"min\",\n", + " num_samples=2, # number of trials to run\n", + " scheduler=scheduler,\n", + " search_alg=search_alg,\n", + " trial_dirname_creator=lambda trial: str(trial.trial_id), # shorten filepaths\n", + " \n", + ")\n", + "\n", + "tuner = tune.Tuner(\n", + " ray_trainer,\n", + " param_space={\n", + " \"train_loop_config\": search_space,\n", + " },\n", + " tune_config=tune_config,\n", + ")\n", + "\n", + "# Start the hyperparameter search\n", + "results = tuner.fit()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hyperparameter optimization results" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ResultGrid<[\n", + " Result(\n", + " metrics={'train_loss': 0.09904231131076813, 'train_loss_step': 0.16821686923503876, 'val/rmse': 0.8613682389259338, 'val/mae': 0.7006751298904419, 'val_loss': 0.7419552206993103, 'train_loss_epoch': 0.09904231131076813, 'epoch': 19, 'step': 40},\n", + " path='/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a',\n", + " filesystem='local',\n", + " checkpoint=Checkpoint(filesystem=local, 
path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000019)\n", + " ),\n", + " Result(\n", + " metrics={'train_loss': 0.06969495117664337, 'train_loss_step': 0.11989812552928925, 'val/rmse': 0.902579665184021, 'val/mae': 0.7176367044448853, 'val_loss': 0.8146500587463379, 'train_loss_epoch': 0.06969495117664337, 'epoch': 19, 'step': 40},\n", + " path='/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/d775c15d',\n", + " filesystem='local',\n", + " checkpoint=Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/d775c15d/checkpoint_000019)\n", + " )\n", + "]>" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
train_losstrain_loss_stepval/rmseval/maeval_losstrain_loss_epochepochsteptimestampcheckpoint_dir_name...pidhostnamenode_iptime_since_restoreiterations_since_restoreconfig/train_loop_config/depthconfig/train_loop_config/ffn_hidden_dimconfig/train_loop_config/ffn_num_layersconfig/train_loop_config/message_hidden_dimlogdir
00.0990420.1682170.8613680.7006750.7419550.09904219401729602279checkpoint_000019...24873Knathan-Laptop172.31.231.16249.88151620220002500f1a6e41a
10.0696950.1198980.9025800.7176370.8146500.06969519401729602299checkpoint_000019...24953Knathan-Laptop172.31.231.16256.65333620222002400d775c15d
\n", + "

2 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " train_loss train_loss_step val/rmse val/mae val_loss \\\n", + "0 0.099042 0.168217 0.861368 0.700675 0.741955 \n", + "1 0.069695 0.119898 0.902580 0.717637 0.814650 \n", + "\n", + " train_loss_epoch epoch step timestamp checkpoint_dir_name ... pid \\\n", + "0 0.099042 19 40 1729602279 checkpoint_000019 ... 24873 \n", + "1 0.069695 19 40 1729602299 checkpoint_000019 ... 24953 \n", + "\n", + " hostname node_ip time_since_restore iterations_since_restore \\\n", + "0 Knathan-Laptop 172.31.231.162 49.881516 20 \n", + "1 Knathan-Laptop 172.31.231.162 56.653336 20 \n", + "\n", + " config/train_loop_config/depth config/train_loop_config/ffn_hidden_dim \\\n", + "0 2 2000 \n", + "1 2 2200 \n", + "\n", + " config/train_loop_config/ffn_num_layers \\\n", + "0 2 \n", + "1 2 \n", + "\n", + " config/train_loop_config/message_hidden_dim logdir \n", + "0 500 f1a6e41a \n", + "1 400 d775c15d \n", + "\n", + "[2 rows x 27 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# results of all trials\n", + "result_df = results.get_dataframe()\n", + "result_df" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'depth': 2,\n", + " 'ffn_hidden_dim': 2000,\n", + " 'ffn_num_layers': 2,\n", + " 'message_hidden_dim': 500}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# best configuration\n", + "best_result = results.get_best_result()\n", + "best_config = best_result.config\n", + "best_config['train_loop_config']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best model checkpoint path: /home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000019/checkpoint.ckpt\n" + ] + } + ], + "source": [ + "# best model checkpoint path\n", + "best_result = results.get_best_result()\n", + "best_checkpoint_path = Path(best_result.checkpoint.path) / \"checkpoint.ckpt\"\n", + "print(f\"Best model checkpoint path: {best_checkpoint_path}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "ray.shutdown()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/examples/interpreting_monte_carlo_tree_search.ipynb b/chemprop-updated/examples/interpreting_monte_carlo_tree_search.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..6bc828b536a17a43e5b69e3d9506a0191d0015fe --- /dev/null +++ b/chemprop-updated/examples/interpreting_monte_carlo_tree_search.ipynb @@ -0,0 +1,1116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Interpretability with Monte Carlo Tree search\n", + "\n", + "Based on the paper Jin et al., [Multi-Objective Molecule Generation using Interpretable Substructures](https://arxiv.org/abs/2002.03244) and modified from Chemprop v1 
[interpret.py](https://github.com/chemprop/chemprop/blob/master/chemprop/interpret.py)\n", + "\n", + "Please scroll to after the helper functions to change the model and data input and run the interpretation algorithm\n", + "\n", + "Note: \n", + "- The interpret function does not yet work with additional atom or bond features, as the substructure extracted doesn't necessarily have the corresponding additional atom or bond features readily available.\n", + "- It currently only works with single molecule model\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/interpreting_monte_carlo_tree_search.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import dataclass, field\n", + "import math\n", + "from pathlib import Path\n", + "import time\n", + "from typing import Callable, Union, Iterable\n", + "\n", + "from lightning import pytorch as pl\n", + "import numpy as np\n", + "import pandas as pd\n", + "from rdkit import Chem\n", + "import torch\n", + "\n", + "from chemprop import data, featurizers, models\n", + "from chemprop.models import MPNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define helper function to make model predictions from SMILES" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def make_prediction(\n", + " models: list[MPNN],\n", + " trainer: pl.Trainer,\n", + " smiles: list[str],\n", + ") -> np.ndarray:\n", + " \"\"\"Makes predictions on a list of SMILES.\n", + "\n", + " Parameters\n", + " ----------\n", + " models : list\n", + " A list of models to make predictions with.\n", + " smiles : list\n", + " A list of SMILES to make predictions on.\n", + "\n", + " Returns\n", + " -------\n", + " list[list[float]]\n", + " A list of lists containing the predicted values.\n", + " \"\"\"\n", + "\n", + " test_data = [data.MoleculeDatapoint.from_smi(smi) for smi in smiles]\n", + " test_dset = data.MoleculeDataset(test_data)\n", + " test_loader = data.build_dataloader(\n", + " test_dset, batch_size=1, num_workers=0, shuffle=False\n", + " )\n", + "\n", + " with torch.inference_mode():\n", + " sum_preds = []\n", + " for model in models:\n", + " predss = trainer.predict(model, test_loader)\n", + " preds = torch.cat(predss, 0)\n", + " preds = preds.cpu().numpy()\n", + " sum_preds.append(preds)\n", + "\n", + " # Ensemble predictions\n", + " sum_preds = sum(sum_preds)\n", + " avg_preds = sum_preds / len(models)\n", + "\n", + " return avg_preds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Classes/functions relevant to Monte Carlo Tree Search\n", + "\n", + "Mostly similar to the scripts from Chemprop v1 
[interpret.py](https://github.com/chemprop/chemprop/blob/master/chemprop/interpret.py) with additional documentation" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "@dataclass\n", + "class MCTSNode:\n", + " \"\"\"Represents a node in a Monte Carlo Tree Search.\n", + "\n", + " Parameters\n", + " ----------\n", + " smiles : str\n", + " The SMILES for the substructure at this node.\n", + " atoms : list\n", + " A list of atom indices in the substructure at this node.\n", + " W : float\n", + " The total action value, which indicates how likely the deletion will lead to a good rationale.\n", + " N : int\n", + " The visit count, which indicates how many times this node has been visited. It is used to balance exploration and exploitation.\n", + " P : float\n", + " The predicted property score of the new subgraphs' after the deletion, shown as R in the original paper.\n", + " \"\"\"\n", + "\n", + " smiles: str\n", + " atoms: Iterable[int]\n", + " W: float = 0\n", + " N: int = 0\n", + " P: float = 0\n", + " children: list[...] = field(default_factory=list)\n", + "\n", + " def __post_init__(self):\n", + " self.atoms = set(self.atoms)\n", + "\n", + " def Q(self) -> float:\n", + " \"\"\"\n", + " Returns\n", + " -------\n", + " float\n", + " The mean action value of the node.\n", + " \"\"\"\n", + " return self.W / self.N if self.N > 0 else 0\n", + "\n", + " def U(self, n: int, c_puct: float = 10.0) -> float:\n", + " \"\"\"\n", + " Parameters\n", + " ----------\n", + " n : int\n", + " The sum of the visit count of this node's siblings.\n", + " c_puct : float\n", + " A constant that controls the level of exploration.\n", + " \n", + " Returns\n", + " -------\n", + " float\n", + " The exploration value of the node.\n", + " \"\"\"\n", + " return c_puct * self.P * math.sqrt(n) / (1 + self.N)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def find_clusters(mol: Chem.Mol) -> tuple[list[tuple[int, ...]], list[list[int]]]:\n", + " \"\"\"Finds clusters within the molecule. Jin et al. from [1]_ only allows deletion of one peripheral non-aromatic bond or one peripheral ring from each state,\n", + " so the clusters here are defined as non-ring bonds and the smallest set of smallest rings.\n", + "\n", + " Parameters\n", + " ----------\n", + " mol : RDKit molecule\n", + " The molecule to find clusters in.\n", + "\n", + " Returns\n", + " -------\n", + " tuple\n", + " A tuple containing:\n", + " - list of tuples: Each tuple contains atoms in a cluster.\n", + " - list of int: Each atom's cluster index.\n", + " \n", + " References\n", + " ----------\n", + " .. [1] Jin, Wengong, Regina Barzilay, and Tommi Jaakkola. \"Multi-objective molecule generation using interpretable substructures.\" International conference on machine learning. PMLR, 2020. 
https://arxiv.org/abs/2002.03244\n", + " \"\"\"\n", + "\n", + " n_atoms = mol.GetNumAtoms()\n", + " if n_atoms == 1: # special case\n", + " return [(0,)], [[0]]\n", + "\n", + " clusters = []\n", + " for bond in mol.GetBonds():\n", + " a1 = bond.GetBeginAtom().GetIdx()\n", + " a2 = bond.GetEndAtom().GetIdx()\n", + " if not bond.IsInRing():\n", + " clusters.append((a1, a2))\n", + "\n", + " ssr = [tuple(x) for x in Chem.GetSymmSSSR(mol)]\n", + " clusters.extend(ssr)\n", + "\n", + " atom_cls = [[] for _ in range(n_atoms)]\n", + " for i in range(len(clusters)):\n", + " for atom in clusters[i]:\n", + " atom_cls[atom].append(i)\n", + "\n", + " return clusters, atom_cls" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_subgraph_from_mol(mol: Chem.Mol, selected_atoms: set[int]) -> tuple[Chem.Mol, list[int]]:\n", + " \"\"\"Extracts a subgraph from an RDKit molecule given a set of atom indices.\n", + "\n", + " Parameters\n", + " ----------\n", + " mol : RDKit molecule\n", + " The molecule from which to extract a subgraph.\n", + " selected_atoms : list of int\n", + " The indices of atoms which form the subgraph to be extracted.\n", + "\n", + " Returns\n", + " -------\n", + " tuple\n", + " A tuple containing:\n", + " - RDKit molecule: The subgraph.\n", + " - list of int: Root atom indices from the selected indices.\n", + " \"\"\"\n", + "\n", + " selected_atoms = set(selected_atoms)\n", + " roots = []\n", + " for idx in selected_atoms:\n", + " atom = mol.GetAtomWithIdx(idx)\n", + " bad_neis = [y for y in atom.GetNeighbors() if y.GetIdx() not in selected_atoms]\n", + " if len(bad_neis) > 0:\n", + " roots.append(idx)\n", + "\n", + " new_mol = Chem.RWMol(mol)\n", + "\n", + " for atom_idx in roots:\n", + " atom = new_mol.GetAtomWithIdx(atom_idx)\n", + " atom.SetAtomMapNum(1)\n", + " aroma_bonds = [\n", + " bond for bond in atom.GetBonds() if bond.GetBondType() == Chem.rdchem.BondType.AROMATIC\n", + " ]\n", + " aroma_bonds = [\n", + " bond\n", + " for bond in aroma_bonds\n", + " if bond.GetBeginAtom().GetIdx() in selected_atoms\n", + " and bond.GetEndAtom().GetIdx() in selected_atoms\n", + " ]\n", + " if len(aroma_bonds) == 0:\n", + " atom.SetIsAromatic(False)\n", + "\n", + " remove_atoms = [\n", + " atom.GetIdx() for atom in new_mol.GetAtoms() if atom.GetIdx() not in selected_atoms\n", + " ]\n", + " remove_atoms = sorted(remove_atoms, reverse=True)\n", + " for atom in remove_atoms:\n", + " new_mol.RemoveAtom(atom)\n", + "\n", + " return new_mol.GetMol(), roots" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_subgraph(smiles: str, selected_atoms: set[int]) -> tuple[str, list[int]]:\n", + " \"\"\"Extracts a subgraph from a SMILES given a set of atom indices.\n", + "\n", + " Parameters\n", + " ----------\n", + " smiles : str\n", + " The SMILES string from which to extract a subgraph.\n", + " selected_atoms : list of int\n", + " The indices of atoms which form the subgraph to be extracted.\n", + "\n", + " Returns\n", + " -------\n", + " tuple\n", + " A tuple containing:\n", + " - str: SMILES representing the subgraph.\n", + " - list of int: Root atom indices from the selected indices.\n", + " \"\"\"\n", + " # try with kekulization\n", + " mol = Chem.MolFromSmiles(smiles)\n", + " Chem.Kekulize(mol)\n", + " subgraph, roots = extract_subgraph_from_mol(mol, selected_atoms)\n", + " try:\n", + " subgraph = Chem.MolToSmiles(subgraph, kekuleSmiles=True)\n", + " subgraph 
= Chem.MolFromSmiles(subgraph)\n", + " except Exception:\n", + " subgraph = None\n", + "\n", + " mol = Chem.MolFromSmiles(smiles) # de-kekulize\n", + " if subgraph is not None and mol.HasSubstructMatch(subgraph):\n", + " return Chem.MolToSmiles(subgraph), roots\n", + "\n", + " # If fails, try without kekulization\n", + " subgraph, roots = extract_subgraph_from_mol(mol, selected_atoms)\n", + " subgraph = Chem.MolToSmiles(subgraph)\n", + " subgraph = Chem.MolFromSmiles(subgraph)\n", + "\n", + " if subgraph is not None:\n", + " return Chem.MolToSmiles(subgraph), roots\n", + " else:\n", + " return None, None" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def mcts_rollout(\n", + " node: MCTSNode,\n", + " state_map: dict[str, MCTSNode],\n", + " orig_smiles: str,\n", + " clusters: list[set[int]],\n", + " atom_cls: list[set[int]],\n", + " nei_cls: list[set[int]],\n", + " scoring_function: Callable[[list[str]], list[float]],\n", + " min_atoms: int = 15,\n", + " c_puct: float = 10.0,\n", + ") -> float:\n", + " \"\"\"A Monte Carlo Tree Search rollout from a given MCTSNode.\n", + "\n", + " Parameters\n", + " ----------\n", + " node : MCTSNode\n", + " The MCTSNode from which to begin the rollout.\n", + " state_map : dict\n", + " A mapping from SMILES to MCTSNode.\n", + " orig_smiles : str\n", + " The original SMILES of the molecule.\n", + " clusters : list\n", + " Clusters of atoms.\n", + " atom_cls : list\n", + " Atom indices in the clusters.\n", + " nei_cls : list\n", + " Neighboring cluster indices.\n", + " scoring_function : function\n", + " A function for scoring subgraph SMILES using a Chemprop model.\n", + " min_atoms : int\n", + " The minimum number of atoms in a subgraph.\n", + " c_puct : float\n", + " The constant controlling the level of exploration.\n", + "\n", + " Returns\n", + " -------\n", + " float\n", + " The score of this MCTS rollout.\n", + " \"\"\"\n", + " # Return if the number of atoms is less than the minimum\n", + " cur_atoms = node.atoms\n", + " if len(cur_atoms) <= min_atoms:\n", + " return node.P\n", + "\n", + " # Expand if this node has never been visited\n", + " if len(node.children) == 0:\n", + " # Cluster indices whose all atoms are present in current subgraph\n", + " cur_cls = set([i for i, x in enumerate(clusters) if x <= cur_atoms])\n", + "\n", + " for i in cur_cls:\n", + " # Leaf atoms are atoms that are only involved in one cluster.\n", + " leaf_atoms = [a for a in clusters[i] if len(atom_cls[a] & cur_cls) == 1]\n", + "\n", + " # This checks\n", + " # 1. If there is only one neighbor cluster in the current subgraph (so that we don't produce unconnected graphs), or\n", + " # 2. 
If the cluster has only two atoms and the current subgraph has only one leaf atom.\n", + " # If either of the conditions is met, remove the leaf atoms in the current cluster.\n", + " if len(nei_cls[i] & cur_cls) == 1 or len(clusters[i]) == 2 and len(leaf_atoms) == 1:\n", + " new_atoms = cur_atoms - set(leaf_atoms)\n", + " new_smiles, _ = extract_subgraph(orig_smiles, new_atoms)\n", + " if new_smiles in state_map:\n", + " new_node = state_map[new_smiles] # merge identical states\n", + " else:\n", + " new_node = MCTSNode(new_smiles, new_atoms)\n", + " if new_smiles:\n", + " node.children.append(new_node)\n", + "\n", + " state_map[node.smiles] = node\n", + " if len(node.children) == 0:\n", + " return node.P # cannot find leaves\n", + "\n", + " scores = scoring_function([x.smiles for x in node.children])\n", + " for child, score in zip(node.children, scores):\n", + " child.P = score\n", + "\n", + " sum_count = sum(c.N for c in node.children)\n", + " selected_node = max(node.children, key=lambda x: x.Q() + x.U(sum_count, c_puct=c_puct))\n", + " v = mcts_rollout(\n", + " selected_node,\n", + " state_map,\n", + " orig_smiles,\n", + " clusters,\n", + " atom_cls,\n", + " nei_cls,\n", + " scoring_function,\n", + " min_atoms=min_atoms,\n", + " c_puct=c_puct,\n", + " )\n", + " selected_node.W += v\n", + " selected_node.N += 1\n", + "\n", + " return v" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def mcts(\n", + " smiles: str,\n", + " scoring_function: Callable[[list[str]], list[float]],\n", + " n_rollout: int,\n", + " max_atoms: int,\n", + " prop_delta: float,\n", + " min_atoms: int = 15,\n", + " c_puct: int = 10,\n", + ") -> list[MCTSNode]:\n", + " \"\"\"Runs the Monte Carlo Tree Search algorithm.\n", + "\n", + " Parameters\n", + " ----------\n", + " smiles : str\n", + " The SMILES of the molecule to perform the search on.\n", + " scoring_function : function\n", + " A function for scoring subgraph SMILES using a Chemprop model.\n", + " n_rollout : int\n", + " The number of MCTS rollouts to perform.\n", + " max_atoms : int\n", + " The maximum number of atoms allowed in an extracted rationale.\n", + " prop_delta : float\n", + " The minimum required property value for a satisfactory rationale.\n", + " min_atoms : int\n", + " The minimum number of atoms in a subgraph.\n", + " c_puct : float\n", + " The constant controlling the level of exploration.\n", + "\n", + " Returns\n", + " -------\n", + " list\n", + " A list of rationales each represented by a MCTSNode.\n", + " \"\"\"\n", + "\n", + " mol = Chem.MolFromSmiles(smiles)\n", + "\n", + " clusters, atom_cls = find_clusters(mol)\n", + " nei_cls = [0] * len(clusters)\n", + " for i, cls in enumerate(clusters):\n", + " nei_cls[i] = [nei for atom in cls for nei in atom_cls[atom]]\n", + " nei_cls[i] = set(nei_cls[i]) - {i}\n", + " clusters[i] = set(list(cls))\n", + " for a in range(len(atom_cls)):\n", + " atom_cls[a] = set(atom_cls[a])\n", + "\n", + " root = MCTSNode(smiles, set(range(mol.GetNumAtoms())))\n", + " state_map = {smiles: root}\n", + " for _ in range(n_rollout):\n", + " mcts_rollout(\n", + " root,\n", + " state_map,\n", + " smiles,\n", + " clusters,\n", + " atom_cls,\n", + " nei_cls,\n", + " scoring_function,\n", + " min_atoms=min_atoms,\n", + " c_puct=c_puct,\n", + " )\n", + "\n", + " rationales = [\n", + " node\n", + " for _, node in state_map.items()\n", + " if len(node.atoms) <= max_atoms and node.P >= prop_delta\n", + " ]\n", + "\n", + " return rationales" + ] + }, + { + 
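+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "As a quick sanity check of the helpers above, one could run something like the snippet below (a minimal sketch; the exact tuples depend on RDKit's atom ordering and SSSR enumeration, so the expected values are only indicative):\n",
+     "\n",
+     "```python\n",
+     "mol = Chem.MolFromSmiles(\"Cc1ccccc1\")  # toluene\n",
+     "clusters, atom_cls = find_clusters(mol)\n",
+     "# clusters should contain the non-ring C-C bond plus the aromatic ring,\n",
+     "# e.g. [(0, 1), (1, 2, 3, 4, 5, 6)]; atom_cls[1] lists every cluster that atom 1 belongs to.\n",
+     "```\n",
+     "\n",
+     "During a rollout, `mcts_rollout` descends into the child that maximizes `Q() + U(...)`: `Q` reflects the average value accumulated in `W`/`N`, while `U` is a PUCT-style exploration bonus scaled by `c_puct` and the node's prior score `P`, so a larger `c_puct` favors exploration over exploitation."
+    ]
+   },
+   {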
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "model_path = (\n", + " chemprop_dir / \"tests\" / \"data\" / \"example_model_v2_regression_mol.pt\"\n", + ") # path to model checkpoint (.ckpt) or model file (.pt)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): GraphTransform(\n", + " (V_transform): Identity()\n", + " (E_transform): Identity()\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn = models.MPNN.load_from_file(model_path) # this is a dummy model for testing purposes\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data to run interpretation for" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "test_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n", + "smiles_column = \"smiles\"" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smileslipo
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc143.54
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...-1.18
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl3.69
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...3.37
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...3.10
.........
95CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...2.20
96CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...2.04
97CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...4.49
98COc1ccc(Cc2c(N)n[nH]c2N)cc10.20
99CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...2.00
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_test = pd.read_csv(test_path)\n", + "df_test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(logger=None, enable_progress_bar=False, accelerator=\"cpu\", devices=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Running interpretation" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# MCTS options\n", + "rollout = 10 # number of MCTS rollouts to perform. If mol.GetNumAtoms() > 50, consider setting n_rollout = 1 to avoid long computation time\n", + "\n", + "c_puct = 10.0 # constant that controls the level of exploration\n", + "\n", + "max_atoms = 20 # maximum number of atoms allowed in an extracted rationale\n", + "\n", + "min_atoms = 8 # minimum number of atoms in an extracted rationale\n", + "\n", + "prop_delta = 0.5 # Minimum score to count as positive.\n", + "# In this algorithm, if the predicted property from the substructure if larger than prop_delta, the substructure is considered satisfactory.\n", + "# This value depends on the property you want to interpret. 0.5 is a dummy value for demonstration purposes\n", + "\n", + "num_rationales_to_keep = 5 # number of rationales to keep for each molecule" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the scoring function. 
\"Score\" for a substructure is the predicted property value of the substructure.\n", + "\n", + "models = [mpnn]\n", + "\n", + "property_for_interpretation = \"lipo\"\n", + "\n", + "property_id = (\n", + " df_test.columns.get_loc(property_for_interpretation) - 1\n", + ") # property index in the dataset; -1 for the SMILES column\n", + "\n", + "\n", + "def scoring_function(smiles: list[str]) -> list[float]:\n", + " return make_prediction(\n", + " models=models,\n", + " trainer=trainer,\n", + " smiles=smiles,\n", + " )[:, property_id]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14',\n", + " 'COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23',\n", + " 'COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl',\n", + " 'OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3',\n", + " 'Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# only use the first 5 SMILES for demonstration purposes\n", + "all_smiles = df_test[smiles_column].tolist()[:5]\n", + "all_smiles" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 10 11 12 13 14\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 11 12 13 14 15\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 8 9 10 11 12\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 7 8 9 10 11\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 1 2 3 4 5\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 0 1 3 4 5\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 11 12 13 14 15\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 8 9 10 11 12\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 7 8 9 10 11\n", + "[19:04:06] Can't kekulize mol. 
Unkekulized atoms: 10 11 12 13 14\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 13 s, sys: 1.38 s, total: 14.4 s\n", + "Wall time: 3.67 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "results_df = {\"smiles\": [], property_for_interpretation: []}\n", + "\n", + "for i in range(num_rationales_to_keep):\n", + " results_df[f\"rationale_{i}\"] = []\n", + " results_df[f\"rationale_{i}_score\"] = []\n", + "\n", + "for smiles in all_smiles:\n", + " score = scoring_function([smiles])[0]\n", + " if score > prop_delta:\n", + " rationales = mcts(\n", + " smiles=smiles,\n", + " scoring_function=scoring_function,\n", + " n_rollout=rollout,\n", + " max_atoms=max_atoms,\n", + " prop_delta=prop_delta,\n", + " min_atoms=min_atoms,\n", + " c_puct=c_puct,\n", + " )\n", + " else:\n", + " rationales = []\n", + "\n", + " results_df[\"smiles\"].append(smiles)\n", + " results_df[property_for_interpretation].append(score)\n", + "\n", + " if len(rationales) == 0:\n", + " for i in range(num_rationales_to_keep):\n", + " results_df[f\"rationale_{i}\"].append(None)\n", + " results_df[f\"rationale_{i}_score\"].append(None)\n", + " else:\n", + " min_size = min(len(x.atoms) for x in rationales)\n", + " min_rationales = [x for x in rationales if len(x.atoms) == min_size]\n", + " rats = sorted(min_rationales, key=lambda x: x.P, reverse=True)\n", + "\n", + " for i in range(num_rationales_to_keep):\n", + " if i < len(rats):\n", + " results_df[f\"rationale_{i}\"].append(rats[i].smiles)\n", + " results_df[f\"rationale_{i}_score\"].append(rats[i].P)\n", + " else:\n", + " results_df[f\"rationale_{i}\"].append(None)\n", + " results_df[f\"rationale_{i}_score\"].append(None)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilesliporationale_0rationale_0_scorerationale_1rationale_1_scorerationale_2rationale_2_scorerationale_3rationale_3_scorerationale_4rationale_4_score
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc142.253542c1ccc2c(c1)n[cH:1][nH:1]22.275024NoneNaNNoneNaNNoneNaNNoneNaN
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...2.235016O=[SH:1]c1c[cH:1][cH:1]cc1[OH:1]2.252582c1c([OH:1])c([S:1][NH2:1])c[cH:1][cH:1]12.252185c1c(N[CH3:1])[cH:1]c[cH:1]c1[SH:1]2.251068c1c([S:1][NH2:1])[cH:1]cc([OH:1])[cH:1]12.250288c1c([NH2:1])[cH:1]c[cH:1]c1[S:1][NH2:1]2.249267
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl2.245891c1cc[cH:1]c([CH2:1][CH2:1][OH:1])c12.249289O=[CH:1][CH2:1]c1cccc[cH:1]12.249207c1cc[cH:1]c([C@@H]([CH3:1])[NH2:1])c12.247827Clc1ccccc1[CH2:1][NH2:1]2.245391Clc1ccccc1[CH2:1][CH3:1]2.243280
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...2.249847c1c([CH3:1])[nH]c2s[cH:1]cc122.267990Clc1cc2c[cH:1][nH]c2s12.267004O=C1N(C[CH3:1])[CH:1]=[CH:1]C[CH2:1]12.211323NoneNaNNoneNaN
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...2.228097c1cc(C[CH2:1][NH2:1])c[cH:1]c12.247070c1cc(C[CH2:1][CH3:1])c[cH:1]c12.245314Cn1nc([CH3:1])cc1[CH2:1][NH2:1]2.225729C[CH2:1]c1cc([CH2:1][NH2:1])[nH:1]n12.223793c1c([CH3:1])n[nH:1]c1[CH2:1]N[CH3:1]2.223478
\n", + "
" + ], + "text/plain": [ + " smiles lipo \\\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 2.253542 \n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... 2.235016 \n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 2.245891 \n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 2.249847 \n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 2.228097 \n", + "\n", + " rationale_0 rationale_0_score \\\n", + "0 c1ccc2c(c1)n[cH:1][nH:1]2 2.275024 \n", + "1 O=[SH:1]c1c[cH:1][cH:1]cc1[OH:1] 2.252582 \n", + "2 c1cc[cH:1]c([CH2:1][CH2:1][OH:1])c1 2.249289 \n", + "3 c1c([CH3:1])[nH]c2s[cH:1]cc12 2.267990 \n", + "4 c1cc(C[CH2:1][NH2:1])c[cH:1]c1 2.247070 \n", + "\n", + " rationale_1 rationale_1_score \\\n", + "0 None NaN \n", + "1 c1c([OH:1])c([S:1][NH2:1])c[cH:1][cH:1]1 2.252185 \n", + "2 O=[CH:1][CH2:1]c1cccc[cH:1]1 2.249207 \n", + "3 Clc1cc2c[cH:1][nH]c2s1 2.267004 \n", + "4 c1cc(C[CH2:1][CH3:1])c[cH:1]c1 2.245314 \n", + "\n", + " rationale_2 rationale_2_score \\\n", + "0 None NaN \n", + "1 c1c(N[CH3:1])[cH:1]c[cH:1]c1[SH:1] 2.251068 \n", + "2 c1cc[cH:1]c([C@@H]([CH3:1])[NH2:1])c1 2.247827 \n", + "3 O=C1N(C[CH3:1])[CH:1]=[CH:1]C[CH2:1]1 2.211323 \n", + "4 Cn1nc([CH3:1])cc1[CH2:1][NH2:1] 2.225729 \n", + "\n", + " rationale_3 rationale_3_score \\\n", + "0 None NaN \n", + "1 c1c([S:1][NH2:1])[cH:1]cc([OH:1])[cH:1]1 2.250288 \n", + "2 Clc1ccccc1[CH2:1][NH2:1] 2.245391 \n", + "3 None NaN \n", + "4 C[CH2:1]c1cc([CH2:1][NH2:1])[nH:1]n1 2.223793 \n", + "\n", + " rationale_4 rationale_4_score \n", + "0 None NaN \n", + "1 c1c([NH2:1])[cH:1]c[cH:1]c1[S:1][NH2:1] 2.249267 \n", + "2 Clc1ccccc1[CH2:1][CH3:1] 2.243280 \n", + "3 None NaN \n", + "4 c1c([CH3:1])n[nH:1]c1[CH2:1]N[CH3:1] 2.223478 " + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results_df = pd.DataFrame(results_df)\n", + "results_df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/examples/mpnn_fingerprints.ipynb b/chemprop-updated/examples/mpnn_fingerprints.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..559f3bfc8bfc9aa71b7678a7a8d9b72fd7a7edf9 --- /dev/null +++ b/chemprop-updated/examples/mpnn_fingerprints.ipynb @@ -0,0 +1,395 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Encoding fingerprint latent representation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/mpnn_fingerprints.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + 
"cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import torch\n", + "from sklearn.decomposition import PCA\n", + "from pathlib import Path\n", + "\n", + "from chemprop import data, featurizers, models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change model input here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "checkpoint_path = chemprop_dir / \"tests/data/example_model_v2_regression_mol.ckpt\" # path to the checkpoint file.\n", + "# If the checkpoint file is generated using the training notebook,\n", + "# it will be in the `checkpoints` folder with name similar to `checkpoints/epoch=19-step=180.ckpt`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load model" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): GraphTransform(\n", + " (V_transform): Identity()\n", + " (E_transform): Identity()\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn = models.MPNN.load_from_checkpoint(checkpoint_path)\n", + "mpnn.eval()\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change data input here" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "test_path = '../tests/data/smis.csv'\n", + "smiles_column = 'smiles'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl', V_f=None, E_f=None, V_d=None),\n", + " 
MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1', V_f=None, E_f=None, V_d=None)]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_test = pd.read_csv(test_path)\n", + "\n", + "smis = df_test[smiles_column]\n", + "\n", + "test_data = [data.MoleculeDatapoint.from_smi(smi) for smi in smis]\n", + "test_data[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Get featurizer" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Get datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "test_dset = data.MoleculeDataset(test_data, featurizer=featurizer)\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Calculate fingerprints" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`models.MPNN.encoding(inputs : BatchMolGraph, i : int)` calculate the i-th hidden representation.\n", + "\n", + "`i` ia the stop index of slice of the MLP used to encode the input. That is, use all\n", + "layers in the MLP _up to_ :attr:`i` (i.e., ``MLP[:i]``). This can be any integer\n", + "value, and the behavior of this function is dependent on the underlying list\n", + "slicing behavior. 
For example:\n", + "\n", + "* ``i=0``: use a 0-layer MLP (i.e., a no-op)\n", + "* ``i=1``: use only the first block\n", + "* ``i=-1``: use _up to_ the second-to-last block" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([100, 300])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with torch.no_grad():\n", + " fingerprints = [\n", + " mpnn.encoding(batch.bmg, batch.V_d, batch.X_d, i=0)\n", + " for batch in test_loader\n", + " ]\n", + " fingerprints = torch.cat(fingerprints, 0)\n", + "\n", + "fingerprints.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([100, 300])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with torch.no_grad():\n", + " encodings = [\n", + " mpnn.encoding(batch.bmg, batch.V_d, batch.X_d, i=1)\n", + " for batch in test_loader\n", + " ]\n", + " encodings = torch.cat(encodings, 0)\n", + "\n", + "encodings.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using fingerprints" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAsAAAAK7CAYAAAD8yjntAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABXqklEQVR4nO3de3xU5b3v8e8kQKKQjAYKCRcxUq2EiAg0EPDaCgYtXrY9chHUVluherygVSnagLUbObu2WsvFWtStKLJVVGJtND1Yt0iQQqBKw1bEKAqJSJAJXhJhZp0/ciZlkkkyazIz6/Z5v155vczKmuSZ5SR851m/5/f4DMMwBAAAAHhEmtUDAAAAAFKJAAwAAABPIQADAADAUwjAAAAA8BQCMAAAADyFAAwAAABPIQADAADAUwjAAAAA8BQCMAAAADyFAAwAcXjsscfk8/mifvzwhz+Uz+fTY489ZvUwU+bss8/W2WefHddjX375Zc2fPz+h4wGAjnSzegAA4GSPPvqoTj755IhjeXl5uvXWWzVkyBCLRpV6S5YsifuxL7/8shYvXkwIBpAyBGAA6ILCwkKNHj26zfHBgwdbMJrYHTp0SD6fT926de2fga+++kpHH320CgoKEjQyAEg+SiAAIME+/PDDNiUQ8+fPl8/n0z//+U9NmzZNfr9f/fr1049//GMFAoGIxx84cEBXX321cnJy1KtXL11wwQX64IMP5PP52syS7tixQ9OnT1ffvn2VkZGhoUOHavHixRHn/O1vf5PP59MTTzyhW265RQMGDFBGRobef//9llKOiooK/ehHP1JOTo569uypyZMn64MPPoj4PmeffbYKCwv13//93xo3bpyOPvpo/fjHP2752pElEOFr8Jvf/Ea//e1vlZ+fr169eqm4uFgbNmxoOe+qq65qGe+RZSQffvihJOmZZ57RmDFj5Pf7dfTRR+uEE05o+ZkAEC9mgAGgC4LBoA4fPhzz+ZdeeqmmTJmiq6++Wu+8847mzp0rSXrkkUckSaFQSJMnT9amTZs0f/58jRw5UpWVlSopKWnzvaqrqzVu3Dgdd9xxuu+++5Sbm6tXXnlFN9xwg/bt26fS0tKI8+fOnavi4mItW7ZMaWlp6tu3b8vXrr76ak2YMEFPPfWUPv74Y9155506++yz9fbbb+uYY45pOa+2tlYzZszQbbfdpn//939XWlrH8yiLFy/WySefrPvvv1+SdNddd+n8889XTU2N/H6/7rrrLn355Zd69tlnVVlZ2fK4vLw8VVZWasqUKZoyZYrmz5+vzMxMffTRR1q7dm3M1xsAoiEAA0AXjB07ts2xHTt2tHv+1VdfrZ///OeSpHPPPVfvv/++HnnkES1fvlw+n0/l5eVat26dli5dqlmzZkmSJkyYoB49erSE5bA5c+YoKytL69atU3Z2dsu5TU1Nuvfee3XDDTfo2GOPbTl/yJAheuaZZ6KOa/To0Vq+fHnL58OGDdP48eO1ePFizZs3r+X4/v379cwzz+h73/teZ5dGkpSVlaWXXnpJ6enpkqT+/furqKhIf/nLXzR16lQNGTJE/fr1k9T2Wq5fv16GYWjZsmXy+/0tx6+66qqYfjYAtIcSCADogscff1x///vfIz46qqu98MILIz4fPny4GhsbtXfvXknS66+/Lkm67LLLIs6bNm1axOeNjY36v//3/+qSSy7R0UcfrcOHD7d8nH/++WpsbIwoNZCaZ5/bc/nll0d8Pm7cOA0ePFivvfZaxPFjjz025vArSRdccEFL+JWan68kffTRR50+9rvf/a6k5mvxX//1X9q9e3fMPxcAOkIABoAuGDp0qEaPHh3x0ZHevXtHfJ6RkSFJ+vrrryVJ9fX16tatm3JyciLOC8+ShtXX1+vw4cN68MEH1b1794iP888/X5K0b9++iMfk5eW1O67c3Nyox+rr62P+HtF09nw7cuaZZ+qFF17Q4cOHdcUVV2jgwIEqLCzUypUrTY0BAFqjBAIAbKR37946fPiw9u/fHxGC6+rqIs479thjlZ6erpkzZ+q6666L+r3y8/MjPvf5fO3+3NbfP3zs29/+dszfIxkuuugiXXTRRWpqatKGDRu0cOFCTZ8+Xccff7yKi4tTOhYA7sEMMADYyFlnnSVJWrVqVcTxp59+OuLzo48+Wuecc46
[remaining base64 PNG data omitted; this output is the scatter plot produced by the source of this cell]", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fingerprints = fingerprints.detach()\n", + "\n", + "pca = PCA(n_components=2)\n", + "\n", + "principalComponents = pca.fit_transform(fingerprints)\n", + "\n", + "fig = plt.figure(figsize=(8, 8))\n", + "ax = fig.add_subplot(1, 1, 1)\n", + "ax.set_title(\"Fingerprints\")\n", + "ax.set_xlabel('PCA1'); ax.set_ylabel('PCA2')\n", + "\n", + "ax.scatter(principalComponents[:, 0], principalComponents[:, 1])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAscAAAK7CAYAAAAeFiKUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABbOUlEQVR4nO3dfXzT9b338XdaSjuQRmhtUxS1cDxKrQ4KK1S8V6CIiOzsACJVrnnYJTsexZsN0eNK3XZx4XU23eZA5/AWRM8UlTpXRVGns1ihFE8tMof1DlK5T/GmUNPf9UeTSNq0Tdokv98veT0fjz62/vpN+k1NyzvffL6fr8MwDEMAAAAAlGL2BAAAAACrIBwDAAAAPoRjAAAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfAjHAAAAgA/hGAAAAPAhHAOACR5++GE5HI4uP1577TWzp9jJa6+91mlu8+bN08knn2zanAAg2vqZPQEASGYPPfSQTjvttE7XCwoKTJhN5O644w7dcMMNZk8DAKKGcAwAJiosLNTYsWPNnkavjRgxwuwpAEBUUVYBABbmcDh03XXX6bHHHtPIkSM1YMAAffe739Xzzz/faez777+vK664Qrm5uUpPT9eJJ56oq666SocPHw6Mqa+v1/Tp0zV48GBlZGRo1KhReuSRR0LeV2lpqQYMGKDs7Gxde+21OnToUKdxocoqIpnzc889pzPPPFPp6ekaPny4fvOb32jJkiVyOBxB4/70pz9p3LhxcjqdGjBggIYPH64f/vCH4f4YASBsrBwDgIm8Xq+++eaboGsOh0OpqamBz//85z/rnXfe0Z133qljjjlGd911l2bMmKHt27dr+PDhkqStW7fq7LPPVnZ2tu68806dcsopcrvdWrdunY4cOaL09HRt375dZ511lnJycvTb3/5WWVlZWrVqlebNm6fPP/9cP/3pTyVJn3/+uc477zylpaVp+fLlys3N1erVq3XdddeF/bjCmXNVVZW+//3v69xzz9WTTz6pb775Rv/1X/+lzz//POi+qqurNWvWLM2aNUtLlixRRkaGPv74Y23YsKFXP3MA6JYBAIi7hx56yJAU8iM1NTUwTpKRm5trNDc3B641NTUZKSkpxtKlSwPXLrzwQuPYY481du/e3eX3nD17tpGenm588sknQdenTJliDBgwwDh48KBhGIaxaNEiw+FwGHV1dUHjJk6caEgyXn311cC1q6++2jjppJOCxoU75+9973vGsGHDjMOHDweuHTp0yMjKyjKO/ufpv/7rvwxJgfkBQCxRVgEAJnr00Uf1zjvvBH28/fbbQWMuuOACDRo0KPB5bm6ucnJy9PHHH0uSvvrqK73++uuaOXOmjjvuuC6/14YNG3TRRRdp2LBhQdfnzZunr776StXV1ZKkV199Vaeffrq++93vBo2bM2dO2I+rpzl/+eWX2rRpky6//HL1798/MO6YY47RtGnTgu7re9/7niRp5syZ+u///m/t3Lkz7HkAQKQIxwBgopEjR2rs2LFBH2PGjAkak5WV1el26enp+vrrryVJBw4ckNfr1QknnNDt99q3b5/y8vI6XR86dGjg6/7/dblcncaFutaVcOZsGIZyc3M7jet47dxzz9Wzzz6rb775RldddZVOOOEEFRYWas2aNWHPBwDCRTgGAJsbMmSIUlNT9dlnn3U7LisrS263u9P1Xbt2SZKys7MD45qamjqNC3WttwYPHiyHw9Gpvrir7zN9+nS98sor8ng8eu2113TCCSdozpw5gdVuAIgWwjEA2Nx3vvMdnXfeefrTn/6kvXv3djnuoosu0oYNGwJh2O/RRx/VgAEDNH78eEntJRHvvfeetm7dGjTu8ccfj9qcBw4cqLFjx+rZZ5/VkSNHAte/+OKLkF0t/NLT03Xeeedp2bJlkqQtW7ZEbU4AINGtAgBMVV9f36lbhdTeP7i7+uGOfv3rX+vss8/WuHHjdOutt+qf/umf9Pnnn2vdunW6//77NWjQIJWXl+v555/XBRdcoJ/97GcaMmSIVq9erT//+c+666675HQ6JUkLFy7Ugw8+qKlTp+oXv/hFoFvF+++/H7XHLUl33nmnpk6dqsmTJ+uGG26Q1+vV//t//0/HHHOM9u/fHxj3s5/9TJ999pkuuuginXDCCTp48KB+85vfKC0tTeedd15U5wQAhGMAMNH/+l//K+T1Bx54QP/2b/8W9v1897vfVU1NjcrLy7V48WIdOnRILpdLF154YWDD26mnnqq33npLt912m/793/9dX3/9tUaOHKmHHnpI8+bNC9yXy+XS66+/rhtuuEELFizQgAEDNGPGDN17772aPn16nx7v0UpLS/X000/rZz/7mWbNmiWXy6Uf//jH2rVrlx577LHAuHHjxmnTpk1atGiR9uzZo2OPPVZjx47Vhg0bdPrpp0dtPgAgSQ7DMAyzJwEAgCS1trZq1KhROv744/XSSy+ZPR0ASYiVYwCAaa655hpNnDhReXl5ampq0n333adt27bpN7/5jdlTA5CkCMcAANMcOnRIt9xyi/bs2aO0tDQVFRXphRde0MUXX2z21AAkKcoqAAAAAB9auQEAAAA+hGMAAADAh3AMAAAA+LAhLwra2tq0a9cuDRo0SA6Hw+zpAAAAoAPDMHTo0CENHTpUKSldrw8TjqNg165dGjZsmNnTAAAAQA8+/fRTnXDCCV1+nXAcBYMGDZLU/sPOzMw0eTYAAADoqLm5WcOGDQvktq4QjqPAX0qRmZlJOAYAALCwnkpg2ZAHAAAA+BCOAQAAAB/CMQAAAOBDOAYAAAB8CMcAAACAD+EYAAAA8CEcAwAAAD6EYwAAAMCHcAwAAAD4EI4BAAAAH8IxAAAA4EM4BgAAAHwIxwAAAIAP4RgAAADwIRwDAAAAPoRjA
AAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfPqZPQEAycXbZqimcb92H2pRzqAMFecPUWqKw+xpAQAgiXAMII6q6t2qqGyQ29MSuJbnzFD5tAKVFuaZODMAANpRVgEgLqrq3VqwqjYoGEtSk6dFC1bVqqrebdLMAAD4FuEYQMx52wxVVDbICPE1/7WKygZ520KNAAAgfgjHAGKupnF/pxXjoxmS3J4W1TTuj9+kAAAIgXAMIOZ2H+o6GPdmHAAAsUI4BhBzOYMyojoOAIBYIRwDiLni/CHKc2aoq4ZtDrV3rSjOHxLPaQEA0AnhGEDMpaY4VD6tQJI6BWT/5+XTCuh3DAAwHeEYQFyUFuZpxdwiuZzBpRMuZ4ZWzC2izzEAwBI4BARA3JQW5mligYsT8gAAlkU4BhBXqSkOlYzIMnsaAACERFkFAAAA4EM4BgAAAHwIxwAAAIAP4RgAAADwIRwDAAAAPoRjAAAAwIdwDAAAAPjQ5xgAkpS3zeBAFgDogHAMAEmoqt6tisoGuT0tgWt5zgyVTyvgKG8ASY2yCgBIMlX1bi1YVRsUjCWpydOiBatqVVXvNmlmAGA+wjEAJBFvm6GKygYZIb7mv1ZR2SBvW6gRAJD4CMcAkERqGvd3WjE+miHJ7WlRTeP++E0KACyEcAwASWT3oa6DcW/GAUCiIRwDQBLJGZQR1XEAkGgIxwCQRIrzhyjPmaGuGrY51N61ojh/SDynBQCWQTgGgCSSmuJQ+bQCSeoUkP2fl08roN8xgKRFOAaSlLfNUPWOfXqubqeqd+yjO0ESKS3M04q5RXI5g0snXM4MrZhbRJ9jAEmNQ0CAJMQBECgtzNPEAhcn5AFABw7DMFgu6qPm5mY5nU55PB5lZmaaPR2gW/4DIDr+4vsjESuHAIBEFG5eo6wCSCIcAAEAQPcIx0AS4QAIe6I+HADih5pjIIlwAIT9UB8OAPHFyjGQRDgAwl789eEdV/ubPC1asKpWVfVuk2YGAImLcAwkEQ6AsA/qwwHAHIRjIIlwAIR9UB8OAOYgHANJhgMg7IH6cAAwBxvygCTEARDWR304AJiDcAwkqdQUh0pGZJk9DXTBXx/e5GkJWXfsUPtqP/XhABBdlFUAgAVRHw4A5iAcA4BFUR8OAPFHWQUAWBj14QAQX4RjALA46sMBIH4oqwAAAAB8CMcAAACAD2UVAJCAvG0GdcoA0AuEYwBIMFX1blVUNgQdP53nzFD5tAI6XABADyirAIAEUlXv1oJVtUHBWJKaPC1asKpWVfVuk2YGAPZAOAaABOFtM1RR2RDyRD3/tYrKBnnbQo0AAEiEYwBIGDWN+zutGB/NkOT2tKimcX/8JgUANkM4BoAEsftQ18G4N+MAIBkRjgEgQeQMyuh5UATjACAZEY4BIEEU5w9RnjNDXTVsc6i9a0Vx/pB4TgsAbIVwDAAJIjXFofJpBZLUKSD7Py+fVkC/YwDohu3C8fLly5Wfn6+MjAyNGTNGb7zxRpdj3W635syZo1NPPVUpKSlauHBhyHFPP/20CgoKlJ6eroKCAj3zzDMxmj2AePK2GaresU/P1e1U9Y59SdGlobQwTyvmFsnlDC6dcDkztGJuEX2OAaAHtjoE5Mknn9TChQu1fPlyTZgwQffff7+mTJmihoYGnXjiiZ3GHz58WMcdd5xuv/123X333SHvs7q6WrNmzdLPf/5zzZgxQ88884xmzpypN998U+PGjYv1QwIQI8l8EEZpYZ4mFrg4IQ8AesFhGIZtllLGjRunoqIirVixInBt5MiRuvzyy7V06dJub3v++edr1KhRuueee4Kuz5o1S83NzfrLX/4SuFZaWqrBgwdrzZo1Yc2rublZTqdTHo9HmZmZ4T8gADHhPwij4x83fzRkBRUAkk+4ec02ZRVHjhzR5s2bNWnSpKDrkyZN0ltvvdXr+62uru50n5MnT+72Pg8fPqzm5uagDwDWwEEYAIC+sE043rt3r7xer3Jzc4Ou5+bmqqmpqdf329TUFPF9Ll26VE6nM/AxbNiwXn9/ANHFQRgAgL6wTTj2cziCa+YMw+h0Ldb3uXjxYnk8nsDHp59+2qfvDyB6OAgDANAXttmQl52drdTU1E4rurt37+608hsJl8sV8X2mp6crPT29198TQOxwEAYAu/K2GWyktQDbhOP+/ftrzJgxWr9+vWbMmBG4vn79ek2fPr3X91tSUqL169frxhtvDFx76aWXdNZZZ/VpvkAisOMfav9BGE2elpB1xw61tzXjIAwAVpLMHXasxjbhWJJuuukmlZWVaezYsSopKdEf/vAHffLJJ7r22msltZc77Ny5U48++mjgNnV1dZKkL774Qnv27FFdXZ369++vgoL2Rvk33HCDzj33XC1btkzTp0/Xc889p5dffllvvvlm3B8fYCV2/UPtPwhjwapaOaSggMxBGACsqKsOO02eFi1YVUuHnTizVSs3qf0QkLvuuktut1uFhYW6++67de6550qS5s2bp48++kivvfZaYHyo2uGTTjpJH330UeDzp556Sv/5n/+pDz/8UCNGjNAvf/lLff/73w97TrRyQ6JJhFZodg33AJKLt83Q2cs2dLmR2P9u15uLLuRFfR+Fm9dsF46tiHCMRJJIf6jtWBYCILlU79inKx7Y2OO4NfPHq2REVhxmlLjCzWu2KqsAEHuRtEKz+h/q1BSH5ed4NMI8kHzosGM9hGMAQfhDbQ7KQIDkRIcd67Fdn2MAscUf6vjz13h3XLH3b8apqnebNDMAsebvsNPVe0QOtb9QpsNO/BCOAQThD3V8cdw1kNz8HXYkdfq7S4cdcxCOAQThD3V8cdw1gNLCPK2YWySXM/gdOZczwxbdgRINNccAOvH/oe5YA+uiBjbqqPEGILX/3Z1Y4GJTrgUQjoEYSISuA/yhjg9qvAH42a3DTqIiHANRlkhdB5LxD3W8X9gc+PJIj2Oo8QaA+CEcA1HEEaD2Fu8XNt42Qz//c0OP4+6YOpIVewCIEzbkAVFC1wF7M6OdWk+b8fwGD0yP6vf1thmq3rFPz9XtVPWOfTwnAeAorBwDUZJIJ8slm55e2DjU/sJmYoErqiu4ZmzGS6SyHwCIBVaOgSih64B9mdVOLd6b8ThsBAB6RjgGooSuA/Zl1gubeB64QtkPAISHcAxECSfL2ZdZL2zieeBKPFbHqWUGkAioOQaixB90FqyqlUMKWqHjZDlr87+wafK0hFxZdaj9AJRYvLCJ14ErsV4dp5YZQKIgHANRxMly9mT2C5t4HLgSy9VxWhgCSCQOwzB436uPmpub5XQ65fF4lJmZafZ0YAHxPEgiEU7js4pEXv30thk6e9mGHlfH31x0YUTPH//9dlWy0dv7BYBoCzevsXIMxEC8TpZL5DBnhkQ+MjtWq+O0MASQaNiQB9gUbbn6pqvNY/4XNtNHHa+SEVkJEYz9/GU/Lmdw6YTLmdHr0gdaGAJINKwcAzZk1qEViSKZV9yjvTpOC0MAiYaVY8CGzDq0IhGw4h7d1XFaGAJINIRjwIZ4K7t3kvEgjFj3Ho5n
r2YAiAfKKgAb4q3s3km2zWPxKh+hhSGAREI4BmzIzEMr7CyZVtzj3Xs4kTt9AEgulFUANsRb2b2TLCvuZpWPJHKnDwDJg3AM2FQs2nIlumTZPMaGTQDoPcoqABvjrezImH1MdLwkU/kIAEQb4RiwuXidxpcokmHzWLKUjwBALBCOASSdRF9xZ8MmAPQe4RhAUkrkFfdkKR8BgFhgQx4AJCA2bAJA77ByDAAJKtHLRwAgFgjHAJDAErl8BABigbIKAAAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfOhWAQC94G0zaJEGAAmIcAwgKpIpLFbVu1VR2SC3pyVwLc+ZofJpBRyuAQA2RzgG0GfJFBar6t1asKo26EhmSWrytGjBqlpOnwMAm6PmGECf+MPi0cFY+jYsVtW7TZpZ9HnbDFVUNnQKxpIC1yoqG+RtCzUCAGAHhGMAvZZsYbGmcX+nFwFHMyS5PS2qadwfv0kBAKKKcAyg15ItLO4+1PVj7c04AID1EI4B9FqyhcWcQRlRHQcAsB425AHotXiHRbM7YhTnD1GeM0NNnpaQpSQOSS5n+7wAAPZEOAbQa/EMi1boiJGa4lD5tAItWFUrhxT0mP0RvXxaQcK2sAOAZEBZBYBe84dF6dtw6BfNsGiljhilhXlaMbdILmfwarjLmRFxGzdvm6HqHfv0XN1OVe/YlzAbFwHAzhyGYfDXuI+am5vldDrl8XiUmZlp9nSAuIvlqq63zdDZyzZ0ufHPvzr95qIL47pi29cSDyushANAMgk3rxGOo4BwDDuLVh1vrOqBq3fs0xUPbOxx3Jr541UyIqvP3y8eujpIxP/T4iARAIi+cPMaNcdAEovm6mVqiiMm4TTROmL01Bvaofbe0BMLXNQuA4AJqDkGkpSV6ni7k2jt05KtNzQA2A3hGEhCdjrZzt8Ro6s1VIfaV7vt0j4t0VbCASDREI6BJGSn1ct4dcSIl0RbCQeAREM4BpKQ3VYvo9k+zWyJthIOAImGDXlAErLj6mVpYZ4mFrhMPSEvGjhIBACsjZVjIAnZdfXS3xFj+qjjVTIiy7YBMpFWwgEg0bByDCQhVi/Nlygr4QCQaDgEJAo4BAR2xSltAIBkwSEgAHrE6iUAAMEIx0CSi9XJdgAA2BEb8gAAAAAfwjEAAADgQzgGAAAAfKg5BgAkNG+bwaZTAGEjHAOIOsIIIhWr5wztCtERf5/QE8IxgKgijCBSsXrOVNW7tWBVrTo282/ytGjBqlpOI0xC/H1COKg5BhA1/jBy9D880rdhpKrebdLMYFWxes542wxVVDZ0CsbStydCVlQ2yNvGOVjJgr9PCBfhGEBUEEYQqVg+Z2oa93cKQR3v3+1pUU3j/ojvG/bD3ydEgnAMICoII4hULJ8zuw91fb+9GQd74+8TIkE4BhAVhBFEKpbPmZxBGVEdB3vj7xMiwYY8AFFBGDGHnXfex/I5U5w/RHnODDV5WkK+le6Q5HK2/7yQ+Pj7hEgQjgFEBWEk/uy+8z6Wz5nUFIfKpxVowapaOaSg+/e/dCifVmCbFxLoG/4+IRKUVQCICn8Ykb4NH36EkehLhJ33sX7OlBbmacXcIrmcwauBLmcGbdySDH+fEAmHYRhszeyj5uZmOZ1OeTweZWZmmj0dwFR2X820A2+bobOXbehyg5F/FezNRRfa4h/7WD9n7Fx6guji71NyCzevEY6jgHAMBCOMxFb1jn264oGNPY5bM3+8SkZkxWFGfcdzBvHCcy15hZvXbFdWsXz5cuXn5ysjI0NjxozRG2+80e34119/XWPGjFFGRoaGDx+u++67L+jrDz/8sBwOR6ePlhZ2rMLavG2Gqnfs03N1O1W9Y5+l+nOmpjhUMiJL00cdr5IRWfzDE2WJtPPe/zx+/t1dkqRLzxzKcwYxxd8n9MRWG/KefPJJLVy4UMuXL9eECRN0//33a8qUKWpoaNCJJ57YaXxjY6MuueQSzZ8/X6tWrdLf/vY3/fjHP9Zxxx2nf/mXfwmMy8zM1Pbt24Num5HBjlVYF28NJrdE2XnP8xiAFdmqrGLcuHEqKirSihUrAtdGjhypyy+/XEuXLu00ftGiRVq3bp22bdsWuHbttddq69atqq6ultS+crxw4UIdPHiw1/OirALx5N+I1fEX17/2wUajxOevOe5p572Va455HgOIt4Qrqzhy5Ig2b96sSZMmBV2fNGmS3nrrrZC3qa6u7jR+8uTJ2rRpk1pbWwPXvvjiC5100kk64YQTdOmll2rLli3dzuXw4cNqbm4O+gDigSNQIdl/5z3PYwBWZptwvHfvXnm9XuXm5gZdz83NVVNTU8jbNDU1hRz/zTffaO/evZKk0047TQ8//LDWrVunNWvWKCMjQxMmTNAHH3zQ5VyWLl0qp9MZ+Bg2bFgfHx0QHo5AhZ+d25TxPAZgZbaqOZYkhyN4JcQwjE7Xehp/9PXx48dr/Pjxga9PmDBBRUVF+t3vfqff/va3Ie9z8eLFuummmwKfNzc3E5ARF4m0EQt9V1qYp4kFLtvtvOd5DMDKbBOOs7OzlZqa2mmVePfu3Z1Wh/1cLlfI8f369VNWVuj2RikpKfre977X7cpxenq60tPTI3wEQN8lykasSNB2qXv+nfd2kozPYwD2YZtw3L9/f40ZM0br16/XjBkzAtfXr1+v6dOnh7xNSUmJKisrg6699NJLGjt2rNLS0kLexjAM1dXV6Ywzzoje5IEoSbYjUOlmkJiS7XkMwF5sU3MsSTfddJP++Mc/6sEHH9S2bdt044036pNPPtG1114rqb3c4aqrrgqMv/baa/Xxxx/rpptu0rZt2/Tggw9q5cqVuuWWWwJjKioq9OKLL+rDDz9UXV2drrnmGtXV1QXuE7ASu2/EikQiHI+M0JLpeQzAfmwVjmfNmqV77rlHd955p0aNGqW//vWveuGFF3TSSSdJktxutz755JPA+Pz8fL3wwgt67bXXNGrUKP385z/Xb3/726AexwcPHtSPfvQjjRw5UpMmTdLOnTv117/+VcXFxXF/fEA47LwRK1x0M4i/eB8qkwzPYwD2ZKs+x1ZFn2OYIZFrcRPxeGQrM7N8JZGfxwCsJdy8ZpuaYwDB7LgRK1x0M4ifrg7j8JevxHoVN5GfxwDsyVZlFQCSA90M4oPyFQDojHAMwHL83Qy6enPdofa3/elm0DccxgEAnRGOAVgO3Qzig/IVAOiMcAzAkuhmEHuUrwBAZ2zIA2BZdj0e2S44jAMAOiMcA7A0uhnEjr98ZcGqWjmkoIBM+QqAZEVZBQBTxfvwCQSjfMV6+J0AzMXKMQDTmHn4hN1F8/AMylesg98JwHyckBcFnJAHRK6rwyf8cYxVy64RoBITvxNAbIWb1yirACwqkd9a5fCJ3vMHqI79if0n2lXVu02aGfqC3wnAOiirACwo0VcGIzl8gs143+opQDnUHqAmFrgoibAZficA62DlGLCYZFgZ5PCJ3uFEu8TF7wRgHYRjwEKS5a1VDp/oHQJU4uJ3ArAOwjFgIcmyMug/fKKrN/4dai8j4fC
JYASoxMXvBGAdhGPAQpJlZdB/+ISkTmGAwye6RoBKXPxOANZBOAYsJJlWBjl8InLRDFCJ3A3FrvidAKyBPsdRQJ9jRIu3zdDZyzaoydMSsu7YofZ/KN9cdGHCrCBF8zCLZNHXbiaJ3g3F7vidAGIj3LxGOI4CwjGiyd+tQlJQQOYgAByttwGKgyYAJCsOAQFsirdWEY7UFIdKRmRp+qjjVTIiK+xSimTohgIAfcEhIIAFlRbmaWKBi7dWEVUcNAEAPSMcAxblXxkEoiVZuqEAQF9QVgEASSKZuqEAQG8RjgEgSdAnGQB6RjgGgCTBQRMA0DPCMQAkkdLCPP1+zmgNHpgWdN3q3VA4tARAvLAhDwCSSFW9Wz//8zbt/7I1cG3IwP66Y6p1DwDh0BIA8cTKMQAkCf8BIB3buR348oj+/fFaVdW7TZpZ17qac5OnRQtWWXPOAOyNcAwAScCOB4DYcc4A7I9wDFgUNZaIpkgOALEKO84ZgP1RcwxYEDWWiLa+HADibTNMOa2RQ0sAmIFwDFiMv8ay4zqxv8bSyh0FYF29PQDEzBdqHFoCwAyUVQAWQo0lYqU3B4CYvRmOQ0sAmIFwDFgINZaIlXAOAJn9vRP1/Lu7VL1jn45802b6CzUOLQFgBsoqAAuhxtL+zKrPDUdpYZ5WzC3qVCZx7IA0GZLufvnvgWtDBqYF9ULu6OgXaiUjsuI+Zxc1+ABihHAMWAg1lvZmh42UpYV5mljgCgT4j/Z+qbtf/qDTuO6C8dHi8UKt45yt9qIDQGIhHAMW4q+xbPK0hHw726H2FTNqLK3HThspU1McKhmRJW+bobOXbejTfUXjhVo4q+3+OQNArBGOAQvx11guWFUrhxQUtKixtK6eNlI61F6fO7HAZan/dj3VuHcnWi/U7LDaDiC5sCEPsBh/jaXLGbwi53JmWGr1Ed+y60bK3pZEROuFmtndMAAgFFaOAQuixtJe7LqRMtySiCED+2v/l0cCn0djM5xdV9sBJD7CMWBR1Fjah103UoZb4/76Ty7Q5o8PRPWFWiSr7fweAIgnyioAoI/selhFuH2E+/dLUcmILE0fdbxKRmRFZSXXrqvtABIf4RgA+sjOh1WYVeNu19V2AImPsgoAiAI7H1ZhRo07bQsBWJXDMIzYnf2ZJJqbm+V0OuXxeJSZmWn2dGAzVj5RDZHjv2f4/N0qpNBtC+nOAiCaws1rhOMoIByjt+jximTH7wCAeCEcxxHhGL3R1YlqrJoh2bDaDiAews1r1BwDJqDHq70R5qKLtoUArIRwDJiAHq/2RRkAACQ2WrkBJqDHqz1x3DEAJD7CMWACerzaT0+lMFJ7KYy3jW0cAGBnhGPABHY9US2ZRVIKAwCwL8IxYAI7n6iWrCiFAYDkQDgGTGLWsb3oHUphACA50K0CMJEZx/aidzjuGACSA+EYMBk9Xu3BXwqzYFWtHAp93DGlMABgf5RVAECYKIUBgMTHyjEARIBSGABIbIRjAIgQpTAAkLgoqwAAAAB8CMcAAACAD+EYAAAA8CEcAwAAAD6EYwAAAMCHcAwAAAD40MoNiDFvm0FPXISF5woAmI9wDMRQVb1bFZUNcntaAtfynBkqn1bAaWoIwnMFAKyBsgogRqrq3VqwqjYo7EhSk6dFC1bVqqrebdLMYDU8VwDAOgjHQAx42wxVVDbICPE1/7WKygZ520KNQDLhuQIA1kI4BmKgpnF/p1XAoxmS3J4W1TTuj9+kYEk8VwDAWgjHQAzsPtR12Dnayw1NMZ4JrC7c50q44wAAfUM4BmIgZ1BGWOOeqdvJ2+VJLtznSrjjAAB9QzgGYqA4f4iGDOzf47j9X7bydnmSK84fojxnhrpq2OZQe9eK4vwh8ZwWACQtwjEQA6kpDl0+amhYY3m7PLmlpjhUPq2g2zHl0wrodwwAcUI4BmJkYoErrHG8XY7Swjz96Nx8dcy/KQ7pR+fmR63PsbfNUPWOfXqubqeqd+yjpAcAQrBdOF6+fLny8/OVkZGhMWPG6I033uh2/Ouvv64xY8YoIyNDw4cP13333ddpzNNPP62CggKlp6eroKBAzzzzTKymjyTif7u8K7xdDr+qerf+8NdGdcyqhiH94a+NUelzXFXv1tnLNuiKBzbqhifqdMUDG3X2sg30UAaADmwVjp988kktXLhQt99+u7Zs2aJzzjlHU6ZM0SeffBJyfGNjoy655BKdc8452rJli2677TZdf/31evrppwNjqqurNWvWLJWVlWnr1q0qKyvTzJkz9fbbb8frYSFB+d8ud0id6kn9n/N2OeLR55hDRgAgfA7DMGzzvtq4ceNUVFSkFStWBK6NHDlSl19+uZYuXdpp/KJFi7Ru3Tpt27YtcO3aa6/V1q1bVV1dLUmaNWuWmpub9Ze//CUwprS0VIMHD9aaNWvCmldzc7OcTqc8Ho8yMzN7+/CQoDgWGN2p3rFPVzywscdxa+aPV8mIrIjv39tm6OxlG7rspeyQ5HJm6M1FF/JCDUBCCzev9YvjnPrkyJEj2rx5s2699dag65MmTdJbb70V8jbV1dWaNGlS0LXJkydr5cqVam1tVVpamqqrq3XjjTd2GnPPPfd0OZfDhw/r8OHDgc+bm5sjfDRIJqWFeZpY4FJN437tPtSinEHtpRQEEUix73McySEjvQnfAJBobBOO9+7dK6/Xq9zc3KDrubm5amoKfZBCU1NTyPHffPON9u7dq7y8vC7HdHWfkrR06VJVVFT08pEgGaWmOAgeCCnWfY45ZAQAImOrmmNJcjiCV9sMw+h0rafxHa9Hep+LFy+Wx+MJfHz66adhzx8AjhbrPsccMgIAkbFNOM7OzlZqamqnFd3du3d3Wvn1c7lcIcf369dPWVlZ3Y7p6j4lKT09XZmZmUEfANAbR/c5jsXGTQ4ZAYDI2CYc9+/fX2PGjNH69euDrq9fv15nnXVWyNuUlJR0Gv/SSy9p7NixSktL63ZMV/eJ5EFPWMRLaWGeVswtkqtD6z+XM0Mr5hb1aeNmrMM3ACQa29QcS9JNN92ksrIyjR07ViUlJfrDH/6gTz75RNdee62k9nKHnTt36tFHH5XU3pni3nvv1U033aT58+erurpaK1euDOpCccMNN+jcc8/VsmXLNH36dD333HN6+eWX9eabb5ryGGENdJhAvMVy46Y/fHd8Trt4TgNAJ7Zq5Sa1HwJy1113ye12q7CwUHfffbfOPfdcSdK8efP00Ucf6bXXXguMf/3113XjjTfqvffe09ChQ7Vo0aJAmPZ76qmn9J//+Z/68MMPNWLECP3yl7/U97///bDnRCu3xOLvCdvxF8MfUfq6kgeYxdtm0DUFQNIKN6/ZLhxbEeE4cdATFgCAxBRuXrNNzTEQD5H0hAUAAImHcAwchZ6wAAAkN8IxcBR6wgIAkNxs1a0CiDV/T9gmT0unDXnStzXH9IRFNLBBDgCsh3AMHMXfE3bBqlo5pKCAHGlPWIIPukO7QACwJrpVRAHdKhJPX4MLwQfdoV0gAMQfrdziiHCcmHq78kvwQXdoFwgA5gg3r1FWAX
QhNcWhkhFZEd3G22aoorIhZL2yofbgU1HZoIkFLoJPkoqkXWA4zz/KdwAgugjHQJjCCSHRDj5IPNFsF0j5DgBEH+EYCEO4IYQ+yehJtNoFdlW+0+Rp0YJVtZTvAEAv0ecY6IE/hHRcEfaHkKp6d+AafZLRE3+7wK4KHxxqf+HVXbvAnsp3pPbyHW8bW0oAIFKEY6AbkYaQaASfROBtM1S9Y5+eq9up6h37YhLS4vE9YsHfLlBSp+dJuO0COeYcAGKHsgqgG5HWEHfXJ9k/fvb3hsVwxuaLRx2s3WttSwvztGJuUafH4ArzMVC+AwCxQzgGutGbENJV8PG7++UP9MQ7n9omyEUiHnWwVqm17WuXiNLCPE0scPXqPijfAYDYIRwD3ehtCPEHn3s3/EN3v/z3TuMTcdNUPNrYWaVVXrRWrnvTLlDimHMAiCVqjoFu9LWG+Il3Pgl5PRE3TcWjDtYKtbaRbNCMlWjULQMAQiMcA93oSwixQpCLp3jUwZpda2ulLhH+8h2XM/hdi8ED0/TDCSfL+Z3+CfPCCwDiiXAM9KCrEOJyZnRbFmF2kIu3eNTBml1ra7UXPKWFeXpz0YVaM3+8fjjhZA0Z2F/7v2zVyr99pCse2Kizl22Iy0o2ACQSao6BMPRm85TZQS7e4lEHG+3vEemmOiu+4ElNccjz9RE99LePTN+kCACJgHAMhCnSzVPJtmmquzZ20aqDjeb36M2mOiu+4LHKJsV46munEADoDmUVQIzYbdNUNA7V6G0JSry/R2831VnxkBerlXrEWlW9W2cv26ArHtioG56oo3wEQNQ5DMNgx0YfNTc3y+l0yuPxKDMz0+zpwGLscGBFtOcYj5W93n4Pb5uhs5dt6DJQ+lf031x0Ycj78wdrKfTKdbxLGJ6r26kbnqjrcdxvZo/S9FHHx35CMdRVj2uzfvYA7CXcvEZZBRBjfTnsIR5icahGb/v3xuN7RHrqYUd9Pd0u2qxY6hELyVg+AsAchGMgDuIRFnsjGQNHNDbVWekFT7LUtvf1RQ0AhIuaYyCJJVu9qhS9lVb/C57po45XyYgs01482K22vbes2CkEQGIiHANJLBkDR3H+EB07IK3Lr5uxqa6v4rER0mzJUj4CwHyUVQAhJEurqGQMHOsbmnTwq9Yuv27IniutVir1iIVkKR8BYD7CMdCBHbpLREuyBQ5/jXV3Bg9I08QCV5xmFF1WrW2Phnj00QYAibIKIEhv+9/a1dH1qh11FTii0Q/ZLD3VWEvSga9aE6rGOpEkQ/kIAPOxcgz4JGPnBj/ngLROpQbHDkjT0u+fERQ47L6qnow11okm0ctHAJiPcAz4JGOrqK56HEvtK6jhjO1LP+RY6apmPBlrrBNRIpePADAf4RjwSbZVxe5WyqXglXL5/r8dVtW7W92eWOBKqhprAEDkelVz3NbW1uX1Tz75pE8TAsySbKuKkayU26Ufck814+sbmpKiJzAAoPciCsfNzc2aOXOmBg4cqNzcXJWXl8vr9Qa+vmfPHuXn50d9kkA8+Ds3dBWL7Nj/tjuRrJTbYVW9p5px6dvVbTZ1AQC6ElFZxR133KGtW7fqscce08GDB/WLX/xCmzdv1tq1a9W/f39JkmHYZ+c6cLRkaxX10d6vwhoXyUq5mavqkaxus6kLANCViMLxs88+q0ceeUTnn3++JGnGjBmaOnWqpk2bpnXr1kmSHA7+cYF9+VtFdaxZddmoI0M4vG2G1tT0XALlykwPrJRbpVa3q812ka5us6kLABBKROF47969OumkkwKfZ2Vlaf369Zo8ebIuueQS/fGPf4z6BIF4S4ZVxZrG/Wpq7jlMXlF8YuBxW2FVvbvNdslWMw4AiI2Iao6HDRumbdu2BV0bNGiQXnrpJX399deaMWNGVCcHmMW/qjh91PEqGZGVUMFYCr82+OTsgYH/b/YBDD1ttjvw5eGkqhkHAMRGRCvHkyZN0kMPPaRLLrkk6PoxxxyjF198URMnTozq5ADERm9XWc1aVQ/ngJaf/3mb7phaoH9/PDlqxgEAsRFROK6oqNCuXbtCfm3QoEF6+eWXtXnz5qhMDEDs+Dtz9KaG2Ixa3XA32w0e2D8pasYBALETUTgePHiwBg8e3OXXDx8+rC1btui8887r88QAxI7dOnNEstlu+qjjE75mHAAQO706BORohmHoxRdf1MyZMzV06FD98pe/jMa8AMSY2TXEkYi0DCTRa8YBALHT6+OjP/roIz344IN6+OGHtXPnTl155ZX685//rAsuuCCa8wMQQ3bpzNGXMhAAACIR0crx4cOHtWbNGl100UUaOXKk6uvr9etf/1opKSm69dZbdfHFFys1NTVWcwUQA3ZYZfWXgUgc+wwAiK2IwvHxxx+vFStWaNasWdq1a5fWrl2rH/zgB7GaGwAE2KkMBABgXxGVVXi9XjkcDjkcDlaIAcSdXcpAAAD2FVE4drvdevrpp7Vy5UrdcMMNmjJliubOncuR0UgaXR1djPjh2GcAQCw5DMMItb+lRzt27NBDDz2kRx55RDt37tQVV1yhefPm6cILL0y6VeXm5mY5nU55PB5lZmaaPR3ESHdHF/OWPgAA1hZuXut1OPZra2vTiy++qJUrV6qyslLHHHOM9u3b15e7tB3CceLzH13c8ZfFv2ZMzSsAANYWbl7rdSs3v5SUFE2ZMkVTpkzR3r179eijj/b1LgFLCefo4orKBk0scFFiAQCAzUXUreLAgQP63e9+p+bm5k5f83g8WrNmjf7t3/4tapMDrCDco4trGvfHb1IAACAmIgrH9957r/7617+GXIp2Op164403dO+990ZtcoAVRHJ0MYDo8bYZqt6xT8/V7VT1jn3ytvWpChAAwhJRWcXTTz+tX/3qV11+/X//7/+tW265RbfddlufJwZYRaRHFwPoOzbAAjBLRCvHO3bs0CmnnNLl10855RTt2LGjz5MCrMR/dHFX1cQOtf+jzdHFQHT4N8B2LGdq8rRowapaVdW7TZoZgGQQUThOTU3Vrl27uvz6rl27lJIS0V0ClsfRxUD89LQBVmrfAEuJBYBYiSjJjh49Ws8++2yXX3/mmWc0evTovs4JsByOLgbigw2wAMwWUc3xddddp9mzZ+uEE07QggULAod9eL1eLV++XHfffbcef/zxmEwUMBtHFwOxxwZYAGaLKBz/y7/8i37605/q+uuv1+23367hw4fL4XBox44d+uKLL/STn/xEP/jBD2I1V8QQxyLbG//9kCjYAAvAbBEfAvLLX/5Sl19+uVavXq0PPvhAhmHo3HPP1Zw5c1RcXByLOSLG2BUeHqv+nKw6L6A3/BtgmzwtIeuOHWovZ2IDLIBYiej46K+++ko/+clP9Oyzz6q1tVUXXXSRfve73yk7OzuWc7Q8Ox8fzbHI4bHqz8mq8wL6wv+8lhT03OZ5DaAvws1rEW3IKy8v18MPP6ypU6fqiiuu0Msvv6wFCxb0ebIwB7vCw2PVn5NV5wX0FRtgAZgporKKtWvXauXKlZo9e7Yk6corr9SECRPk9XoDm/NgH5HsCi8Zk
RW/iVmMVX9OVp0XEA1sgAVglojC8aeffqpzzjkn8HlxcbH69eunXbt2adiwYVGfHGKLXeHhserPyarzAqIlNcXBCzsAcRdRWYXX61X//v2DrvXr10/ffPNNVCeF+GBXeHis+nOy6rwAALCziFaODcPQvHnzlJ6eHrjW0tKia6+9VgMHDgxcW7t2bfRmiJhhV3h4rPpzsuq8AACws4hWjq+++mrl5OTI6XQGPubOnauhQ4cGXYM9cCxyeKz6c7LqvAAAsLOIWrkhNDu3cpPokxsuq/6crDovAACsJNy8RjiOAruHY4kT1sJl1Z+TVecFAIBVhJvXIj4hD4mJXeHhserPyarzAgDAbiKqOQYAAAASGSvHsAxKAwAAgNkIx7AENpUBAAAroKwCpquqd2vBqtpORyE3eVq0YFWtqurdUfte3jZD1Tv26bm6naresU/eNvajAgCAb7FyDFN52wxVVDaEPMTCUHu/3orKBk0scPW5xILVaQAA0BNWjmGqmsb9nVaMj2ZIcntaVNO4v0/fJ56r0wAAwL4IxzDV7kNdB+PejAulp9VpqX11mhILe6JUBgAQTbYJxwcOHFBZWVngiOqysjIdPHiw29sYhqElS5Zo6NCh+s53vqPzzz9f7733XtCY888/Xw6HI+hj9uzZMXwkOFrOoIyojgslXqvTiL+qerfOXrZBVzywUTc8UacrHtios5dt4J0AAECv2SYcz5kzR3V1daqqqlJVVZXq6upUVlbW7W3uuusu/frXv9a9996rd955Ry6XSxMnTtShQ4eCxs2fP19utzvwcf/998fyoeAoxflDlOfMUFfVxA611wUX5w/p9feIx+o04o9SGQBALNgiHG/btk1VVVX64x//qJKSEpWUlOiBBx7Q888/r+3bt4e8jWEYuueee3T77bfr+9//vgoLC/XII4/oq6++0uOPPx40dsCAAXK5XIEPp9MZj4cFtZ/sVj6tQJI6BWT/5+XTCvq0GS8eq9OIL0plAACxYotwXF1dLafTqXHjxgWujR8/Xk6nU2+99VbI2zQ2NqqpqUmTJk0KXEtPT9d5553X6TarV69Wdna2Tj/9dN1yyy2dVpY7Onz4sJqbm4M+0HulhXlaMbdILmdwOHU5M7RiblGfO0nEY3Ua8UWpDAAgVmzRyq2pqUk5OTmdrufk5KipqanL20hSbm5u0PXc3Fx9/PHHgc+vvPJK5efny+Vyqb6+XosXL9bWrVu1fv36LuezdOlSVVRU9OahoAulhXmaWOCKyQl5/tXpBatq5ZCCVhujtTqdyKx4ciGlMgCAWDE1HC9ZsqTHkPnOO+9IkhyOzv8YG4YR8vrROn69423mz58f+P+FhYU65ZRTNHbsWNXW1qqoqCjkfS5evFg33XRT4PPm5mYNGzas23mgZ6kpDpWMyIrJfftXpzv2OXZ16HNsxSBoJqv2hqZUBgAQK6aG4+uuu67HzhAnn3yy3n33XX3++eedvrZnz55OK8N+LpdLUvsKcl7et/+I7969u8vbSFJRUZHS0tL0wQcfdBmO09PTlZ6e3u2844UwF76eVqetGgTN4t/w1rFq17/hLRolL73lL5Vp8rSErDt2qP2FD6UyAIBImRqOs7OzlZ2d3eO4kpISeTwe1dTUqLi4WJL09ttvy+Px6Kyzzgp5G3+pxPr16zV69GhJ0pEjR/T6669r2bJlXX6v9957T62trUGB2qoIc5HranXaykHQDPE8ubA3KJUBAMSKLTbkjRw5UqWlpZo/f742btyojRs3av78+br00kt16qmnBsaddtppeuaZZyS1l1MsXLhQ/+f//B8988wzqq+v17x58zRgwADNmTNHkrRjxw7deeed2rRpkz766CO98MIL+td//VeNHj1aEyZMMOWxhos2VtFD54PO7LDhLdYbOQEAyckWG/Kk9o4S119/faD7xGWXXaZ77703aMz27dvl8XgCn//0pz/V119/rR//+Mc6cOCAxo0bp5deekmDBg2SJPXv31+vvPKKfvOb3+iLL77QsGHDNHXqVJWXlys1NTV+Dy5CVl/Vs5tIgmCsaqKtxi4b3mK5kRNIdpTtIVnZJhwPGTJEq1at6naMYQTHRYfDoSVLlmjJkiUhxw8bNkyvv/56tKYYN4S56LJLEIwnO214i+VGTiBZUbaHZGaLsgoEI8xFl52CYLzQGxpIXpTtIdkRjm2IMBddBMHO4nFyIQDrYQ8GQDi2JcJcdBEEQ7Pyhjdvm6HqHfv0XN1OVe/Yxz/UQJTYYTMuEGu2qTnGt2hjFX3hHhKSbKy44Y1aSCB2KNsDCMe2RZiLPisGQSuw0oY3+lEDsUXZHkA4tjXCXPRZKQgiGC0Mgdjj9EmAmmPb84e56aOOV8mILEIBEha1kEDssQcDIBwDsAlqIYH4sPJmXCAeKKsAYAvUQgLxQ9kekhnhGIAtUAsJxBd7MJCsKKsAYAvUQgIA4oFwDMA2qIUEAMQaZRUAbIVaSABALBGOAdgOtZAAgFihrAIAAADwIRwDAAAAPoRjAAAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfAjHAAAAgA/hGAAAAPDhhDwApvC2GRwBDQCwHMIxgLirqnerorJBbk9L4FqeM0Pl0wpUWphn4swAAMmOsgoAcVVV79aCVbVBwViSmjwtWrCqVlX1bpNmBgAA4RhAHHnbDFVUNsgI8TX/tYrKBnnbQo0AACD2CMcA4qamcX+nFeOjGZLcnhbVNO6P36QAADgK4RhA3Ow+1HUw7s04AACijQ15NsMOf9hZzqCMqI4DACDaCMc2wg5/2F1x/hDlOTPU5GkJWXfskORytr/oAwDADJRV2AQ7/JEIUlMcKp9WIKk9CB/N/3n5tALeDQEAmIZwbAPs8EciKS3M04q5RXI5g0snXM4MrZhbxLsgAABTUVZhA5Hs8C8ZkRW/iQG9VFqYp4kFLurnAQCWQzi2AXb4IxGlpjh4MQcAsBzCsQ2wwz8YHTsAAECsEI5tgB3+36JjBwAAiCU25NkAO/zb0bEDAADEGuHYJpJ9hz8dOwAAQDxQVmEjybzDn44dAAAgHgjHNpOsO/zp2AEAAOKBsgrYAh07AABAPBCOYQv+jh1dFZA41N61Ihk6dgAAgNghHCPA22aoesc+PVe3U9U79llqcxsdOwAAQDxQcwxJ9ugf7O/Y0XGeLovNEwAA2JfDMAzrLA/aVHNzs5xOpzwejzIzM02bR29PjvP3D+74RPDf0mqt4jghDwAARCrcvMbKcYLo7cpvT/2DHWrvHzyxwGWZAJqsHTsAAEDsUXOcAPpyclwk/YMBAAASHeHY5vp6chz9gwEAAL5FOLa5vq780j8YAADgW4Rjm+vryi/9gwEAAL5FOLa5vq78htM/+I6pI1XTuN+S/Y8BAACiiW4VNudf+W3ytISsO3aovQ9wdyu/3fUPvuy7efr5n7dZuv8xAABAtNDnOArM7nPs71YhKSggR9qnuGP/4ANfHtG/P26f/scAAABdCTevUVaRAPwrvy5ncOmEy5kRUYD19w+ePup4FecP0c//3Psu
GAAAAHZEWUWCKC3M08QCV9ROjoukCwYHcgAAgERBOE4g0Tw5jv7HAAAgGVFWgZDofwwAAJIRK8cIKRpdMADYV8cNun0p0wIAOyEcIyR//+MFq2rlUOguGOXTCvjHEkhAVfXuTq0daeEIIFlQVoEuRasLBgD78LeG7Lght8nTogWralVV7zZpZgAQH6wco1vR7oIBwLq8bYYqKrtu4ehQewvHiQUu/gYASFiEY/Qoml0wAFgXLRwBgLIKAIAPLRwBgJVjAICP2S0c6ZABwAoIxwAASea2cKRDBgCroKwCACDp2xaO0rctG/1i2cKRDhkArIRwDAAIiHcLx546ZEjtHTK8baFGAED0UVYBAAgSzxaOdMgAYDWEYwBAJ9Fq4djTJjs6ZACwGsIxACAmwtlkZ3aHDADoiJpjAEDUhbvJzt8ho6uCDYfaA3UsOmQAQCiEYwBAVEWyyc6sDhkA0BXCMQAgqiLZZCfFv0MGAHSHmmMAQFT1ZpNdPDtkAEB3bLNyfODAAZWVlcnpdMrpdKqsrEwHDx7s9jZr167V5MmTlZ2dLYfDobq6uk5jDh8+rP/4j/9Qdna2Bg4cqMsuu0yfffZZbB4EACQgb5uh6h379FzdTlXv2KfsY9LDul3HTXb+DhnTRx2vkhFZBGMAprDNyvGcOXP02WefqaqqSpL0ox/9SGVlZaqsrOzyNl9++aUmTJigf/3Xf9X8+fNDjlm4cKEqKyv1xBNPKCsrSzfffLMuvfRSbd68WampqTF5LACQKEJ1pHBlpuvYAWnyfNUa92OoAaCvHIZhWP7YoW3btqmgoEAbN27UuHHjJEkbN25USUmJ3n//fZ166qnd3v6jjz5Sfn6+tmzZolGjRgWuezweHXfccXrsscc0a9YsSdKuXbs0bNgwvfDCC5o8eXJY82tubpbT6ZTH41FmZmbvHiQA2Iy/I0XHf0Qc+nbj3dH/3/+5JGqJAcRduHnNFmUV1dXVcjqdgWAsSePHj5fT6dRbb73V6/vdvHmzWltbNWnSpMC1oUOHqrCwsNv7PXz4sJqbm4M+ACCZ9NSRwiHp2AFpys1kkx0Ae7FFWUVTU5NycnI6Xc/JyVFTU1Of7rd///4aPHhw0PXc3Nxu73fp0qWqqKjo9fcFALsLpyPFwa9atfqaIqWkONhkB8A2TF05XrJkiRwOR7cfmzZtkiQ5HJ3/mBqGEfJ6X/V0v4sXL5bH4wl8fPrpp1GfAwBYWbgdKfZ+eZhNdgBsxdSV4+uuu06zZ8/udszJJ5+sd999V59//nmnr+3Zs0e5ubm9/v4ul0tHjhzRgQMHglaPd+/erbPOOqvL26Wnpys9Pbzd2ACQiDj2GUCiMjUcZ2dnKzs7u8dxJSUl8ng8qqmpUXFxsSTp7bfflsfj6TbE9mTMmDFKS0vT+vXrNXPmTEmS2+1WfX297rrrrl7fLwAkOv+xz02eFjpSAEgottiQN3LkSJWWlmr+/PnauHGjNm7cqPnz5+vSSy8N6lRx2mmn6Zlnngl8vn//ftXV1amhoUGStH37dtXV1QXqiZ1Op6655hrdfPPNeuWVV7RlyxbNnTtXZ5xxhi6++OL4PkgAsBGOfQaQqGwRjiVp9erVOuOMMzRp0iRNmjRJZ555ph577LGgMdu3b5fH4wl8vm7dOo0ePVpTp06VJM2ePVujR4/WfffdFxhz99136/LLL9fMmTM1YcIEDRgwQJWVlfQ4BoAecOwzgERkiz7HVkefYwDJzNtmcOwzAMsLN6/ZopUbAMC6/Mc+A0AisE1ZBQAAABBrhGMAAADAh3AMAAAA+BCOAQAAAB825AERYmc+AACJi3AMRKCq3q2Kyga5PS2Ba3nODJVPK6CnKwAACYCyCiBMVfVuLVhVGxSMJanJ06IFq2pVVe82aWYAACBaCMdAGLxthioqGxTqxBz/tYrKBnnbOFMHAAA7IxwDYahp3N9pxfhohiS3p0U1jfvjNykAABB1hGMgDLsPdR2MezMOAABYE+EYCEPOoIyojgMAANZEOAbCUJw/RHnODHXVsM2h9q4VxflD4jktAAAQZYRjIAypKQ6VTyuQpE4B2f95+bQC+h0DAGBzhGMgTKWFeVoxt0guZ3DphMuZoRVzi+hzDABAAuAQECACpYV5mljg4oQ8AAASFOEYiFBqikMlI7LMngYAAIgByioAAAAAH8IxAAAA4EM4BgAAAHwIxwAAAIAP4RgAAADwIRwDAAAAPoRjAAAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfAjHAAAAgE8/sycAAABgVd42QzWN+7X7UItyBmWoOH+IUlMcZk8LMUQ4BgAACKGq3q2Kyga5PS2Ba3nODJVPK1BpYZ6JM0MsUVYBAADQQVW9WwtW1QYFY0lq8rRowapaVdW7TZoZYo1wDAAAcBRvm6GKygYZIb7mv1ZR2SBvW6gRsDvCMQAAwFFqGvd3WjE+miHJ7WlRTeP++E0KcUM4BgAAOMruQ10H496Mg70QjgEAAI6SMygjquNgL4RjAACAoxTnD1GeM0NdNWxzqL1rRXH+kHhOC3FCOAYAADhKaopD5dMKJKlTQPZ/Xj6tgH7HCYpwDAAA0EFpYZ5WzC2SyxlcOuFyZmjF3CL6HCcwDgEBAAAIobQwTxMLXJyQl2QIxwAAAF1ITXGoZESW2dNAHFFWAQAAAPgQjgEAAAAfyioAAAHeNoP6SgBJjXAMAJAkVdW7VVHZEHRsbp4zQ+XTCtiZDyBpUFYBAFBVvVsLVtUGBWNJavK0aMGqWlXVu02aGQDEF+EYAJKct81QRWWDjBBf81+rqGyQty3UCABILIRjAEhyNY37O60YH82Q5Pa0qKZxf/wmBQAmIRwDQJLbfajrYNybcQBgZ4RjAEhyOYMyeh4UwTgAsDPCMQAkueL8IcpzZqirhm0OtXetKM4fEs9pAYApCMcAkORSUxwqn1YgSZ0Csv/z8mkF9DsGkBQIxwAAlRbmacXcIrmcwaUTLmeGVswtos8xgKTBISAAAEntAXligYsT8gAkNcIxACAgNcWhkhFZZk8DAExDWQUAAADgQzgGAAAAfAjHAAAAgA/hGAAAAPAhHAMAAAA+hGMAAADAh3AMAAAA+BCOAQAAAB/CMQAAAOBDOAYAAAB8CMcAAACAD+EYAAAA8CEcAwAAAD6EYwAAAMCHcAwAAAD4EI4BAAAAH8IxAAAA4EM4BgAAAHz6mT0BAAAAq/C2Gapp3K/dh1qUMyhDxflDlJriMHtaiCPCMQAAgKSqercqKhvk9rQEruU5M1Q+rUClhXkmzgzxRFkFAABIelX1bi1YVRsUjCWpydOiBatqVVXvNmlmiDfbhOMDBw6orKxMTqdTTqdTZWVlOnjwYLe3Wbt2rSZPnqzs7Gw5HA7V1dV1GnP++efL4XAEfcyePTs2DwIAAFiOt81QRWWDjBBf81+rqGyQty3UCCQa24TjOXPmqK6uTlVVVaqqqlJdXZ3Kysq6vc2XX36pCRMm6P/+3//b7bj58+fL7XYHPu6///5oTh0AAFhYTeP+TivGRzMkuT0tqmn
cH79JwTS2qDnetm2bqqqqtHHjRo0bN06S9MADD6ikpETbt2/XqaeeGvJ2/vD80UcfdXv/AwYMkMvliuqcAQCAPew+1HUw7s042JstVo6rq6vldDoDwViSxo8fL6fTqbfeeqvP97969WplZ2fr9NNP1y233KJDhw51O/7w4cNqbm4O+gAAAPaUMygjquNgb7ZYOW5qalJOTk6n6zk5OWpqaurTfV955ZXKz8+Xy+VSfX29Fi9erK1bt2r9+vVd3mbp0qWqqKjo0/cFAADWUJw/RHnODDV5WkLWHTskuZztbd2Q+ExdOV6yZEmnzXAdPzZt2iRJcjg69xg0DCPk9UjMnz9fF198sQoLCzV79mw99dRTevnll1VbW9vlbRYvXiyPxxP4+PTTT/s0BwAAYJ7UFIfKpxVIag/CR/N/Xj6tgH7HScLUlePrrruux84QJ598st599119/vnnnb62Z88e5ebmRnVORUVFSktL0wcffKCioqKQY9LT05Wenh7V7wsAAMxTWpinFXOLOvU5dtHnOOmYGo6zs7OVnZ3d47iSkhJ5PB7V1NSouLhYkvT222/L4/HorLPOiuqc3nvvPbW2tiovj18CAACSSWlhniYWuDghL8nZouZ45MiRKi0t1fz58wNt1n70ox/p0ksvDepUcdppp2np0qWaMWOGJGn//v365JNPtGvXLknS9u3bJUkul0sul0s7duzQ6tWrdckllyg7O1sNDQ26+eabNXr0aE2YMCHOjxIAAJgtNcWhkhFZZk8DJrJFtwqpvaPEGWecoUmTJmnSpEk688wz9dhjjwWN2b59uzweT+DzdevWafTo0Zo6daokafbs2Ro9erTuu+8+SVL//v31yiuvaPLkyTr11FN1/fXXa9KkSXr55ZeVmpoavwcHAAAAS3AYhsFxL33U3Nwsp9Mpj8ejzMxMs6cDAACADsLNa7ZZOQYAAABijXAMAAAA+BCOAQAAAB/CMQAAAOBDOAYAAAB8CMcAAACAD+EYAAAA8CEcAwAAAD6EYwAAAMCHcAwAAAD4EI4BAAAAH8IxAAAA4EM4BgAAAHwIxwAAAIAP4RgAAADwIRwDAAAAPoRjAAAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfAjHAAAAgA/hGAAAAPAhHAMAAAA+hGMAAADAh3AMAAAA+BCOAQAAAB/CMQAAAOBDOAYAAAB8CMcAAACAD+EYAAAA8CEcAwAAAD6EYwAAAMCHcAwAAAD4EI4BAAAAH8IxAAAA4EM4BgAAAHwIxwAAAIBPP7MnAACAnXjbDNU07tfuQy3KGZSh4vwhSk1xmD0tAFFCOAYAIExV9W5VVDbI7WkJXMtzZqh8WoFKC/NMnBmAaKGsAgCAMFTVu7VgVW1QMJakJk+LFqyqVVW926SZAYgmwjEAAD3wthmqqGyQEeJr/msVlQ3ytoUaAcBOCMcAAPSgpnF/pxXjoxmS3J4W1TTuj9+kAMQE4RgAgB7sPtR1MO7NOADWRTgGAKAHOYMyojoOgHURjgEA6EFx/hDlOTPUVcM2h9q7VhTnD4nntADEAOEYAIAepKY4VD6tQJI6BWT/5+XTCuh3DCQAwjEAAGEoLczTirlFcjmDSydczgytmFtEn2MgQXAICAAAYSotzNPEAhcn5AEJjHAMAEAEUlMcKhmRZfY0AMQI4RgAAABx5W0zLPsODOEYAAAAcVNV71ZFZUPQwTp5zgyVTyuwRO0+G/IAAAAQF1X1bi1YVdvpxMkmT4sWrKpVVb3bpJl9i3AMAACAmPO2GaqobJAR4mv+axWVDfK2hRoRP4RjAAAAxFxN4/5OK8ZHMyS5PS2qadwfv0mFQDgGAABAzO0+1HUw7s24WCEcAwAAIOZyBmX0PCiCcbFCOAYAAEDMFecPUZ4zo9MR7H4OtXetKM4fEs9pdUI4BgAAQMylpjhUPq1AkjoFZP/n5dMKTO93TDgGAABAXJQW5mnF3CK5nMGlEy5nhlbMLbJEn2MOAQEAAEDclBbmaWKBixPyAAB9Z+UjVwEgXKkpDpWMyDJ7GiERjgHAJqx+5CoAJAJqjgHABuxw5CoAJALCMQBYnF2OXAWAREA4BgCLs8uRqwCQCAjHAGBxdjlyFQASAeEYACzOLkeuAkAiIBwDgMXZ5chVAEgEhGMAsDi7HLkKAImAcAwANmCHI1cBIBFwCAgA2ITVj1wFgERAOAYAG7HykasAkAgoqwAAAAB8CMcAAACAj23C8YEDB1RWVian0ymn06mysjIdPHiwy/Gtra1atGiRzjjjDA0cOFBDhw7VVVddpV27dgWNO3z4sP7jP/5D2dnZGjhwoC677DJ99tlnMX40AAAAsCLbhOM5c+aorq5OVVVVqqqqUl1dncrKyroc/9VXX6m2tlZ33HGHamtrtXbtWv3973/XZZddFjRu4cKFeuaZZ/TEE0/ozTff1BdffKFLL71UXq831g8JAAAAFuMwDMMwexI92bZtmwoKCrRx40aNGzdOkrRx40aVlJTo/fff16mnnhrW/bzzzjsqLi7Wxx9/rBNPPFEej0fHHXecHnvsMc2aNUuStGvXLg0bNkwvvPCCJk+eHNb9Njc3y+l0yuPxKDMzs3cPEgAAADETbl6zxcpxdXW1nE5nIBhL0vjx4+V0OvXWW2+FfT8ej0cOh0PHHnusJGnz5s1qbW3VpEmTAmOGDh2qwsLCbu/38OHDam5uDvoAAACA/dkiHDc1NSknJ6fT9ZycHDU1NYV1Hy0tLbr11ls1Z86cwKuFpqYm9e/fX4MHDw4am5ub2+39Ll26NFD77HQ6NWzYsAgeDQAAAKzK1HC8ZMkSORyObj82bdokSXI4Oje5Nwwj5PWOWltbNXv2bLW1tWn58uU9ju/pfhcvXiyPxxP4+PTTT3u8TwAAAFifqYeAXHfddZo9e3a3Y04++WS9++67+vzzzzt9bc+ePcrNze329q2trZo5c6YaGxu1YcOGoBoTl8ulI0eO6MCBA0Grx7t379ZZZ53V5X2mp6crPT292+8LAAAA+zE1HGdnZys7O7vHcSUlJfJ4PKqpqVFxcbEk6e2335bH4+k2xPqD8QcffKBXX31VWVnBp0qNGTNGaWlpWr9+vWbOnClJcrvdqq+v11133dWHRwYAAAA7skXN8ciRI1VaWqr58+dr48aN2rhxo+bPn69LL700qFPFaaedpmeeeUaS9M033+gHP/iBNm3apNWrV8vr9aqpqUlNTU06cuSIJMnpdOqaa67RzTffrFdeeUVbtmzR3LlzdcYZZ+jiiy825bECAADAPKauHEdi9erVuv766wOdJS677DLde++9QWO2b98uj8cjSfrss8+0bt06SdKoUaOCxr366qs6//zzJUl33323+vXrp5kzZ+rrr7/WRRddpIcfflipqamxfUAAAACwHFv0ObY6+hwDAABYW0L1OQYAAADigXAMAAAA+BCOAQAAAB/CMQAAAOBDOAYAAAB8CMcAAACAD+EYAAAA8LHNISBW5m8V3dzcbPJMAAAAEIo/p/V0xAfhOAoOHTokSRo2bJjJMw
EAAEB3Dh06JKfT2eXXOSEvCtra2rRr1y4NGjRIDofD7OkgAs3NzRo2bJg+/fRTTjdMUjwHIPE8AM+BZGAYhg4dOqShQ4cqJaXrymJWjqMgJSVFJ5xwgtnTQB9kZmbyxzDJ8RyAxPMAPAcSXXcrxn5syAMAAAB8CMcAAACAD+EYSS09PV3l5eVKT083eyowCc8BSDwPwHMA32JDHgAAAODDyjEAAADgQzgGAAAAfAjHAAAAgA/hGAAAAPAhHCPpHDhwQGVlZXI6nXI6nSorK9PBgwe7HN/a2qpFixbpjDPO0MCBAzV06FBdddVV2rVrV/wmjT5Zvny58vPzlZGRoTFjxuiNN97odvzrr7+uMWPGKCMjQ8OHD9d9990Xp5kiViJ5Dqxdu1YTJ07Ucccdp8zMTJWUlOjFF1+M42wRK5H+LfD729/+pn79+mnUqFGxnSAsgXCMpDNnzhzV1dWpqqpKVVVVqqurU1lZWZfjv/rqK9XW1uqOO+5QbW2t1q5dq7///e+67LLL4jhr9NaTTz6phQsX6vbbb9eWLVt0zjnnaMqUKfrkk09Cjm9sbNQll1yic845R1u2bNFtt92m66+/Xk8//XScZ45oifQ58Ne//lUTJ07UCy+8oM2bN+uCCy7QtGnTtGXLljjPHNEU6fPAz+Px6KqrrtJFF10Up5nCbLRyQ1LZtm2bCgoKtHHjRo0bN06StHHjRpWUlOj999/XqaeeGtb9vPPOOyouLtbHH3+sE088MZZTRh+NGzdORUVFWrFiReDayJEjdfnll2vp0qWdxi9atEjr1q3Ttm3bAteuvfZabd26VdXV1XGZM6Ir0udAKKeffrpmzZqln/3sZ7GaJmKst8+D2bNn65RTTlFqaqqeffZZ1dXVxWG2MBMrx0gq1dXVcjqdgWAsSePHj5fT6dRbb70V9v14PB45HA4de+yxMZglouXIkSPavHmzJk2aFHR90qRJXf73rq6u7jR+8uTJ2rRpk1pbW2M2V8RGb54DHbW1tenQoUMaMmRILKaIOOjt8+Chhx7Sjh07VF5eHuspwkL6mT0BIJ6ampqUk5PT6XpOTo6amprCuo+WlhbdeuutmjNnjjIzM6M9RUTR3r175fV6lZubG3Q9Nze3y//eTU1NIcd/88032rt3r/Ly8mI2X0Rfb54DHf3qV7/Sl19+qZkzZ8ZiioiD3jwPPvjgA916661644031K8fcSmZsHKMhLBkyRI5HI5uPzZt2iRJcjgcnW5vGEbI6x21trZq9uzZamtr0/Lly6P+OBAbHf/b9vTfO9T4UNdhH5E+B/zWrFmjJUuW6Mknnwz5whr2Eu7zwOv1as6cOaqoqNA///M/x2t6sAheCiEhXHfddZo9e3a3Y04++WS9++67+vzzzzt9bc+ePZ1WFDpqbW3VzJkz1djYqA0bNrBqbAPZ2dlKTU3ttDK0e/fuLv97u1yukOP79eunrKysmM0VsdGb54Dfk08+qWuuuUZ/+tOfdPHFF8dymoixSJ8Hhw4d0qZNm7RlyxZdd911ktrLawzDUL9+/fTSSy/pwgsvjMvcEX+EYySE7OxsZWdn9ziupKREHo9HNTU1Ki4uliS9/fbb8ng8Ouuss7q8nT8Yf/DBB3r11VcJSTbRv39/jRkzRuvXr9eMGTMC19evX6/p06eHvE1JSYkqKyuDrr300ksaO3as0tLSYjpfRF9vngNS+4rxD3/4Q61Zs0ZTp06Nx1QRQ5E+DzIzM/U///M/QdeWL1+uDRs26KmnnlJ+fn7M5wwTGUCSKS0tNc4880yjurraqK6uNs444wzj0ksvDRpz6qmnGmvXrjUMwzBaW1uNyy67zDjhhBOMuro6w+12Bz4OHz5sxkNABJ544gkjLS3NWLlypdHQ0GAsXLjQGDhwoPHRRx8ZhmEYt956q1FWVhYY/+GHHxoDBgwwbrzxRqOhocFYuXKlkZaWZjz11FNmPQT0UaTPgccff9zo16+f8fvf/z7o9/3gwYNmPQREQaTPg47Ky8uN7373u3GaLcxEOEbS2bdvn3HllVcagwYNMgYNGmRceeWVxoEDB4LGSDIeeughwzAMo7Gx0ZAU8uPVV1+N+/wRud///vfGSSedZPTv398oKioyXn/99cDXrr76auO8884LGv/aa68Zo0ePNvr372+cfPLJxooVK+I8Y0RbJM+B8847L+Tv+9VXXx3/iSOqIv1bcDTCcfKgzzEAAADgQ7cKAAAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfAjHAAAAgA/hGAAAAPAhHAMAAAA+hGMAAADAh3AMAAlo3rx5cjgccjgcSktL0/Dhw3XLLbfoyy+/DIx5+umndf7558vpdOqYY47RmWeeqTvvvFP79+8Puq+vv/5agwcP1pAhQ/T11193+l5/+MMfdP755yszM1MOh0MHDx6M9cMDgJghHANAgiotLZXb7daHH36oX/ziF1q+fLluueUWSdLtt9+uWbNm6Xvf+57+8pe/qL6+Xr/61a+0detWPfbYY0H38/TTT6uwsFAFBQVau3Ztp+/z1VdfqbS0VLfddltcHhcAxJLDMAzD7EkAAKJr3rx5OnjwoJ599tnAtfnz5+v555/Xc889p3Hjxumee+7RDTfc0Om2Bw8e1LHHHhv4/IILLtDs2bNlGIb++7//Wxs2bAj5PV977TVdcMEFOnDgQNDtAcBO+pk9AQBAfHznO99Ra2urVq9erWOOOUY//vGPQ447Otju2LFD1dXVWrt2rQzD0MKFC/Xhhx9q+PDhcZo1AMQXZRUAkARqamr0+OOP66KLLtIHH3yg4cOHKy0trcfbPfjgg5oyZUqg5ri0tFQPPvhgHGYMAOYgHANAgnr++ed1zDHHKCMjQyUlJTr33HP1u9/9ToZhyOFw9Hh7r9erRx55RHPnzg1cmzt3rh555BF5vd5YTh0ATENZBQAkqAsuuEArVqxQWlqahg4dGlgp/ud//me9+eabam1t7Xb1+MUXX9TOnTs1a9asoOter1cvvfSSpkyZEtP5A4AZWDkGgAQ1cOBA/dM//ZNOOumkoBA8Z84cffHFF1q+fHnI2/lbsa1cuVKzZ89WXV1d0MeVV16plStXxuMhAEDcsXIMAElm3Lhx+ulPf6qbb75ZO3fu1IwZMzR06FD94x//0H333aezzz5bc+bMUWVlpdatW6fCwsKg21999dWaOnWq9uzZo+OOO05NTU1qamrSP/7xD0nS//zP/2jQoEE68cQTNWTIEDMeIgD0GivHAJCEli1bpscff1xvv/22Jk+erNNPP1033XSTzjzzTF199dV69NFHNXDgQF100UWdbnvBBRdo0KBBgX7I9913n0aPHq358+dLks4991yNHj1a69ati+tjAoBooM8xAAAA4MPKMQAAAOBDOAYAAAB8CMcAAACAD+EYAAAA8CEcAwAAAD6EYwAAAMCHcAwAAAD4EI4BAAAAH8IxAAAA4EM4BgAAAHwIxwAAAIDP/wc9/ahdqkYreAAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "encodings = encodings.detach()\n", + "\n", + "pca = PCA(n_components=2)\n", + "\n", + "principalComponents = pca.fit_transform(encodings)\n", + "\n", + "fig = plt.figure(figsize=(8, 8))\n", + "ax = fig.add_subplot(1, 1, 1)\n", + "ax.set_title(\"Encodings\")\n", + "ax.set_xlabel('PCA1'); ax.set_ylabel('PCA2')\n", + "\n", + "ax.scatter(principalComponents[:, 0], principalComponents[:, 1])\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/examples/multi_task.ipynb b/chemprop-updated/examples/multi_task.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0c8a67aa3b85b6e816bb89279919061e31da851c --- /dev/null +++ b/chemprop-updated/examples/multi_task.ipynb @@ -0,0 +1,315 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Multitask model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/multi_task.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from lightning import pytorch as pl\n", + "import torch\n", + "import numpy as np\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "from chemprop import data, models, nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step 1: Make datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol_multitask.csv\"\n", + "smiles_column = 'smiles' \n", + "target_columns = [\"mu\",\"alpha\",\"homo\",\"lumo\",\"gap\",\"r2\",\"zpve\",\"cv\",\"u0\",\"u298\",\"h298\",\"g298\"] \n", + "\n", + "df_input = pd.read_csv(input_path)\n", + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values\n", + "\n", + "datapoints = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step 2: Split data and make datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "split_indices = data.make_split_indices(datapoints)\n", + "train_data, val_data, test_data = data.split_data_by_indices(datapoints, *split_indices)\n", + "\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0])\n", + "val_dset = data.MoleculeDataset(val_data[0])\n", + 
"test_dset = data.MoleculeDataset(test_data[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step 3: Scale targets and make dataloaders" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "output_scaler = train_dset.normalize_targets()\n", + "val_dset.normalize_targets(output_scaler)\n", + "\n", + "train_loader = data.build_dataloader(train_dset)\n", + "val_loader = data.build_dataloader(val_dset)\n", + "test_loader = data.build_dataloader(test_dset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step 4: Define the model" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "output_transform = nn.transforms.UnscaleTransform.from_standard_scaler(output_scaler)\n", + "\n", + "ffn = nn.RegressionFFN(n_tasks = len(target_columns), output_transform=output_transform)\n", + "chemprop_model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), ffn)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step 5: Set up the trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(logger=False, enable_checkpointing=False, max_epochs=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step 6: Train the model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | RegressionFFN | 93.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "321 K Trainable params\n", + "0 Non-trainable params\n", + "321 K Total params\n", + "1.286 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smileslipo
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc143.54
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...-1.18
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl3.69
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...3.37
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...3.10
.........
95CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...2.20
96CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...2.04
97CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...4.49
98COc1ccc(Cc2c(N)n[nH]c2N)cc10.20
99CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...2.00
\n", + "

100 rows × 2 columns

\n", + "" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_test = pd.read_csv(test_path)\n", + "df_test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get smiles" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...\n", + " ... \n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...\n", + "Name: smiles, Length: 100, dtype: object" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smis = df_test[smiles_column]\n", + "smis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "test_data = [data.MoleculeDatapoint.from_smi(smi) for smi in smis]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "test_dset = data.MoleculeDataset(test_data, featurizer=featurizer)\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|████████████████████| 2/2 [00:00<00:00, 48.17it/s]\n" + ] + } + ], + "source": [ + "with torch.inference_mode():\n", + " trainer = pl.Trainer(\n", + " logger=None,\n", + " enable_progress_bar=True,\n", + " accelerator=\"cpu\",\n", + " devices=1\n", + " )\n", + " test_preds = trainer.predict(mpnn, test_loader)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smileslipopred
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc143.542.253542
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...-1.182.235016
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl3.692.245891
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...3.372.249847
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...3.102.228097
............
95CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...2.202.233408
96CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...2.042.236931
97CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...4.492.237789
98COc1ccc(Cc2c(N)n[nH]c2N)cc10.202.252625
99CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...2.002.235702
\n", + "

100 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " smiles lipo pred\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54 2.253542\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18 2.235016\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69 2.245891\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37 2.249847\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10 2.228097\n", + ".. ... ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20 2.233408\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04 2.236931\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49 2.237789\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20 2.252625\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00 2.235702\n", + "\n", + "[100 rows x 3 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_preds = np.concatenate(test_preds, axis=0)\n", + "df_test['pred'] = test_preds\n", + "df_test" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/examples/predicting_regression_multicomponent.ipynb b/chemprop-updated/examples/predicting_regression_multicomponent.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e43d7331324d6548a55018413c0d738f0192dd9d --- /dev/null +++ b/chemprop-updated/examples/predicting_regression_multicomponent.ipynb @@ -0,0 +1,618 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predicting Regression - Multicomponent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/predicting_regression_multicomponent.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import torch\n", + "from lightning import pytorch as pl\n", + "from pathlib import Path\n", + "\n", + "from chemprop import data, featurizers\n", + "from chemprop.models import multi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change model input here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "checkpoint_path = chemprop_dir / \"tests\" / \"data\" / \"example_model_v2_regression_mol+mol.ckpt\" # path to the checkpoint file. 
\n", + "# If the checkpoint file is generated using the training notebook, it will be in the `checkpoints` folder with name similar to `checkpoints/epoch=19-step=180.ckpt`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load model" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MulticomponentMPNN(\n", + " (message_passing): MulticomponentMessagePassing(\n", + " (blocks): ModuleList(\n", + " (0-1): 2 x BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): GraphTransform(\n", + " (V_transform): Identity()\n", + " (E_transform): Identity()\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(600, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=600, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mcmpnn = multi.MulticomponentMPNN.load_from_checkpoint(checkpoint_path)\n", + "mcmpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change predict input here" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "test_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol+mol\" / \"mol+mol.csv\" # path to your .csv file containing SMILES strings to make predictions for\n", + "smiles_columns = ['smiles', 'solvent'] # name of the column containing SMILES strings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load test smiles" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilessolventpeakwavs_max
0CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2C...ClCCl642.0
1C(=C/c1cnccn1)\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3c...ClCCl420.0
2CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+]...O544.0
3c1ccc2[nH]ccc2c1O290.0
4CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5c...ClC(Cl)Cl736.0
............
95COc1ccc(C2CC(c3ccc(O)cc3)=NN2c2ccc(S(N)(=O)=O)...C1CCOC1359.0
96COc1ccc2c3c(c4ccc(OC)cc4c2c1)C1(c2ccccc2-c2ccc...C1CCCCC1386.0
97CCCCOc1c(C=C2N(C)c3ccccc3C2(C)C)c(=O)c1=OCCO425.0
98Cc1cc2ccc(-c3cccc4cccc(-c5ccc6cc(C)c(=O)oc6c5)...c1ccccc1324.0
99Cc1ccc(C(=O)c2c(C)c3ccc4cccc5c6cccc7ccc2c(c76)...ClCCl391.0
\n", + "

100 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " smiles solvent peakwavs_max\n", + "0 CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2C... ClCCl 642.0\n", + "1 C(=C/c1cnccn1)\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3c... ClCCl 420.0\n", + "2 CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+]... O 544.0\n", + "3 c1ccc2[nH]ccc2c1 O 290.0\n", + "4 CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5c... ClC(Cl)Cl 736.0\n", + ".. ... ... ...\n", + "95 COc1ccc(C2CC(c3ccc(O)cc3)=NN2c2ccc(S(N)(=O)=O)... C1CCOC1 359.0\n", + "96 COc1ccc2c3c(c4ccc(OC)cc4c2c1)C1(c2ccccc2-c2ccc... C1CCCCC1 386.0\n", + "97 CCCCOc1c(C=C2N(C)c3ccccc3C2(C)C)c(=O)c1=O CCO 425.0\n", + "98 Cc1cc2ccc(-c3cccc4cccc(-c5ccc6cc(C)c(=O)oc6c5)... c1ccccc1 324.0\n", + "99 Cc1ccc(C(=O)c2c(C)c3ccc4cccc5c6cccc7ccc2c(c76)... ClCCl 391.0\n", + "\n", + "[100 rows x 3 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_test = pd.read_csv(test_path)\n", + "df_test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get smiles" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([['CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2CCCC)C(=O)N(CCCC)C1=S',\n", + " 'ClCCl'],\n", + " ['C(=C/c1cnccn1)\\\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3cnccn3)cc2)cc1',\n", + " 'ClCCl'],\n", + " ['CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+](C)C)cc-3oc2c1',\n", + " 'O'],\n", + " ['c1ccc2[nH]ccc2c1', 'O'],\n", + " ['CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5ccccc5c4C3(C)C)CCCC1=C2c1ccccc1C(=O)O',\n", + " 'ClC(Cl)Cl']], dtype=object)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smiss = df_test[smiles_columns].values\n", + "smiss[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "n_componenets = len(smiles_columns)\n", + "test_datapointss = [[data.MoleculeDatapoint.from_smi(smi) for smi in smiss[:, i]] for i in range(n_componenets)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "test_dsets = [data.MoleculeDataset(test_datapoints, featurizer) for test_datapoints in test_datapointss]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Get multicomponent dataset and data loader" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "test_mcdset = data.MulticomponentDataset(test_dsets)\n", + "test_loader = data.build_dataloader(test_mcdset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 0%| | 0/2 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
[DataFrame preview (HTML rendering, markup stripped): columns smiles, solvent, peakwavs_max, pred; same data as the text/plain output below]
100 rows × 4 columns
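The `pred` column in this table comes from running a Lightning trainer (set up in the cells above) over `test_loader`. A minimal sketch of that prediction step, mirroring the reaction example later in this file; `mcmpnn` is a hypothetical name standing in for whatever variable the multicomponent MPNN was loaded into earlier:

```python
import torch
from lightning import pytorch as pl

# Sketch only: `mcmpnn` stands in for the multicomponent MPNN loaded earlier in
# this notebook, and `test_loader` is the dataloader built above.
with torch.inference_mode():
    trainer = pl.Trainer(logger=None, enable_progress_bar=True, accelerator="cpu", devices=1)
    test_preds = trainer.predict(mcmpnn, test_loader)  # list of per-batch prediction tensors
```

The resulting list of per-batch tensors is concatenated and written into `df_test['pred']` in the cell shown below.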

\n", + "" + ], + "text/plain": [ + " smiles solvent \\\n", + "0 CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2C... ClCCl \n", + "1 C(=C/c1cnccn1)\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3c... ClCCl \n", + "2 CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+]... O \n", + "3 c1ccc2[nH]ccc2c1 O \n", + "4 CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5c... ClC(Cl)Cl \n", + ".. ... ... \n", + "95 COc1ccc(C2CC(c3ccc(O)cc3)=NN2c2ccc(S(N)(=O)=O)... C1CCOC1 \n", + "96 COc1ccc2c3c(c4ccc(OC)cc4c2c1)C1(c2ccccc2-c2ccc... C1CCCCC1 \n", + "97 CCCCOc1c(C=C2N(C)c3ccccc3C2(C)C)c(=O)c1=O CCO \n", + "98 Cc1cc2ccc(-c3cccc4cccc(-c5ccc6cc(C)c(=O)oc6c5)... c1ccccc1 \n", + "99 Cc1ccc(C(=O)c2c(C)c3ccc4cccc5c6cccc7ccc2c(c76)... ClCCl \n", + "\n", + " peakwavs_max pred \n", + "0 642.0 454.898621 \n", + "1 420.0 453.561584 \n", + "2 544.0 448.694977 \n", + "3 290.0 448.159760 \n", + "4 736.0 456.897003 \n", + ".. ... ... \n", + "95 359.0 454.548584 \n", + "96 386.0 455.287140 \n", + "97 425.0 453.560364 \n", + "98 324.0 454.656891 \n", + "99 391.0 453.118774 \n", + "\n", + "[100 rows x 4 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_preds = np.concatenate(test_preds, axis=0)\n", + "df_test['pred'] = test_preds\n", + "df_test" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/examples/predicting_regression_reaction.ipynb b/chemprop-updated/examples/predicting_regression_reaction.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..15523b7b922a461508c75712cf5e4232b2e723f5 --- /dev/null +++ b/chemprop-updated/examples/predicting_regression_reaction.ipynb @@ -0,0 +1,435 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predicting Regression - Reaction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/predicting_regression_reaction.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import torch\n", + "from lightning import pytorch as pl\n", + "from pathlib import Path\n", + "\n", + "from chemprop import data, featurizers, models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change model input here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + 
"checkpoint_path = chemprop_dir / \"tests\" / \"data\" / \"example_model_v2_regression_rxn.ckpt\" # path to the checkpoint file.\n", + "# If the checkpoint file is generated using the training notebook, it will be in the `checkpoints` folder with name similar to `checkpoints/epoch=19-step=180.ckpt`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load model" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=134, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=406, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): GraphTransform(\n", + " (V_transform): Identity()\n", + " (E_transform): Identity()\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn = models.MPNN.load_from_checkpoint(checkpoint_path)\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change predict input here" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "test_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"rxn\" / \"rxn.csv\"\n", + "smiles_column = 'smiles'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load smiles" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['[O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:15])([H:13])[H:14])([H:11])[H:12])([H:9])[H:10])[H:8]>>[C:3](=[C:4]=[O:5])([H:11])[H:12].[C:6]([O:7][H:15])([H:8])([H:13])[H:14].[O:1]=[C:2]([H:9])[H:10]',\n", + " '[C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:4]3([H:11])[O:5][C@:6]1([H:12])[C@@:7]23[H:13]>>[C:1]1([H:8])([H:9])[O:2][C:3]([H:10])=[C:7]([H:13])[C@:6]1([O+:5]=[C-:4][H:11])[H:12]',\n", + " '[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:16])=[C:7]1[H:17])([H:8])([H:9])[H:10]',\n", + " '[C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C-:1]([O+:2]=[C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])[H:12])([H:8])[H:10].[H:9][H:11]',\n", + " 
'[C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]=[C:4]([H:10])[H:11])[C:5](=[O:6])[H:12])([H:7])([H:8])[H:9]'],\n", + " dtype=object)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_test = pd.read_csv(test_path)\n", + "\n", + "smis = df_test.loc[:, smiles_column].values\n", + "smis[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "test_data = [data.ReactionDatapoint.from_smi(smi) for smi in smis]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define featurizer" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.CondensedGraphOfReactionFeaturizer(mode_=\"PROD_DIFF\")\n", + "# Testing parameters should match training parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get dataset and dataloader" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "test_dset = data.ReactionDataset(test_data, featurizer=featurizer)\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Perform tests" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|███████████████████| 2/2 [00:00<00:00, 119.42it/s]\n" + ] + } + ], + "source": [ + "with torch.inference_mode():\n", + " trainer = pl.Trainer(\n", + " logger=None,\n", + " enable_progress_bar=True,\n", + " accelerator=\"cpu\",\n", + " devices=1\n", + " )\n", + " test_preds = trainer.predict(mpnn, test_loader)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[DataFrame preview (HTML rendering, markup stripped): columns smiles, ea, preds; same data as the text/plain output below]
100 rows × 3 columns
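The table pairs the reference `ea` values with the model's `preds`. If you want a single-number summary of that comparison, a short follow-up cell (hypothetical, not part of the original notebook) could be run after the `df_test['preds'] = test_preds` assignment shown below:

```python
import numpy as np

# Assumes `df_test` already carries the 'ea' and 'preds' columns from this notebook.
errors = df_test["preds"] - df_test["ea"]
rmse = float(np.sqrt(np.mean(errors**2)))
mae = float(np.mean(np.abs(errors)))
print(f"RMSE = {rmse:.3f}, MAE = {mae:.3f}")
```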

\n", + "
" + ], + "text/plain": [ + " smiles ea preds\n", + "0 [O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:1... 8.898934 8.071494\n", + "1 [C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:... 5.464328 8.108090\n", + "2 [C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H... 5.270552 8.087680\n", + "3 [C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])... 8.473006 8.070966\n", + "4 [C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H... 5.579037 8.065533\n", + ".. ... ... ...\n", + "95 [C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([C:4]... 9.295665 8.071316\n", + "96 [O:1]=[C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11... 7.753442 8.085133\n", + "97 [C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H... 10.650215 8.096391\n", + "98 [C:1]1([H:8])([H:9])[C@@:2]2([H:10])[N:3]1[C:4... 10.138945 8.202709\n", + "99 [C:1]([C@@:2]1([C:3]([C:4]([O:5][H:15])([H:13]... 6.979934 8.107012\n", + "\n", + "[100 rows x 3 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_preds = np.concatenate(test_preds, axis=0)\n", + "df_test['preds'] = test_preds\n", + "df_test" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/examples/rigr_featurizer.ipynb b/chemprop-updated/examples/rigr_featurizer.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..868ca7d0b75e05e9cb3d6fe96d6c414a569cd38b --- /dev/null +++ b/chemprop-updated/examples/rigr_featurizer.ipynb @@ -0,0 +1,577 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RIGR: Resonance Invariant Graph Representation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "RIGR is introduced and discussed in our work [RIGR: Resonance Invariant Graph Representation for Molecular Property Prediction](). It is a featurizer implemented as part of Chemprop v2.1.2, designed to impose strict resonance invariance for molecular property prediction tasks. It ensures a single graph representation of different resonance structures of the same molecule, including non-equivalent resonance forms. For CLI users, RIGR is available as a choice for the multi-hot atom featurization scheme. To use RIGR, add the following argument to your training or inference script:\n", + " ```bash\n", + " --multi-hot-atom-featurizer-mode RIGR\n", + " ```\n", + "In this Jupyter notebook, we show how to train and infer models using RIGR which is very similar to the generic training [example](./training.ipynb). 
RIGR can be easily implemented in your existing code by changing the `SimpleMoleculeMolGraphFeaturizer()` to this:\n", + " ```python\n", + " rigr_atom_featurizer = RIGRAtomFeaturizer()\n", + " rigr_bond_featurizer = RIGRBondFeaturizer()\n", + " featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer(atom_featurizer=rigr_atom_featurizer, bond_featurizer=rigr_bond_featurizer)\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/rigr_featurizer.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from typing import Sequence\n", + "\n", + "from lightning import pytorch as pl\n", + "import numpy as np\n", + "import pandas as pd\n", + "from rdkit import Chem\n", + "from rdkit.Chem.rdchem import Atom, Bond, Mol\n", + "\n", + "from chemprop import data, featurizers, models, nn\n", + "from chemprop.featurizers.atom import RIGRAtomFeaturizer\n", + "from chemprop.featurizers.bond import RIGRBondFeaturizer\n", + "from chemprop.featurizers.molecule import ChargeFeaturizer\n", + "from chemprop.utils import make_mol" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\" # path to your data .csv file\n", + "num_workers = 0 # number of workers for dataloader. 0 means using main process for data loading\n", + "smiles_column = 'smiles' # name of the column containing SMILES strings\n", + "target_columns = ['lipo'] # list of names of the columns containing targets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[DataFrame preview (HTML rendering, markup stripped): columns smiles, lipo; same data as the text/plain output below]
100 rows × 2 columns
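Before featurizing this data, it can be instructive to see how much leaner the RIGR representation is than the default one. A small sketch (not part of the original notebook) that compares feature dimensions, using only classes already imported in this notebook:

```python
from chemprop import featurizers
from chemprop.featurizers.atom import RIGRAtomFeaturizer
from chemprop.featurizers.bond import RIGRBondFeaturizer

default_featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()
rigr_featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer(
    atom_featurizer=RIGRAtomFeaturizer(), bond_featurizer=RIGRBondFeaturizer()
)

# RIGR drops the resonance-dependent atom/bond features, so both dimensions shrink.
print("default atom_fdim / bond_fdim:", default_featurizer.atom_fdim, default_featurizer.bond_fdim)
print("RIGR    atom_fdim / bond_fdim:", rigr_featurizer.atom_fdim, rigr_featurizer.bond_fdim)
```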

\n", + "
" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Featurization and Make Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "RIGR uses only the subset of atom and bond features from Chemprop that remain invariant across different resonance forms. The tables below indicate which atom and bond features are present and absent in RIGR.\n", + "\n", + "### Atom Features\n", + "\n", + "| **Feature** | **Description** | **Present in RIGR?** |\n", + "|------------------------|---------------------------------------------------------------------------------|:--------------------:|\n", + "| Atomic number | The choice for atom type denoted by atomic number | ☑️ |\n", + "| Degree | Number of direct neighbors of the atom | ☑️ |\n", + "| Formal charge | Integer charge assigned to the atom | ☐ |\n", + "| Chiral tag | The choices for an atom's chiral tag (See `rdkit.Chem.rdchem.ChiralType`) | ☐ |\n", + "| Number of H | Number of bonded hydrogen atoms | ☑️ |\n", + "| Hybridization | Atom's hybridization type (See `rdkit.Chem.rdchem.HybridizationType`) | ☐ |\n", + "| Aromaticity | Indicates whether the atom is aromatic or not | ☐ |\n", + "| Atomic mass | The atomic mass of the atom | ☑️ |\n", + "\n", + "\n", + "### Bond Features\n", + "\n", + "| **Feature** | **Description** | **Present in RIGR?** |\n", + "|-----------------------|------------------------------------------------------------------------------------------------------|:--------------------:|\n", + "| Bond type | The known bond types: single, double, or triple bond | ☐ |\n", + "| Conjugation | Indicates whether the bond is conjugated or not | ☐ |\n", + "| Ring | Indicates whether the bond is a part of a ring | ☑️ |\n", + "| Stereochemistry | Stores the known bond stereochemistries (See [BondStereo](https://www.rdkit.org/docs/source/rdkit.Chem.rdchem.html#rdkit.Chem.rdchem.BondStereo.values)) | ☐ |" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The return type of make_split_indices has changed in v2.1 - see help(make_split_indices)\n" + ] + } + ], + "source": [ + "mols = [make_mol(smi, add_h=True, keep_h=True) for smi in smis]\n", + "\n", + "charge_featurizer = ChargeFeaturizer()\n", + "x_ds = [charge_featurizer(mol) for mol in mols]\n", + "\n", + "all_data = [data.MoleculeDatapoint(mol, name=smi, y=y, x_d=x_d) for mol, smi, 
y, x_d in zip(mols, smis, ys, x_ds)]\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "rigr_atom_featurizer = RIGRAtomFeaturizer()\n", + "rigr_bond_featurizer = RIGRBondFeaturizer()\n", + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer(atom_featurizer=rigr_atom_featurizer, bond_featurizer=rigr_bond_featurizer)\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0], featurizer)\n", + "scaler = train_dset.normalize_targets()\n", + "\n", + "val_dset = data.MoleculeDataset(val_data[0], featurizer)\n", + "val_dset.normalize_targets(scaler)\n", + "\n", + "test_dset = data.MoleculeDataset(test_data[0], featurizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataloader" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = data.build_dataloader(train_dset, num_workers=num_workers)\n", + "val_loader = data.build_dataloader(val_dset, num_workers=num_workers, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, num_workers=num_workers, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=54, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=352, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=301, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0): RMSE(task_weights=[[1.0]])\n", + " (1): MAE(task_weights=[[1.0]])\n", + " (2): MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mp = nn.BondMessagePassing(\n", + " d_v=featurizer.atom_fdim,\n", + " d_e=featurizer.bond_fdim,\n", + ")\n", + "agg = nn.MeanAggregation()\n", + "output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)\n", + "ffn = nn.RegressionFFN(\n", + " input_dim=mp.output_dim + train_dset.d_xd,\n", + " output_transform=output_transform,\n", + ")\n", + "batch_norm = True\n", + "metric_list = [nn.metrics.RMSE(), nn.metrics.MAE()] # Only the first metric is used for training and early stopping\n", + "mpnn = models.MPNN(mp, agg, ffn, batch_norm, metric_list)\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "## Trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/akshatz/anaconda3/envs/chemprop/lib/python3.12/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/akshatz/anaconda3/envs/chemprop/lib/python3.12 ...\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_checkpointing=True, # Use `True` if you want to save model checkpoints. The checkpoints will be saved in the `checkpoints` folder.\n", + " enable_progress_bar=True,\n", + " accelerator=\"auto\",\n", + " devices=1,\n", + " max_epochs=20, # number of epochs to train for\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start Training" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n", + "/home/akshatz/anaconda3/envs/chemprop/lib/python3.12/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/akshatz/chemprop/examples/checkpoints exists and is not empty.\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/akshatz/anaconda3/envs/chemprop/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=63` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 212 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | BatchNorm1d | 600 | train\n", + "3 | predictor | RegressionFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "303 K Trainable params\n", + "0 Non-trainable params\n", + "303 K Total params\n", + "1.214 Total estimated model params size (MB)\n", + "25 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/mae 0.6620886325836182 │\n", + "│ test/rmse 0.9359426498413086 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mae \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.6620886325836182 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/rmse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9359426498413086 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(mpnn, test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/examples/shapley_value_with_customized_featurizers.ipynb b/chemprop-updated/examples/shapley_value_with_customized_featurizers.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..2c10b2e5865523784ac51eee827eb8c8a6f9ff8b --- /dev/null +++ b/chemprop-updated/examples/shapley_value_with_customized_featurizers.ipynb @@ -0,0 +1,1242 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Shapley value analysis for Chemprop models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook demonstrates how to perform Shapley (SHAP) value analysis for a chemprop model. 
In addition, it also serves as an example of how to customize chemprop featurizers.\n", + "\n", + "* Example 1: Shapley value analysis to explain importance of default chemprop atom and bond features\n", + "* Example 2: Shapley value analysis to explain importance of particular atom/node and bond/edge\n", + "* Example 3: Shapley value analysis to explain importance of extra features (not yet implemented; planned for a future release, and likely easier to achieve by modifying MoleculeDatapoint and the dataloader functions)\n", + "\n", + "Disclaimer: This notebook is for feature demonstration purposes only. The models used in this notebook are not trained models, and the computed Shapley values are provided solely for illustrative purposes.\n", + "\n", + "Note: This notebook was developed for Chemprop v2.\n", + "\n", + "For the Chemprop v1 SHAP implementation, see: https://github.com/oscarwumit/chemprop_developing/tree/shap_v1\n", + "\n", + "This notebook requires the SHAP package; run \"pip install shap\" if you don't have it installed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/shapley_value_with_customized_featurizers.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " !pip install shap\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Citation for SHAP implementation in Chemprop\n", + "\n", + "Li & Wu, et al. “When Do Quantum Mechanical Descriptors Help Graph Neural Networks to Predict Chemical Properties?” Journal of the American Chemical Society, vol. 146, no. 33, Aug. 2024, pp. 23103–20. https://doi.org/10.1021/jacs.4c04670.\n", + "\n", + "Bibtex format\n", + "
\n",
+    "@article{li_and_wu_qm_des_2024, \n",
+    "\ttitle = {When {Do} {Quantum} {Mechanical} {Descriptors} {Help} {Graph} {Neural} {Networks} to {Predict} {Chemical} {Properties}?}, \n",
+    "\tvolume = {146}, \n",
+    "\tcopyright = {https://doi.org/10.15223/policy-029}, \n",
+    "\tissn = {0002-7863, 1520-5126}, \n",
+    "\turl = {https://pubs.acs.org/doi/10.1021/jacs.4c04670}, \n",
+    "\tdoi = {10.1021/jacs.4c04670}, \n",
+    "\tlanguage = {en}, \n",
+    "\tnumber = {33}, \n",
+    "\turldate = {2025-01-13}, \n",
+    "\tjournal = {Journal of the American Chemical Society}, \n",
+    "\tauthor = {Li, Shih-Cheng and Wu, Haoyang and Menon, Angiras and Spiekermann, Kevin A. and Li, Yi-Pei and Green, William H.}, \n",
+    "\tmonth = aug, \n",
+    "\tyear = {2024}, \n",
+    "\tpages = {23103--23120}, \n",
+    "} \n",
+    "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Customize Chemprop featurizers for SHAP analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Import dependencies and classes\n", + "import sys\n", + "\n", + "from copy import deepcopy\n", + "from lightning import pytorch as pl\n", + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import torch\n", + "\n", + "from dataclasses import InitVar, dataclass\n", + "from typing import List, Sequence, Tuple, Union, Optional\n", + "from rdkit import Chem\n", + "from rdkit.Chem import Mol, Draw\n", + "from rdkit.Chem.rdchem import Atom, Bond, BondType\n", + "\n", + "from chemprop.featurizers.atom import MultiHotAtomFeaturizer \n", + "from chemprop.featurizers.bond import MultiHotBondFeaturizer \n", + "from chemprop.featurizers.molgraph.molecule import SimpleMoleculeMolGraphFeaturizer \n", + "\n", + "from chemprop.data.molgraph import MolGraph \n", + "from chemprop.featurizers.base import GraphFeaturizer \n", + "from chemprop.featurizers.molgraph.mixins import _MolGraphFeaturizerMixin \n", + "\n", + "from chemprop import data, featurizers, models \n", + "\n", + "import shap # do \"pip install shap\" if you don't have it installed\n", + "\n", + "import logging\n", + "\n", + "# Set logging level to WARNING to suppress INFO logs\n", + "logging.getLogger(\"lightning.pytorch.utilities.rank_zero\").setLevel(logging.WARNING)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### CustomMultiHotAtomFeaturizer" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "class CustomMultiHotAtomFeaturizer(MultiHotAtomFeaturizer):\n", + " \"\"\"A custom MultiHotAtomFeaturizer that allows for selective feature ablation.\n", + " \n", + " Parameters\n", + " ----------\n", + " keep_features : List[bool], optional\n", + " a list of booleans to indicate which atom features to keep. If None, all features are kept. For any element that is False, the corresponding feature's encoding is set to all zeros. 
Useful for ablation and SHAP analysis.\n", + " \"\"\"\n", + " \n", + " def __init__(self,\n", + " atomic_nums: Sequence[int],\n", + " degrees: Sequence[int],\n", + " formal_charges: Sequence[int],\n", + " chiral_tags: Sequence[int],\n", + " num_Hs: Sequence[int],\n", + " hybridizations: Sequence[int],\n", + " keep_features: List[bool] = None):\n", + " super().__init__(atomic_nums, degrees, formal_charges, chiral_tags, num_Hs, hybridizations)\n", + " \n", + " if keep_features is None:\n", + " keep_features = [True] * (len(self._subfeats) + 2)\n", + " self.keep_features = keep_features\n", + "\n", + " def __call__(self, a: Atom | None) -> np.ndarray:\n", + " x = np.zeros(self._MultiHotAtomFeaturizer__size)\n", + " if a is None:\n", + " return x\n", + " \n", + " feats = [\n", + " a.GetAtomicNum(),\n", + " a.GetTotalDegree(),\n", + " a.GetFormalCharge(),\n", + " int(a.GetChiralTag()),\n", + " int(a.GetTotalNumHs()),\n", + " a.GetHybridization(),\n", + " ]\n", + " \n", + " i = 0\n", + " for feat, choices, keep in zip(feats, self._subfeats, self.keep_features[:len(feats)]):\n", + " j = choices.get(feat, len(choices))\n", + " if keep:\n", + " x[i + j] = 1\n", + " i += len(choices) + 1\n", + " \n", + " if self.keep_features[len(feats)]:\n", + " x[i] = int(a.GetIsAromatic())\n", + " if self.keep_features[len(feats) + 1]:\n", + " x[i + 1] = 0.01 * a.GetMass()\n", + "\n", + " return x\n", + "\n", + " def zero_mask(self) -> np.ndarray:\n", + " \"\"\"Featurize the atom by setting all bits to zero.\"\"\"\n", + " return np.zeros(len(self))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Atom features all: [1. 0. 0. 0. 0. 0. 0. 1. 0.\n", + " 1. 0. 0. 1. 0. 0. 0. 0. 0.\n", + " 0. 1. 0. 0. 0. 1. 0. 0.12011]\n", + "Atom features some: [1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.\n", + " 0. 0.]\n", + "Atom features none: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 
0.]\n" + ] + } + ], + "source": [ + "# Example usage\n", + "atomic_nums = [6, 7, 8]\n", + "degrees = [1, 2, 3]\n", + "formal_charges = [-1, 0, 1]\n", + "chiral_tags = [0, 1, 2]\n", + "num_Hs = [0, 1, 2]\n", + "hybridizations = [1, 2, 3]\n", + "\n", + "keep_features_all = [True] * 8\n", + "keep_features_some = [True, True, False, True, False, True, True, False]\n", + "keep_features_none = [False] * 8\n", + "\n", + "featurizer_all = CustomMultiHotAtomFeaturizer(\n", + " atomic_nums=atomic_nums,\n", + " degrees=degrees,\n", + " formal_charges=formal_charges,\n", + " chiral_tags=chiral_tags,\n", + " num_Hs=num_Hs,\n", + " hybridizations=hybridizations,\n", + " keep_features=keep_features_all\n", + ")\n", + "\n", + "featurizer_some = CustomMultiHotAtomFeaturizer(\n", + " atomic_nums=atomic_nums,\n", + " degrees=degrees,\n", + " formal_charges=formal_charges,\n", + " chiral_tags=chiral_tags,\n", + " num_Hs=num_Hs,\n", + " hybridizations=hybridizations,\n", + " keep_features=keep_features_some\n", + ")\n", + "\n", + "featurizer_none = CustomMultiHotAtomFeaturizer(\n", + " atomic_nums=atomic_nums,\n", + " degrees=degrees,\n", + " formal_charges=formal_charges,\n", + " chiral_tags=chiral_tags,\n", + " num_Hs=num_Hs,\n", + " hybridizations=hybridizations,\n", + " keep_features=keep_features_none\n", + ")\n", + "\n", + "mol = Chem.MolFromSmiles('CCO')\n", + "atom = mol.GetAtomWithIdx(0) # Get the first atom\n", + "\n", + "features = featurizer_all(atom)\n", + "print(\"Atom features all:\", features)\n", + "\n", + "features = featurizer_some(atom)\n", + "print(\"Atom features some:\", features)\n", + "\n", + "features = featurizer_none(atom)\n", + "print(\"Atom features none:\", features)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### CustomMultiHotBondFeaturizer" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class CustomMultiHotBondFeaturizer(MultiHotBondFeaturizer):\n", + " \"\"\"A custom MultiHotBondFeaturizer that allows for selective feature ablation.\n", + " \n", + " Parameters\n", + " ----------\n", + " keep_features : List[bool], optional\n", + " a list of booleans to indicate which bond features to keep except for nullity. If None, all features are kept. For any element that is False, the corresponding feature's encoding is set to all zeros. 
Useful for ablation and SHAP analysis.\n", + " \"\"\"\n", + " \n", + " def __init__(self,\n", + " bond_types: Sequence[BondType] | None = None,\n", + " stereos: Sequence[int] | None = None,\n", + " keep_features: List[bool] = None):\n", + " super().__init__(bond_types, stereos)\n", + " \n", + " self._MultiHotBondFeaturizer__size = 1 + len(self.bond_types) + 2 + (len(self.stereo) + 1)\n", + "\n", + " if keep_features is None:\n", + " keep_features = [True] * 4 \n", + " self.keep_features = keep_features \n", + "\n", + " def __len__(self) -> int:\n", + " return self._MultiHotBondFeaturizer__size\n", + "\n", + " def __call__(self, b: Bond) -> np.ndarray:\n", + " x = np.zeros(len(self), int)\n", + "\n", + " if b is None:\n", + " x[0] = 1\n", + " return x\n", + " i = 1\n", + " bond_type = b.GetBondType()\n", + " bt_bit, size = self.one_hot_index(bond_type, self.bond_types)\n", + " if self.keep_features[0] and bt_bit != size:\n", + " x[i + bt_bit] = 1\n", + " i += size - 1\n", + "\n", + " if self.keep_features[1]:\n", + " x[i] = int(b.GetIsConjugated())\n", + " if self.keep_features[2]:\n", + " x[i + 1] = int(b.IsInRing())\n", + " i += 2\n", + "\n", + " if self.keep_features[3]:\n", + " stereo_bit, _ = self.one_hot_index(int(b.GetStereo()), self.stereo)\n", + " x[i + stereo_bit] = 1\n", + "\n", + " return x\n", + "\n", + " def zero_mask(self) -> np.ndarray:\n", + " \"\"\"Featurize the bond by setting all bits to zero.\"\"\"\n", + " return np.zeros(len(self), int)\n", + "\n", + " @classmethod\n", + " def one_hot_index(cls, x, xs: Sequence) -> tuple[int, int]:\n", + " \"\"\"Returns a tuple of the index of ``x`` in ``xs`` and ``len(xs) + 1`` if ``x`` is in ``xs``.\n", + " Otherwise, returns a tuple with ``len(xs)`` and ``len(xs) + 1``.\"\"\"\n", + " n = len(xs)\n", + " return xs.index(x) if x in xs else n, n + 1" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bond features all: [0 1 0 0 0 0 0 1 0 0 0 0 0 0]\n", + "Bond features some: [0 1 0 0 0 0 0 0 0 0 0 0 0 0]\n", + "Bond features none: [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n" + ] + } + ], + "source": [ + "# Example usage\n", + "bond_types = [BondType.SINGLE, BondType.DOUBLE, BondType.TRIPLE, BondType.AROMATIC]\n", + "stereos = [0, 1, 2, 3, 4, 5]\n", + "keep_features_all = [True] * 4\n", + "keep_features_some = [True, False, True, False]\n", + "keep_features_none = [False] * 4\n", + "\n", + "featurizer_all = CustomMultiHotBondFeaturizer(\n", + " bond_types=bond_types,\n", + " stereos=stereos,\n", + " keep_features=keep_features_all\n", + ")\n", + "\n", + "featurizer_some = CustomMultiHotBondFeaturizer(\n", + " bond_types=bond_types,\n", + " stereos=stereos,\n", + " keep_features=keep_features_some\n", + ")\n", + "\n", + "featurizer_none = CustomMultiHotBondFeaturizer(\n", + " bond_types=bond_types,\n", + " stereos=stereos,\n", + " keep_features=keep_features_none\n", + ")\n", + "\n", + "mol = Chem.MolFromSmiles('CCO')\n", + "bond = mol.GetBondWithIdx(0) # Get the first bond\n", + "\n", + "features = featurizer_all(bond)\n", + "print(\"Bond features all:\", features)\n", + "\n", + "features = featurizer_some(bond)\n", + "print(\"Bond features some:\", features)\n", + "\n", + "features = featurizer_none(bond)\n", + "print(\"Bond features none:\", features)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### CustomSimpleMoleculeMolGraphFeaturizer" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + 
"metadata": {}, + "outputs": [], + "source": [ + "@dataclass\n", + "class CustomSimpleMoleculeMolGraphFeaturizer(SimpleMoleculeMolGraphFeaturizer):\n", + " \"\"\"A custom SimpleMoleculeMolGraphFeaturizer with additional feature control.\"\"\"\n", + " \n", + " keep_atom_features: Optional[List[bool]] = None\n", + " keep_bond_features: Optional[List[bool]] = None\n", + " keep_atoms: Optional[List[bool]] = None\n", + " keep_bonds: Optional[List[bool]] = None\n", + "\n", + " def __post_init__(self, extra_atom_fdim: int = 0, extra_bond_fdim: int = 0):\n", + " super().__post_init__(extra_atom_fdim, extra_bond_fdim)\n", + "\n", + " if isinstance(self.atom_featurizer, CustomMultiHotAtomFeaturizer) and self.keep_atom_features is not None:\n", + " self.atom_featurizer.keep_features = self.keep_atom_features\n", + " if isinstance(self.bond_featurizer, CustomMultiHotBondFeaturizer) and self.keep_bond_features is not None:\n", + " self.bond_featurizer.keep_features = self.keep_bond_features\n", + "\n", + " def __call__(\n", + " self,\n", + " mol: Chem.Mol,\n", + " atom_features_extra: np.ndarray | None = None,\n", + " bond_features_extra: np.ndarray | None = None,\n", + " ) -> MolGraph:\n", + " n_atoms = mol.GetNumAtoms()\n", + " n_bonds = mol.GetNumBonds()\n", + "\n", + " if self.keep_atoms is None:\n", + " self.keep_atoms = [True] * n_atoms\n", + " if self.keep_bonds is None:\n", + " self.keep_bonds = [True] * n_bonds\n", + "\n", + " if atom_features_extra is not None and len(atom_features_extra) != n_atoms:\n", + " raise ValueError(\n", + " \"Input molecule must have same number of atoms as `len(atom_features_extra)`!\"\n", + " f\"got: {n_atoms} and {len(atom_features_extra)}, respectively\"\n", + " )\n", + " if bond_features_extra is not None and len(bond_features_extra) != n_bonds:\n", + " raise ValueError(\n", + " \"Input molecule must have same number of bonds as `len(bond_features_extra)`!\"\n", + " f\"got: {n_bonds} and {len(bond_features_extra)}, respectively\"\n", + " )\n", + " if n_atoms == 0:\n", + " V = np.zeros((1, self.atom_fdim), dtype=np.single)\n", + " else:\n", + " V = np.array([self.atom_featurizer(a) if self.keep_atoms[a.GetIdx()] else self.atom_featurizer.zero_mask()\n", + " for a in mol.GetAtoms()], dtype=np.single)\n", + "\n", + " if atom_features_extra is not None:\n", + " V = np.hstack((V, atom_features_extra))\n", + "\n", + " E = np.empty((2 * n_bonds, self.bond_fdim))\n", + " edge_index = [[], []]\n", + "\n", + " i = 0\n", + " for u in range(n_atoms):\n", + " for v in range(u + 1, n_atoms):\n", + " bond = mol.GetBondBetweenAtoms(u, v)\n", + " if bond is None:\n", + " continue\n", + "\n", + " x_e = self.bond_featurizer(bond) if self.keep_bonds[bond.GetIdx()] else self.bond_featurizer.zero_mask()\n", + "\n", + " if bond_features_extra is not None:\n", + " x_e = np.concatenate((x_e, bond_features_extra[bond.GetIdx()]), dtype=np.single)\n", + "\n", + " E[i: i + 2] = x_e\n", + " edge_index[0].extend([u, v])\n", + " edge_index[1].extend([v, u])\n", + " i += 2\n", + "\n", + " rev_edge_index = np.arange(len(E)).reshape(-1, 2)[:, ::-1].ravel()\n", + " edge_index = np.array(edge_index, int)\n", + " return MolGraph(V, E, edge_index, rev_edge_index)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Molecule graph: MolGraph(V=array([[1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],\n", + " [1., 0., 0., 0., 0., 0., 
0., 1., 0., 0., 0., 0., 1., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],\n", + " [0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]], dtype=float32), E=array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), edge_index=array([[0, 1, 1, 2],\n", + " [1, 0, 2, 1]]), rev_edge_index=array([1, 0, 3, 2]))\n" + ] + } + ], + "source": [ + "# Example usage\n", + "atom_featurizer = CustomMultiHotAtomFeaturizer(\n", + " atomic_nums=[6, 7, 8],\n", + " degrees=[1, 2, 3],\n", + " formal_charges=[-1, 0, 1],\n", + " chiral_tags=[0, 1, 2],\n", + " num_Hs=[0, 1, 2],\n", + " hybridizations=[1, 2, 3],\n", + " keep_features=[True, True, False, True, False, True, True, False]\n", + ")\n", + "\n", + "bond_featurizer = CustomMultiHotBondFeaturizer(\n", + " bond_types=[BondType.SINGLE, BondType.DOUBLE, BondType.TRIPLE, BondType.AROMATIC],\n", + " stereos=[0, 1, 2, 3, 4, 5],\n", + " keep_features=[True, False, True, False]\n", + ")\n", + "\n", + "featurizer = CustomSimpleMoleculeMolGraphFeaturizer(\n", + " atom_featurizer=atom_featurizer,\n", + " bond_featurizer=bond_featurizer,\n", + " keep_atom_features=[True, True, False, True, False, True, True, False],\n", + " keep_bond_features=[True, False, True, False],\n", + ")\n", + "\n", + "# Example molecule (RDKit Mol object required)\n", + "from rdkit import Chem\n", + "mol = Chem.MolFromSmiles('CCO')\n", + "\n", + "mol_graph = featurizer(mol)\n", + "print(\"Molecule graph:\", mol_graph)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### SHAP analysis to interpret Chemprop model prediction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 1: Shapley value analysis to explain importance of default chemprop atom and bond features" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing molecule: Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14\n" + ] + } + ], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "\n", + "# load chemprop model checkpoint file\n", + "checkpoint_path = chemprop_dir / \"tests\" / \"data\" / \"example_model_v2_regression_mol.ckpt\" \n", + "mpnn = models.MPNN.load_from_checkpoint(checkpoint_path)\n", + "\n", + "# load data\n", + "test_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n", + "smiles_column = 'smiles'\n", + "df_test = pd.read_csv(test_path)\n", + "smis = df_test[smiles_column]\n", + "test_data = [data.MoleculeDatapoint.from_smi(smi) for smi in smis]\n", + "\n", + "# pick a test molecule for demonstration \n", + "test_mol = smis.iloc[0]\n", + "print(f\"Testing molecule: {test_mol}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# initialize the featurizer\n", + "atom_featurizer = CustomMultiHotAtomFeaturizer.v2() # chemprop v2 default atom featurizer settings\n", + "bond_featurizer = CustomMultiHotBondFeaturizer()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# A helper function to get predictions from a molecule with ability to keep or remove specific atom and bond features\n", + "def 
get_predictions(keep_atom_features: Optional[List[bool]], keep_bond_features: Optional[List[bool]], mol: str) -> float:\n", + " featurizer = CustomSimpleMoleculeMolGraphFeaturizer(\n", + " atom_featurizer=atom_featurizer,\n", + " bond_featurizer=bond_featurizer,\n", + " keep_atom_features=keep_atom_features,\n", + " keep_bond_features=keep_bond_features\n", + " )\n", + " test_data = [data.MoleculeDatapoint.from_smi(mol)]\n", + " test_dset = data.MoleculeDataset(test_data, featurizer=featurizer)\n", + " test_loader = data.build_dataloader(test_dset, shuffle=False, batch_size=1)\n", + "\n", + " with torch.inference_mode():\n", + " trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_progress_bar=False,\n", + " accelerator=\"cpu\",\n", + " devices=1\n", + " )\n", + " test_preds = trainer.predict(mpnn, test_loader)\n", + " return test_preds[0][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop_delete/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction with keep_features set 0: tensor([2.2535])\n", + "Prediction with keep_features set 1: tensor([2.2319])\n", + "Prediction with keep_features set 2: tensor([2.1880])\n" + ] + } + ], + "source": [ + "# example prediction with different keep_atom_features and keep_bond_features\n", + "\n", + "# keep all atom and bond features\n", + "keep_atom_features_0 = [True]*8\n", + "keep_bond_features_0 = [True]*4\n", + "\n", + "# keep some atom and bond features\n", + "keep_atom_features_1 = [True, True, False, True, True, False, True, True]\n", + "keep_bond_features_1 = [True, True, False, True]\n", + "\n", + "# remove all atom and bond features\n", + "keep_atom_features_2 = [False]*8\n", + "keep_bond_features_2 = [False]*4\n", + "\n", + "pred_0 = get_predictions(keep_atom_features_0, keep_bond_features_0, test_mol)\n", + "pred_1 = get_predictions(keep_atom_features_1, keep_bond_features_1, test_mol)\n", + "pred_2 = get_predictions(keep_atom_features_2, keep_bond_features_2, test_mol)\n", + "\n", + "print(f\"Prediction with keep_features set 0: {pred_0}\") # expected 2.2535\n", + "print(f\"Prediction with keep_features set 1: {pred_1}\") # expected 2.2319\n", + "print(f\"Prediction with keep_features set 2: {pred_2}\") # expected 2.1880" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# An example wrapper class for use as the model input in SHAP explainer\n", + "# The wrapper needs to be initialized first with the molecule to be explained, and then can be called with a boolean list representing the features to keep\n", + "# The wrapper is needed because SHAP explainer requires a callable model with a single input argument, adapt X as needed\n", + "class MoleculeModelWrapper:\n", + " def __init__(self, mol: str):\n", + " self.mol = mol\n", + " \n", + " def __call__(self, X):\n", + " preds = []\n", + " for keep_features in X:\n", + " try:\n", + " # unpacking X, indices corresponds to feature orders from default chemprop featurizer, adapt as needed\n", + " keep_atom_features = keep_features[:8] # 8 atom features\n", + " 
keep_bond_features = keep_features[8:] # 4 bond features\n", + " except:\n", + " print(f\"Invalid input: {keep_features}\")\n", + " raise\n", + " pred = get_predictions(keep_atom_features, keep_bond_features, self.mol)\n", + " preds.append([pred.item()])\n", + " return np.array(preds)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# An example masker function for use with SHAP explainer\n", + "# The masker function takes in a binary mask and the input data X, and returns the masked input data. This simulates the effect of masking out certain features.\n", + "def binary_masker(binary_mask, x):\n", + " masked_x = deepcopy(x)\n", + " masked_x[binary_mask == 0] = 0\n", + " return np.array([masked_x])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the model wrapper with the test molecule\n", + "model_wrapper = MoleculeModelWrapper(test_mol)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[2.25354147]])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test the model wrapper with a random feature choice\n", + "keep_features = [1] * 12 # 8 atom features + 4 bond features\n", + "feature_choice = np.array([keep_features])\n", + "model_wrapper(feature_choice) # expected 2.25354171" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the SHAP explainer with the model wrapper and masker\n", + "explainer = shap.PermutationExplainer(model_wrapper, masker=binary_masker)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop_delete/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "PermutationExplainer explainer: 2it [00:28, 28.55s/it] \n" + ] + } + ], + "source": [ + "# Compute SHAP values, using 100 evaluations of different feature choices (notice that feature choices are masked out randomly by the binary masker, so the results may vary between runs)\n", + "explanation = explainer(feature_choice, max_evals=100) " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ".values =\n", + "array([[ 0.00123063, 0.01496077, 0.00213072, -0.01216608, 0.00954816,\n", + " 0.00413817, 0.00643879, -0.00101143, 0.01162252, 0.00842983,\n", + " 0.00846943, 0.01178101]])\n", + "\n", + ".base_values =\n", + "array([[2.18796897]])\n", + "\n", + ".data =\n", + "array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Print the SHAP values\n", + "explanation" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAu0AAAKFCAYAAAB4GddQAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAnCFJREFUeJzs3Xt81NWd//HXJCSTe8KQkIQQMIJICWAKAgGWFlrkEha7KxBh44WIDVAiFQm43GugRmCpLQVsQBtuqwSCdbuSxp+CAmWCKFouBq1hEQ0XJZCLGBhCkt8fAwOTSSBAhhnC+/l4zIOZ8z3nez7fPHbre75z5oyhpqamBhERERERcVseri5ARERERESuTaFdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1Nob0JqamooLy9HW++LiIiINC0K7U3I999/T3BwMN9//72rSxERERGRRqTQLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmFdhERERERN6fQLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmFdhERERERN6fQLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmFdhERERERN6fQLiIiIiLi5pq5ugBxghNn4OxFV1chIiIicuP8jRDs7+oq3I5Ce1OUugq+KXd1FSIiIiI3JqYlvDZJob0OCu1N0VfFUHjK1VWIiIiISCPRmnYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERJq+0h8g5RUIGwv+Y2DAXPjkcMPHHyqCIekQ8B9gegIe/wOcKnPs99scePhFCE8GwyPwmw2NUr5Cu4iIiIg0bdXVMGwBvL4TUofCoifguzLoPxe+PH798UXF8JPZUHgSXkyCtIdhy1546AW4UGnfd/br8FEh/DimUS/BaaE9JycHg8FQ7yM3N9dZUwMwffp0MjIynDpHYygpKWH8+PHEx8djMpkwGAx06dLF1WWJiIiI3Dn6z4Gxf6z/eE4+mL+A1akw71GYNBQ+SAdPD5iXff3zv7gZfjgP216AycNg5kjYOBX2fQWr37fve+RPcOLPsP7ZW7kiB07/RdQBAwaQkJDg0N61a1enzpuVlUVERAQzZsxw6jy36tixY6xcuZLg4GA6dOjA3r17XV2SiIiISNOSkw/hIfBI/JW2sGBI7APrd4ClEoxe9Y/fvBv+9UFoE3albeAD0KEVbDRDyqAr7fe0bPTy4TaE9ri4ONLS0pw9zW1lsViorKwkICDgls9177338vnnn3P//fcD4OPjc8vnFBEREZGrfHoEut0LHrUWmfS8D1a+C/88Dl3a1j322GnrUpoH2zke63kf5N6eG65usaZ96dKldOrUCR8fH4xGIx06dGD58uV19ouPjyc0NBQvLy8CAwPp06cPO3bssOtnMBgoLi7m4MGDdktyDh48aDs+ZMgQh/NnZGRgMBjIycmxtaWkpGAwGDCbzYwePRqTyYSvry95eXkAVFRUkJqaSnR0NF5eXvj5+dGrVy+2bdvWoGv38/OzBXYRERERcYITJRDZ3LH9ctvxM9cee3Xf2uPPnLXeqXcyp99pr6iooKioyK7N19eXFi1aAPDUU0+RlZVF9+7dmTx5Mp6enmzZsoXU1FROnDjBggULbOMyMzMJDg4mMTGRyMhICgsL2bx5M4MGDcJsNtOtWzcAFi9eTHp6OoGBgUyZMsU2vnXr1jd9HUlJSXh7ezNu3DgMBgNt27bFYrEQHx/PoUOHGDRoEMnJyZSWlpKdnc3QoUN5++23eeihh256ThERERGppfIilFU4tlkqobjcvt0UYL27fu4CGOuIvT7e1n/PXah/vsvH6lo+4+N1pc+1ltc0AqeH9szMTDIzM+3a+vfvz/vvv8/WrVvJysriySefZPXq1bbjGRkZ9OnThyVLljBlyhRbwN+1axchISF255o4cSL9+vVj/vz5/OUvfwEgLS2NhQsXYjKZGm1pTkBAAHv37sXb29vWNm3aNA4cOMC6det47LHHbO1z586lY8eOPPfccxw4cKBR5hcRERG5G1y4cAHvq16bzWb69Olje12w6k06TapjG0XzF7Dh7/ZtR/5EuckHf2MzPC0XHc953hrIC44U0oletuO7d++mR48eeHp6gq+1mh/O
lOJ/6XhRURHV1dW0OW+9w15eeZ6vDx6hc+fOtnPs2bOHnte4jhvl9NA+fPhwxowZY9fWpk0bAF599VUMBgOTJk1yuBs/bNgw8vPzycvLIykpCcAW2Kurqzlz5gznz5+ndevWtGrViv379zv1Op555hm7wA7w5ptv0qpVK/r37+9Qf69evcjLy+P7778nMDDQqbWJiIiINBW181btoNtpTAJ0qLW0eOpqiGgO035h3x4RQpCPN0S1uLLM5epzXmrrNPBf7IbFx1/1hdVLy2L8y6/cjbet3jhRAqYAgsJa0Dmshd05evbsCayo9zpulNNDe/v27R1C+2WFhYXU1NRcuqi6HTt2zPZ827ZtzJgxg3379mGxWOz6hYWF1R7aqOrahrGoqIgLFy4QHR1d77hjx47RsWNHZ5YmIiIicvdoHmDduaV2W2Rzx/bL4u6BnYes+7Vf/WXUD78EP6N1F5j6RLWAsCD4uI4fYtrzJcQ17n7s9XF6aL+WmpoaDAYDa9eutX78UIfLgf7QoUMkJCTg5+fH+PHjiY2NJSAgAIPBwPPPP8+5c+duuZ6LFy/We6yuu+U1NTW0adOGl156qd5xt7KOXkREREQawcje1m0f39wNIy/d8S4uh01mGP6g/Xr0wyet/7aLuNI2ojeseR++KYboUGvb1v3WXWemDL8tl+DS0B4TE8PevXtp3769/ccQdVi9ejUWi4V169YxatQou2MTJ07Ey8t+8b/BYKj3XP7+/pSWljq0Hz58Az9lC0RGRlJWVkZiYmK9bzpERERExMVG9ob4DpC8DAqKIDQQVuRBVTW8MNq+78/nWf/96qrvZM4cYQ34A+bCr4fB2fOw+H+s20Qm/8x+/LoP4OgpqLi0KmRHASzYZH3++E+h7c3t4+7SLR+ffvppAKZOnUplpeNWOUeOHLE9vxyKa2pq7Pqkp6dTVlbmMNbHx4fy8nKHdoCoqCgKCgrsjp88eZK33nrrhuofMWIEZWVlTJs2rc7jV9cvIiIiIi7i6Qm5s+HRvrB0C0xbC6FB1l84vT/q+uOjQ2H7fOvd9/9cD4vegoRu8O48x11jXtsKc96AjDetr98/aH095w048t1NX4JL77QPHjyYlJQUVq5cSbt27UhISCAqKorjx4+zb98+9uzZY1uykpiYyJIlS5gwYQI7d+7EZDJhNpvJz88nPDycqqoqu3N37dqV3Nxcxo0bR2xsLB4eHowdO5aQkBBSUlJIS0ujR48ejBo1ipKSEjZu3Eh4eHidbwDqk5GRwfbt23n55ZfZuXMn/fr1Izg4mKNHj7Jr1y6MRmODviA7c+ZM253/qqoqvv32W371q18B0L17d8aNG9fgmkRERETuOh/Mv36f5gHw6iTr41q+yqy7PbYNvDO3cWq5CS4N7WDdEjI+Pp5ly5axfv16LBYLQUFBxMTEMGvWLFu/uLg4NmzYwKxZs1i1ahUeHh507tyZd955hwkTJnDy5Em78y5fvpykpCSys7OpqKigpqaGgQMHEhISwtSpUykqKmLdunUsXLiQli1b8uyzz+Lh4cHMmTMbXLvRaCQ/P5958+axadMm2w9CmUwmYmNjSU5ObtB5Vq1aRXFxse31qVOneOWVVwDrGxuFdhEREZG7m6Gm9noTuWOVl5cTHBxMWfsUggpPubocERERkRtzf5R1yUork6srcTsuXdMuIiIiIiLXp9AuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibs7l+7SLE9wTCp7erq5CRERE5MbEtHR1BW5Lob0pWvZLCAxydRUiIiIiN87f6OoK3JJCe1MUaYIghXYRERGRpkJr2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxc81cXYA4wYkzcPaiq6sQERFxP/5GCPZ3dRUiN0yhvSlKXQXflLu6ChEREfcS0xJem6TQLnckhfam6KtiKDzl6ipEREREpJFoTbuIiIiIiJtTaBcRERERcXMK7SIiIiIibk6hXURERETEzSm0i4iIiIi4OYV2ERERERE3p9AuIiIicqtKf4CUVyBsLPiPgQFz4ZPDDR9/qAiGpEPAf4DpCXj8D3CqzLHfb3Pg4RchPBkMj8BvNjTaJYh7U2gXERERuRXV1TBsAby+E1KHwqIn4Lsy6D8Xvjx+/fFFxfCT2VB4El5MgrSHYcteeOgFuFBp33f26/BRIfw4xjnXIm7LaaE9JycHg8FQ7yM3N9dZUwMwffp0MjIynDpHY8jNzaVfv35ERETg4+ODj48Pbdq0Yfz48Zw6pR9IEhERcbn+c2DsH+s/npMP5i9gdSrMexQmDYUP0sHTA+ZlX//8L26GH87Dthdg8jCYORI2ToV9X8Hq9+37HvkTnPgzrH/2Vq5I7kBO/0XUAQMGkJCQ4NDetWtXp86blZVFREQEM2bMcOo8t+rgwYOcP3+e4cOHExUVRXV1NR999BGvvfYaf/vb3/jss88IDAx0dZkiIiJSn5x8CA+BR+KvtIUFQ2IfWL8DLJVg9Kp//Obd8K8PQpuwK20DH4AOrWCjGVIGXWm/p2Wjly93BqeH9ri4ONLS0pw9zW1lsViorKwkICDgls81ffp0pk+f7tA+ZcoUfv/735OVlcXkyZNveR4RERFxkk+PQLd7waPWAoae98HKd+Gfx6FL27rHHjttXUrzYDvHYz3vg9y9jV+v3JHcYk370qVL6dSpEz4+PhiNRjp06MDy5cvr7BcfH09oaCheXl4EBgbSp08fduzYYdfPYDBQXFzMwYMH7ZbkHDx40HZ8yJAhDufPyMjAYDCQk5Nja0tJScFgMGA2mxk9ejQmkwlfX1/y8vIAqKioIDU1lejoaLy8vPDz86NXr15s27btlv4mMTHWtWpnzpy5pfOIiIiIk50ogcjmju2X245f47/lJ0rs+9Yef+as9U693PWcfqe9oqKCoqIiuzZfX19atGgBwFNPPUVWVhbdu3dn8uTJeHp6smXLFlJTUzlx4gQLFiywjcvMzCQ4OJjExEQiIyMpLCxk8+bNDBo0CLPZTLdu3QBYvHgx6enpBAYGMmXKFNv41q1b3/R1JCUl4e3tzbhx4zAYDLRt2xaLxUJ8fDyHDh1i0KBBJCcnU1paSnZ2NkOHDuXtt9/moYceatD5y8vLKS8v5/vvv2f79u0sWLCAZs2a8W//9m83XbOIiIjcoMqLUFbh2GaphOJy+3ZTgPXu+rkLYKwjUvl4W/89d6H++S4fq2v5jI/XlT7XWl4jdwWn32nPzMwkOjra7jFy5EgAtm7dSlZWFk8++SQff/wxixYtIiMjg/3799O7d2+WLFnC6dOnbefatWsXZrOZFStWMGfOHNasWcN7771HVVUV8+fPt/VLS0vDaDRiMplIS0uzPUJCQm76OgICAjhw4ACLFy9m0aJF9OjRg9mzZ3PgwAGysrLYsmUL6enpLF261LYO/bnnnmvw+Sd
MmEB0dDSdOnVi4sSJ+Pj4sHr1auLi4m66ZhEREbFXXV3NP//5T7s2s9l85cWuz63bNl79MH8BG/7u2P51sfWcPs04e7rUdory8nLrp/vnLwVyX2/7Oa6e09ca7As/+5yqqirb8YKCAs6Xfm/rU1RUxNdff207/v333ztcW71zXLJ7926HOUpKSmyva89huw7N4ZQ5bpTT77QPHz6cMWPG2LW1adMGgFdffRWDwcCkSZMc7sYPGzaM/Px88vLySEpKArCF7urqas6cOcP58+dp3bo1rVq1Yv/+/U69jmeeeQZvb2+7tjfffJNWrVrRv39/h/p79epFXl4e33//fYO+SJqWlsbw4cM5c+YMf//739mzZw/ffvtto16DiIjI3c7Dw4MOHTrYtfXp0+fKiwfugXfn2Q+auhoimsO0X9i3R4RYz9mqBQHfX1nCEhQUROfOneHD96wNrUz0qbWm3TbnpWUx7f1N4OlpO96pUycoedd6N9/o5bBaoK5sYXcddbyOj4+3e92pUye717XnsF2H5nDKHDfK6aG9ffv2DqH9ssLCQmpqaujZs2e9448dO2Z7vm3bNmbMmMG+ffuwWCx2/cLCwmoPbVRdunRxaCsqKuLChQtER0fXO+7YsWN07Njxuufv1q2bbXnPpEmTWL9+PY8//jgGg8FuiY+IiIg4UfMA684ttdsimzu2XxZ3D+w8ZN2v/eovo374JfgZrbvA1CeqBYQFwcd1/BDTni8hTvuxi5XTQ/u11NTUYDAYWLt2LZ5Xvbu82uVAf+jQIRISEvDz82P8+PHExsYSEBCAwWDg+eef59y5c7dcz8WLF+s9Vtc72pqaGtq0acNLL71U77ibXUf/2GOPkZqayquvvqrQLiIi4s5G9rZu+/jmbhh56W5qcTlsMsPwB+3Xox8+af23XcSVthG9Yc378E0xRIda27but+46M2X47bkGcXsuDe0xMTHs3buX9u3bO3wMUdvq1auxWCysW7eOUaNG2R2bOHEiXl72X9AwGAz1nsvf35/S0lKH9sOHb+DnhoHIyEjKyspITEys903Hrbhw4QLl5eXX7ygiIiKuM7I3xHeA5GVQUAShgbAiD6qq4YXR9n1/fmnpzVeZV9pmjrAG/AFz4dfD4Ox5WPw/1m0ik39mP37dB3D0FFRcWnGwowAWbLI+f/yn0Fb7uDdVLt3y8emnnwZg6tSpVFY6bmd05MgR2/PLobimpsauT3p6OmVlZQ5jfXx86g28UVFRFBQU2B0/efIkb7311g3VP2LECMrKypg2bVqdx6+uvz7/93//V2f7woULOXfunNN/hEpERERukacn5M6GR/vC0i0wbS2EBll/4fT+qOuPjw6F7fOtd9//cz0segsSulnX1tfeNea1rTDnDch40/r6/YPW13PegCPfNfqliftw6Z32wYMHk5KSwsqVK2nXrh0JCQlERUVx/Phx9u3bx549e2xLVhITE1myZAkTJkxg586dmEwmzGYz+fn5hIeH233DF6y/uJqbm8u4ceOIjY3Fw8ODsWPHEhISQkpKCmlpafTo0YNRo0ZRUlLCxo0bCQ8Pr/MNQH0yMjLYvn07L7/8Mjt37qRfv34EBwdz9OhRdu3ahdFovO4XZAcOHEhwcDDdu3enbdu2lJaWsnv3bvLz8zGZTCxZsuTG/7AiIiLSeD6Yf/0+zQPg1UnWx7VcfYf9arFt4J25jVOLNEkuDe1g3RIyPj6eZcuWsX79eiwWC0FBQcTExDBr1ixbv7i4ODZs2MCsWbNYtWoVHh4edO7cmXfeeYcJEyZw8uRJu/MuX76cpKQksrOzqaiooKamhoEDBxISEsLUqVMpKipi3bp1LFy4kJYtW/Lss8/i4eHBzJkzG1y70WgkPz+fefPmsWnTJtsPQplMJmJjY0lOTr7uOR577DH++te/snnzZr7//ns8PT2JiIjg8ccf58UXXyQqqgHv0EVERESkSTPU1F5vInes8vJygoODKWufQlDhKVeXIyIi4l7uj7IuWWllcnUlIjfMpWvaRURERETk+hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0iIiIiIm5OoV1ERERExM25fJ92cYJ7QsHT29VViIiIuJeYlq6uQOSmKbQ3Rct+CYFBrq5CRETE/fgbXV2ByE1RaG+KIk0QpNAuIiIi0lRoTbuIiIiIiJtTaBcRERERcXMK7SIiIiIibk6hXURERETEzSm0i4iIiIi4OYV2ERERERE3p9AuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibq6ZqwsQJzhxBs5edHUVIiIizuFvhGB/V1chclsptDdFqavgm3JXVyEiItL4YlrCa5MU2uWuo9DeFH1VDIWnXF2FiIiIiDQSrWkXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmFdhERERERN6fQLiIiIiLi5hTaRURERC4r/QFSXoGwseA/BgbMhU8ON3z8oSIYkg4B/wGmJ+DxP8CpMsd+v82Bh1+E8GQwPAK/2dBolyBNk0K7iIiICEB1NQxbAK/vhNShsOgJ+K4M+s+FL49ff3xRMfxkNhSehBeTIO1h2LIXHnoBLlTa9539OnxUCD+Occ61SJNzQ6E9JycHg8FQ7yM3N9dZdQIwffp0MjIynDpHYygpKWH8+PHEx8djMpkwGAx06dKl3v5Lly5l6NCh3HPPPXh6emIwGDh48OBtrFhEROQu0H8OjP1j/cdz8sH8BaxOhXmPwqSh8EE6eHrAvOzrn//FzfDDedj2AkweBjNHwsapsO8rWP2+fd8jf4ITf4b1z97KFcld5KZ+EXXAgAEkJCQ4tHft2vWWC7qWrKwsIiIimDFjhlPnuVXHjh1j5cqVBAcH06FDB/bu3XvN/qtWreKLL74gJiaGiIgIjh9vwLt5ERERaVw5+RAeAo/EX2kLC4bEPrB+B1gqwehV//jNu+FfH4Q2YVfaBj4AHVrBRjOkDLrSfk/LRi9fmrabCu1xcXGkpaU1di0uZbFYqKysJCAg4JbPde+99/L5559z//33A+Dj43PN/hs3bqRdu3Z4e3szYsQI3nzzzVuuQURERG7Qp0eg273gUWshQs/7YOW78M/j0KVt3WOPnbYupXmwneOxnvdB7rVv4Ilcj9PWtC9dupROnTrh4+OD0WikQ4cOLF++vM5+8fHxhIaG4uXlRWBgIH369GHHjh12/QwGA8XFxRw8eNBuSc7lZSQGg4EhQ4Y4nD8jIwODwUBOTo6tLSUlBYPBgNlsZvTo0ZhMJnx9fcnLywOgoqKC1NRUoqOj8fLyws/Pj169erFt27YGXbufn58tsDfEj370I7y9vRvcX0RERJzgRAlENndsv9x2/My1x17dt/b4M2etd+pFbtJN3WmvqKigqKjIrs3X15cWLVoA8NRTT5GVlUX37t2ZPHkynp6ebNmyhdTUVE6cOMGCBQts4zIzMwkODiYxMZHIyEgKCwvZvHkzgwYNwmw2061bNwAWL15Meno6gYGBTJkyxTa+devWN3MJAC
QlJeHt7c24ceMwGAy0bdsWi8VCfHw8hw4dYtCgQSQnJ1NaWkp2djZDhw7l7bff5qGHHrrpOUVEROQ2qLwIZRWObZZKKC63bzcFWO+un7sAxjqikc+lG2vnLtQ/3+VjdS2f8fG60uday2tEruGm7rRnZmYSHR1t9xg5ciQAW7duJSsriyeffJKPP/6YRYsWkZGRwf79++nduzdLlizh9OnTtnPt2rULs9nMihUrmDNnDmvWrOG9996jqqqK+fPn2/qlpaVhNBoxmUykpaXZHiEhITd98QEBARw4cIDFixezaNEievTowezZszlw4ABZWVls2bKF9PR0li5dymeffUZgYCDPPffcTc8nIiIijWv37t1UVVXZXhcUFFBSUgK7Prdu23j1w/wFbPi7Q/vev7xjHezrDZaLmM1muzk+/8f+K8evnuOSoqIiTpZdugtvqaS8vNx+Q4nzlXbjAYc5ioqK6r6Oq45//fXXttcOc9Rxztqv6/1baQ6XzHGjbupO+/DhwxkzZoxdW5s2bQB49dVXMRgMTJo0yeFu/LBhw8jPzycvL4+kpCQAW+iurq7mzJkznD9/ntatW9OqVSv2799/M+U12DPPPOOwLOXNN9+kVatW9O/f36H+Xr16kZeXx/fff09gYKBTaxMREZHri4+Pt3vdqVMn65MHvODdefadp66GiOYw7Rd2zd3/5UfWJ5HN4UQJffr0sTveMTjc+qSVyX6OS1q3bg0GX+uLEyUEBQXRuXPnKx1OlFjv5l91l732HK1btwZPT8fruPr4VRzmqOOctV/X+7fSHC6Z40bdVGhv3769Q2i/rLCwkJqaGnr27Fnv+GPHjtmeb9u2jRkzZrBv3z4sFotdv7CwsNpDG1Vd2zAWFRVx4cIFoqOj6x137NgxOnbs6MzSRERE5FY0D7Du3FK7LbK5Y/tlcffAzkPW/dqv/jLqh1+Cn9G6C0x9olpAWBB8XMcPMe35EuK0H7vcmpsK7ddSU1ODwWBg7dq1eF71jvFqlwP9oUOHSEhIwM/Pj/HjxxMbG0tAQAAGg4Hnn3+ec+fO3XI9Fy9erPdYXXfLa2pqaNOmDS+99FK9425lHb2IiIi4qZG9rds+vrkbRl66K1pcDpvMMPxB+/Xoh09a/20XcaVtRG9Y8z58UwzRoda2rfutu85MGX57rkGarEYP7TExMezdu5f27ds7fLRQ2+rVq7FYLKxbt45Ro0bZHZs4cSJeXvZf1jAYDPWey9/fn9LSUof2w4dv4KeHgcjISMrKykhMTKz3TYeIiIg0QSN7Q3wHSF4GBUUQGggr8qCqGl4Ybd/355eW3nyVeaVt5ghrwB8wF349DM6eh8X/Y90mMvln9uPXfQBHT0HFpVUGOwpgwSbr88d/Cm21j7vYa/QtH59++mkApk6dSmWl49ZGR44csT2/HIpramrs+qSnp1NWVuYw1sfHh/Lycod2gKioKAoKCuyOnzx5krfeeuuG6h8xYgRlZWVMmzatzuNX1y8iIiJNiKcn5M6GR/vC0i0wbS2EBll/4fT+qOuPjw6F7fOtd9//cz0segsSulnX1tfeNea1rTDnDci49Nss7x+0vp7zBhz5rtEvTe58jX6nffDgwaSkpLBy5UratWtHQkICUVFRHD9+nH379rFnzx7bkpXExESWLFnChAkT2LlzJyaTCbPZTH5+PuHh4Xbf2gXrL67m5uYybtw4YmNj8fDwYOzYsYSEhJCSkkJaWho9evRg1KhRlJSUsHHjRsLDw+t8A1CfjIwMtm/fzssvv8zOnTvp168fwcHBHD16lF27dmE0Ghv0BdmZM2fa7vxXVVXx7bff8qtf/QqA7t27M27cOFvfv/zlL7z77rsAtm8ev/jii7Yv6a5YsaLB9YuIiEg9Pph//T7NA+DVSdbHtVx9h/1qsW3gnbmNU4vIVRo9tIN1S8j4+HiWLVvG+vXrsVgsBAUFERMTw6xZs2z94uLi2LBhA7NmzWLVqlV4eHjQuXNn3nnnHSZMmMDJkyftzrt8+XKSkpLIzs6moqKCmpoaBg4cSEhICFOnTqWoqIh169axcOFCWrZsybPPPouHhwczZ85scO1Go5H8/HzmzZvHpk2bbD8IZTKZiI2NJTk5uUHnWbVqFcXFxbbXp06d4pVXXgGsb2yuDu1/+9vfWLVqld34N954w/ZcoV1ERETk7maoqb02Re5Y5eXlBAcHU9Y+haDCU64uR0REpPHdH2VdrnJp+0WRu0Wjr2kXEREREZHGpdAuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibs4p+7SLi90TCp7erq5CRESk8cW0dHUFIi6h0N4ULfslBAa5ugoRERHn8De6ugKR206hvSmKNEGQQruIiIhIU6E17SIiIiIibk6hXURERETEzSm0i4iIiIi4OYV2ERERERE3p9AuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibk6hXURERETEzSm0i4iIiIi4uWauLkCc4MQZOHvR1VWIiIjcGn8jBPu7ugoRt6DQ3hSlroJvyl1dhYiIyM2LaQmvTVJoF7lEob0p+qoYCk+5ugoRERERaSRa0y4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIjI3af0B0h5BcLGgv8YGDAXPjnc8PGHimBIOgT8B5iegMf/AKfKHPv9NgcefhHCk8HwCPxmQ6NdgtxdFNpFRETk7lJdDcMWwOs7IXUoLHoCviuD/nPhy+PXH19UDD+ZDYUn4cUkSHsYtuyFh16AC5X2fWe/Dh8Vwo9jnHMtctdwWmjPycnBYDDU+8jNzXXW1ABMnz6djIwMp87RWIqKinjssceIiorCaDQSEhLCAw88wMqVK11dmoiIyJ2n/xwY+8f6j+fkg/kLWJ0K8x6FSUPhg3Tw9IB52dc//4ub4YfzsO0FmDwMZo6EjVNh31ew+n37vkf+BCf+DOufvZUrEnH+L6IOGDCAhIQEh/auXbs6dd6srCwiIiKYMWOGU+e5VeXl5fTo0YPTp0/zi1/8gq5du3L69GlycnIYP348RUVFpKenu7pMERGRpiMnH8JD4JH4K21hwZDYB9bvAEslGL3qH795N/zrg9Am7ErbwAegQyvYaIaUQVfa72nZ6OXL3cnpoT0uLo60tDRnT3NbWSwWKisrCQgIuOVzvfbaa5w8eZLp06ezcOFCW/uMGTNo27Yt69atU2gXERFpTJ8egW73gketBQc974OV78I/j0OXtnWPPXbaupTmwXaOx3reB7l7G79eEdxkTfvSpUvp1KkTPj4+GI1GOnTowPLly+vsFx8fT2hoKF5eXgQGBtKnTx927Nhh189gMFBcXMzBgwftluQcPHjQdnzIkCEO58/IyMBgMJCTk2NrS0lJwWAwYDabGT16NCaTCV9fX/Ly8gCoqKggNTWV6OhovLy88PPzo1evXmzbtq1B115WZv3SSnR0tF17WFgYRqMRHx+fBp1HREREGuhEC
UQ2d2y/3Hb8zLXHXt239vgzZ6136kUamdPvtFdUVFBUVGTX5uvrS4sWLQB46qmnyMrKonv37kyePBlPT0+2bNlCamoqJ06cYMGCBbZxmZmZBAcHk5iYSGRkJIWFhWzevJlBgwZhNpvp1q0bAIsXLyY9PZ3AwECmTJliG9+6deubvo6kpCS8vb0ZN24cBoOBtm3bYrFYiI+P59ChQwwaNIjk5GRKS0vJzs5m6NChvP322zz00EPXPO/DDz/M/PnzWbBgAQEBAfTu3ZtTp07x29/+loqKCp5//vmbrllERKTJq7wIZRWObZZKKC63bzcFWO+un7sAxjoikI+39d9zF+qf7/KxupbP+Hhd6XOt5TUiN8Hpd9ozMzOJjo62e4wcORKArVu3kpWVxZNPPsnHH3/MokWLyMjIYP/+/fTu3ZslS5Zw+vRp27l27dqF2WxmxYoVzJkzhzVr1vDee+9RVVXF/Pnzbf3S0tIwGo2YTCbS0tJsj5CQkJu+joCAAA4cOMDixYtZtGgRPXr0YPbs2Rw4cICsrCy2bNlCeno6S5cu5bPPPiMwMJDnnnvuuuft1q0by5Yto7KykuTkZDp27Ei/fv3Iz89n8+bNjB079qZrFhERudN9+umndq/NZrPd64JVb1q3bbz6Yf4CNvzdsf3rYsrLy6kyNgPLRcdznrcG8oIjhXZz7N69m6qqKusLX2uw/+FMqe14UVERX3/9NZy33mEvrzxv+3T/sj179lzzOuzmAAoKCigpKXGc45Ly8nKHOWqfU3O49xw3yul32ocPH86YMWPs2tq0aQPAq6++isFgYNKkSQ5344cNG0Z+fj55eXkkJSUB2EJ3dXU1Z86c4fz587Ru3ZpWrVqxf/9+p17HM888g7e3t13bm2++SatWrejfv79D/b169SIvL4/vv/+ewMDAa567RYsWtGvXjpEjR9K9e3e++eYbVq5cSVJSEm+//TY//elPG/16RERE7gQ//vGP7V736dPH7nWnMQnQ4X77QVNXQ0RzmPYL+/aIEIJ8vCGqxZVlLlef81Jbp4H/YjcsPv6qL6xeWhbjX37lbrztk/wTJWAKICisBZ3DWtido2fPnsCKeq/Dbg6gU6dOdq9rrxYICgqic+fOdm21z6k53HuOG+X00N6+fXuH0H5ZYWEhNTU1l/4PuW7Hjh2zPd+2bRszZsxg3759WCwWu35hYWG1hzaqLl26OLQVFRVx4cIFh/XoVzt27BgdO3as9/j69et54oknWLNmDY8//ritPTk5mc6dOzNx4kQKCgpurXgREZGmqnmAdeeW2m2RzR3bL4u7B3Yesu7XfvWXUT/8EvyM1l1g6hPVAsKC4OM6fohpz5cQp/3YxTmcHtqvpaamBoPBwNq1a/H09Kyzz+VAf+jQIRISEvDz82P8+PHExsYSEBCAwWDg+eef59y5c7dcz8WLF+s9Vtfd8pqaGtq0acNLL71U77jrraNfvHgxRqPRLrAD3HvvvXTp0oWPP/6Yc+fO4evre53qRUREpEFG9rZu+/jmbhh56e5ncTlsMsPwB+3Xox8+af23XcSVthG9Yc378E0xRIda27but+46M2X47bkGueu4NLTHxMSwd+9e2rdv7/AxRG2rV6/GYrGwbt06Ro0aZXds4sSJeHnZf+HDYDDUey5/f39KS0sd2g8fvoGfLwYiIyMpKysjMTGx3jcd13Pq1Clqamqorq7Go9bWU1VVVVRXV9utqRIREZFbNLI3xHeA5GVQUAShgbAiD6qq4YXR9n1/Ps/671eZV9pmjrAG/AFz4dfD4Ox5WPw/1m0ik39mP37dB3D0FFRcWiGwowAWbLI+f/yn0Fb7uEvDuHTLx6effhqAqVOnUlnpuD3SkSNHbM8vh+Kamhq7Punp6bZtE6/m4+NDeXm5QztAVFQUBQUFdsdPnjzJW2+9dUP1jxgxgrKyMqZNm1bn8avrr09MTAwWi4Vly5bZte/fv58DBw7Qpk2bRtkPXkRERC7x9ITc2fBoX1i6BaathdAg6y+c3h91/fHRobB9vvXu+3+uh0VvQUI3eHee464xr22FOW9AxpvW1+8ftL6e8wYc+a7RL02aLpfeaR88eDApKSmsXLmSdu3akZCQQFRUFMePH2ffvn3s2bPHtmQlMTGRJUuWMGHCBHbu3InJZMJsNpOfn094eLjD3eiuXbuSm5vLuHHjiI2NxcPDg7FjxxISEkJKSgppaWn06NGDUaNGUVJSwsaNGwkPD6/zDUB9MjIy2L59Oy+//DI7d+6kX79+BAcHc/ToUXbt2oXRaLzuF2Tnzp3L8OHDee6553j//feJi4vjm2++YePGjVRWVjJv3rwb/8OKiIjczT6Yf/0+zQPg1UnWx7VcfYf9arFt4J25jVOLSAO4NLSDdUvI+Ph4li1bxvr167FYLAQFBRETE8OsWbNs/eLi4tiwYQOzZs1i1apVeHh40LlzZ9555x0mTJjAyZMn7c67fPlykpKSyM7OpqKigpqaGgYOHEhISAhTp06lqKiIdevWsXDhQlq2bMmzzz6Lh4cHM2fObHDtRqOR/Px85s2bx6ZNm2w/CGUymYiNjSU5Ofm65xg8eDB5eXnMmzePbdu28de//hUfHx86duzIjBkzbNtjioiIiMjdy1BTe72J3LHKy8sJDg6mrH0KQYWnXF2OiIjIzbs/yrpcpZXJ1ZWIuAWXrmkXEREREZHrU2gXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmXb/koTnBPKHh6u7oKERGRmxejXwoVuZpCe1O07JcQGOTqKkRERG6Nv9HVFYi4DYX2pijSBEEK7SIiIiJNhda0i4iIiIi4OYV2ERERERE3p9AuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibk6hXURERETEzSm0i4iIiIi4OYV2ERERERE3p9AuIiIiIuLmFNpFRERERNxcM1cXIE5w4gycvejqKkRE5DJ/IwT7u7oKEbmDKbQ3Ramr4JtyV1chIiIAMS3htUkK7SJySxTam6KviqHwlKurEBEREZFGojXtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERO5kpT9AyisQNhb8x8CAufDJ4YaPP1QEQ9Ih4D/A9AQ8/gc4VebY77c58PCLEJ4MhkfgNxsa7RJE5PoU2kVERO5U1dUwbAG8vhNSh8KiJ+C7Mug/F748fv3xRcXwk9lQeBJeTIK0h2HLXnjoBbhQad939uvwUSH8OMY51yIi1+S00J6Tk4PBYKj3kZub66ypAZg+fToZGRlOncMZDh8+jL+/PwaDgeeee87V5YiIiCv1nwNj/1j/8Zx8MH8Bq1Nh3qMwaSh8kA6eHjAv+/rnf3Ez/HAetr0Ak4fBzJGwcSrs+wpWv2/f98if4MSfYf2zt3JFInKTnP7jSgMGDCAhIcGhvWvXrk6dNysri4iICGbMmOHUeRrb2LFjqaqqcnUZIiJyJ8jJh/AQeCT+SltYMCT2gfU7wFIJRq/6x2/e
Df/6ILQJu9I28AHo0Ao2miFl0JX2e1o2evki0nBOD+1xcXGkpaU5e5rbymKxUFlZSUBAQKOed9WqVezatYtf//rX/P73v2/Uc4uISBP06RHodi941PrgvOd9sPJd+Odx6NK27rHHTluX0jzYzvFYz/sgd2/j1ysiN80t1rQvXbqUTp064ePjg9FopEOHDixfvrzOfvHx8YSGhuLl5UVgYCB9+vRhx44ddv0MBgPFxcUcPHjQbknOwYMHbceHDBnicP6MjAwMBgM5OTm2tpSUFAwGA2azmdGjR2MymfD19SUvLw+AiooKUlNTiY6OxsvLCz8/P3r16sW2bdtu6G9w+vRp/vM//5N///d/p2/fvjc0VkRE7lInSiCyuWP75bbjZ6499uq+tcefOWu9Uy8ibsHpd9orKiooKiqya/P19aVFixYAPPXUU2RlZdG9e3cmT56Mp6cnW7ZsITU1lRMnTrBgwQLbuMzMTIKDg0lMTCQyMpLCwkI2b97MoEGDMJvNdOvWDYDFixeTnp5OYGAgU6ZMsY1v3br1TV9HUlIS3t7ejBs3DoPBQNu2bbFYLMTHx3Po0CEGDRpEcnIypaWlZGdnM3ToUN5++20eeuihBp0/JSWF6upq/vSnP7F9+/abrlNERO5QlRehrMKxzVIJxeX27aYA6931cxfAWMd/yn28rf+eu1D/fJeP1bV8xsfrSp9rLa8RkdvG6aE9MzOTzMxMu7b+/fvz/vvvs3XrVrKysnjyySdZvXq17XhGRgZ9+vRhyZIlTJkyxRbwd+3aRUhIiN25Jk6cSL9+/Zg/fz5/+ctfAEhLS2PhwoWYTKZGW5oTEBDA3r178fb2trVNmzaNAwcOsG7dOh577DFb+9y5c+nYsSPPPfccBw4cuO65c3Nz+ctf/sLvf/97wsLCrttfRESaoF2fW7drrM38BWz4u33bkT9Z15j7eoPlouOY85cCua+347HLLh+r6276+crrjxeR28rpy2OGDx/O66+/bve4fPf81VdfxWAwMGnSJIqKiuwew4YN4/z587ZlKIAtsFdXV1NcXExRURGtW7emVatW7N+/36nX8cwzz9gFdoA333yTVq1a0b9/f7vaz58/T69evSgoKOD777+/5nktFgsTJkzgxz/+MZMnT3bmJYiIiItUVtoHY7PZ7Pj6gXvg3Xnw7jwK/pBIVd4c6NoWBsVx9NWxfP/mVNvxoos/8PXXX1uXsZwooby83LYEFLiy9KWVqd459x4/Yte3oKCAkpISW1tViB9ff3vC1t9hjvqu4yq7d++221zBbg6gqKjIeh2aQ3PchXPcKENNTU3NLZ2hHjk5OYwaNYopU6bwu9/9rs4+PXr04OOPP77meRYuXMj06dMB2LZtGzNmzGDfvn1YLBa7fmFhYXz33Xd2ryMiIuq8020wGBg8eLDdGwKw3uGfOXMmmzZtYuTIkYB12cqqVaswm8307t3brr/RaOTChWt89AgcOnSIjh071nv8V7/6Fa+++ip79uwhLi4OaNjfri7l5eUEBwdT1j6FoMJTDR4nIiJOdH+UdUvFqwJ0g/WfY72jvvqZuo+PWgw7D8HxV+2/jJryCvz3Djiz9trLW1qOhf6dYWOtT6XvT4XWLWDrC45jisutP+Q0LxF+M/pGr0hEbpLTl8dcS01NDQaDgbVr1+Lp6Vlnn549ewLW8JuQkICfnx/jx48nNjaWgIAADAYDzz//POfOnbvlei5erOMjxksCAwPrrL9Nmza89NJL9Y671jr6w4cP8+qrrzJ06FBqamr49NNPATh69Chg/XLqp59+SkxMjMOyIBEREUb2tm77+OZuGNnH2lZcDpvMMPxB+8B++KT133YRV9pG9IY178M3xRAdam3but+668yU4bfnGkSkQVwa2mNiYti7dy/t27cnPj7+mn1Xr16NxWJh3bp1jBo1yu7YxIkT8fKyv5NgMBjqPZe/vz+lpaUO7YcP38DPPgORkZGUlZWRmJhY75uOa/n666+prKzkr3/9K3/9618djq9du5a1a9eybNkyJk2adMPnFxGRJm5kb4jvAMnLoKAIQgNhRR5UVcMLte6C/3ye9d+vrvqe2cwR1oA/YC78ehicPQ+L/8e6TWTyz+zHr/sAjp6CikufdO8ogAWbrM8f/ym01T7uIs7k0tD+9NNPk5OTw9SpU/nggw8cgveRI0eIibH+XPLlUFx7NU96ejplZWWEhobatfv4+FBeXuvb9pdERUVRUFBAeXk5QUFBAJw8eZK33nrrhuofMWIEL7/8MtOmTatzGcvV9delc+fOLFu2zKF9//79rFy5kkGDBvHwww8zaNCgOkaLiMhdz9MTcmfDtDWwdIt1t5ce7a3Lae6Puv746FDYPh+eWw3/uR68m8Gw7rBkrOOymte2wvbPrrx+/6D1AfAvP1JoF3Eyl4b2wYMHk5KSwsqVK2nXrh0JCQlERUVx/Phx9u3bx549e2xLVhITE1myZAkTJkxg586dmEwmzGYz+fn5hIeHO/yKaNeuXcnNzWXcuHHExsbi4eHB2LFjCQkJISUlhbS0NHr06MGoUaMoKSlh48aNhIeHU1ZW1uD6MzIy2L59Oy+//DI7d+6kX79+BAcHc/ToUXbt2oXRaLzmF2TDwsLqvIOek5PDypUriY2N1R12EZG72Qfzr9+neQC8Osn6uJavMutuj20D79Sxa83N1CIiTuPS0A7WLSHj4+NZtmwZ69evx2KxEBQURExMDLNmzbL1i4uLY8OGDcyaNYtVq1bh4eFB586deeedd5gwYQInT560O+/y5ctJSkoiOzubiooKampqGDhwICEhIUydOpWioiLWrVvHwoULadmyJc8++yweHh7MnDmzwbUbjUby8/OZN28emzZtsv0glMlkIjY2luTk5Mb5I4mIiIjIXc1pu8fI7afdY0RE3NCt7B4jInKJ0/dpFxERERGRW6PQLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0iIiIiIm7O5fu0ixPcEwqe3q6uQkREAGL0S6EicusU2puiZb+EwCBXVyEiIpf5G11dgYjc4RTam6JIEwQptIuIiIg0FVrTLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmFdhERERERN6fQLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzzVxdgDjBiTNw9qKrqxARuX38jRDs7+oqREScRqG9KUpdBd+Uu7oKEZHbI6YlvDZJoV1EmjSF9qboq2IoPOXqKkRERESkkWhNu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYREZH6lP4AKa9A2FjwHwMD5sInhxs+/lARDEmHgP8A0xPw+B/gVJljv9/mwMMvQngyGB6B32xotEsQkaZBoV1ERKQu1dUwbAG8vhNSh8KiJ+C7Mug/F748fv3xRcXwk9lQeBJeTIK0h2HLXnjoBbhQad939uvwUSH8OMY51yIid7w
bCu05OTkYDIZ6H7m5uc6qE4Dp06eTkZHh1DkaQ0lJCePHjyc+Ph6TyYTBYKBLly7XHLN69Wo6duyI0WgkICCAn/zkJ+zfv/82VSwichfqPwfG/rH+4zn5YP4CVqfCvEdh0lD4IB08PWBe9vXP/+Jm+OE8bHsBJg+DmSNh41TY9xWsft++75E/wYk/w/pnb+WKRKQJu6kfVxowYAAJCQkO7V27dr3lgq4lKyuLiIgIZsyY4dR5btWxY8dYuXIlwcHBdOjQgb17916z/yuvvMKkSZNo27Yt06dPp7S0lLVr1/LTn/6Ujz76iPbt29+mykVExCYnH8JD4JH4K21hwZDYB9bvAEslGL3qH795N/zrg9Am7ErbwAegQyvYaIaUQVfa72nZ6OWLSNNyU6E9Li6OtLS0xq7FpSwWC5WVlQQEBNzyue69914+//xz7r//fgB8fHyuOe+sWbMwmUx88sknNG/eHIBHHnmEn//850yZMoX//d//veWaRETkBn16BLrdCx61PpTueR+sfBf+eRy6tK177LHT1qU0D7ZzPNbzPsi99s0cEZHanLamfenSpXTq1AkfHx+MRiMdOnRg+fLldfaLj48nNDQULy8vAgMD6dOnDzt27LDrZzAYKC4u5uDBg3ZLcg4ePGg7PmTIEIfzZ2RkYDAYyMnJsbWlpKRgMBgwm82MHj0ak8mEr68veXl5AFRUVJCamkp0dDReXl74+fnRq1cvtm3b1qBr9/PzswX263nzzTcpKSlh1KhRtsAO1k8zunTpwtatW7FYLA06l4iINKITJRDZ3LH9ctvxM9cee3Xf2uPPnLXeqRcRaaCbutNeUVFBUVGRXZuvry8tWrQA4KmnniIrK4vu3bszefJkPD092bJlC6mpqZw4cYIFCxbYxmVmZhIcHExiYiKRkZEUFhayefNmBg0ahNlsplu3bgAsXryY9PR0AgMDmTJlim1869atb+YSAEhKSsLb25tx48ZhMBho27YtFouF+Ph4Dh06xKBBg0hOTqa0tJTs7GyGDh3K22+/zUMPPXTTc9ZmNpsB+MlPfuJwrFu3buzfv5+9e/fSp0+fRptTROSuU3kRyioc2yyVUFxu324KsN5dP3cBjHX8Z9LH2/rvuQv1z3f5WF3LZ3y8rvS51vIaEZGr3FRoz8zMJDMz066tf//+vP/++2zdupWsrCyefPJJVq9ebTuekZFBnz59WLJkCVOmTLEF/F27dhESEmJ3rokTJ9KvXz/mz5/PX/7yFwDS0tJYuHAhJpOp0ZbmBAQEsHfvXry9vW1t06ZN48CBA6xbt47HHnvM1j537lw6duzIc889x4EDBxplfoATJ04AEBPjuGPA5TckR44cUWgXEbkVuz63btdYm/kL2PB3+7Yjf7KuMff1BstFxzHnLwVyX2/HY5ddPlbX3fTzldcfLyJSy00tjxk+fDivv/663ePy3fNXX30Vg8HApEmTKCoqsnsMGzaM8+fP25ahALbAXl1dTXFxMUVFRbRu3ZpWrVo5ffeUZ555xi6wg3W5SqtWrejfv79d7efPn6dXr14UFBTw/fffN1oN586dA6yfVNR2eS38Dz/80GjziYg0Zbt376aqqsr2uqCggJKSEnjgHnh3Hqden8S3/z0R3p0HXdty8WedObLySevrS4/8I59bB0c2hxMltk9ELzv890vr0VuZ7Oe4pKioiGPVl+7qnyihvLzctpTzcltlkI/dXfbacxQVFdV9HVcd//rrr22vHeao45y1X9f7t9IcmkNz3JY5bpShpqampqGdc3JyGDVqFFOmTOF3v/tdnX169OjBxx9/fM3zLFy4kOnTpwOwbds2ZsyYwb59+xzWboeFhfHdd9/ZvY6IiKjzTrfBYGDw4MF2bwjAeod/5syZbNq0iZEjRwLWNe2rVq3CbDbTu3dvu/5Go5ELF67xkSdw6NAhOnbseM0+V/Px8eG+++6rs+6RI0eyefNm8vPziY+Ptzs2Z84cFixYwPr160lKSrruPOXl5QQHB1PWPoWgwlMNrk9E5I52f5R1W8VLIbrB+s+x3lFf/Uzdx0cthp2H4Pir9l9GTXkF/nsHnFl77eUtLcdC/86wsdanw/enQusWsPUFxzHF5dYfcpqXCL8ZfWPXIyJN2k0tj7mWmpoaDAYDa9euxdPTs84+PXv2BKzhNyEhAT8/P8aPH09sbCwBAQEYDAaef/55213oW3HxYh0fbV4SGBhYZ/1t2rThpZdeqnfcrayjry0yMhKwLoGpHdovf2+grqUzIiLiZCN7W7d9fHM3jLy0RLG4HDaZYfiD9oH98Enrv+0irrSN6A1r3odviiE61Nq2db9115kpw2/PNYhIk9HooT0mJoa9e/fSvn17hxBa2+rVq7FYLKxbt45Ro0bZHZs4cSJeXvZ3MAwGQ73n8vf3p7S01KH98OEb+LlprCG6rKyMxMTEet90NKY+ffqwbNkyduzYwZgxY+yOffLJJ/j6+tK9e3en1yEiIrWM7A3xHSB5GRQUQWggrMiDqmp4odZd8J/Ps/771VXf95o5whrwB8yFXw+Ds+dh8f9Yt4lM/pn9+HUfwNFTUHHpE+cdBbBgk/X54z+FttrHXeRu1+hbPj799NMATJ06lcpKxy/gHDlyxPb8ciiuvUInPT2dsrIyh7E+Pj6Ul5c7tANERUVRUFBgd/zkyZO89dZbN1T/iBEjKCsrY9q0aXUev7r+xvDII48QEhLCpk2b7NZKbd++nQMHDjBgwACMRmOjzikiIg3g6Qm5s+HRvrB0C0xbC6FB1qU490ddf3x0KGyfb737/p/rYdFbkNDNuna+9rKa17bCnDcg403r6/cPWl/PeQOOfOdwahG5+zT6nfbBgweTkpLCypUradeuHQkJCURFRXH8+HH27dvHnj17bEtWEhMTWbJkCRMmTGDnzp2YTCbMZjP5+fmEh4fbfQEArL+4mpuby7hx44iNjcXDw4OxY8cSEhJCSkoKaWlp9OjRg1GjRlFSUsLGjRsJDw+v8w1AfTIyMti+fTsvv/wyO3fupF+/fgQHB3P06FF27dqF0Whs0BdkZ86cabvzX1VVxbfffsuvfvUrALp37864ceMA6xr6+fPnM3nyZLp168Zjjz1GWVkZa9asISgoiN///vcNrl1ERG7AB/Ov36d5ALw6yfq4lq8y626PbQPv1LFrzc3UIiJ3tUYP7WDdEjI+Pp5ly5axfv16LBYLQUFBxMTEMGvWLFu/uLg4NmzYwKxZs1i1ahUeHh507tyZd955hwkTJnDy5Em78y5fvpykpCSys7OpqKigpqaGgQMHEhISwtSpUykqKmLdunUsXLiQli1b8uyzz+Lh4cHMmTMbXLvRaCQ/P5958+axadMm2w9CmUwmYmNjSU5ObtB5Vq1aRXFxse31qVOneOWVVwDrG5vLoR0gNTUVPz8/Fi5cyKJFi2jWrBndunXjj3/8I/fdd1+DaxcRERGRpumGdo8R96bdY0TkrnSzu8eIiNxBGn1Nu4iIiIiINC6FdhERERERN6fQLiIiIiLi5hTaRURERETcnEK7iIiIiI
ibU2gXEREREXFzTtmnXVzsnlDw9HZ1FSIit0dMS1dXICLidArtTdGyX0JgkKurEBG5ffyNrq5ARMSpFNqbokgTBCm0i4iIiDQVWtMuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibk6hXURERETEzSm0i4iIiIi4OYV2ERERERE3p9AuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXPNXF2AOMGJM3D2oqurEBFxPn8jBPu7ugoREadTaG+KUlfBN+WurkJExLliWsJrkxTaReSuoNDeFH1VDIWnXF2FiIiIiDQSrWkXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmFdhERERERN6fQLiIiUpfSHyDlFQgbC/5jYMBc+ORww8cfKoIh6RDwH2B6Ah7/A5wqc+z32xx4+EUITwbDI/CbDY12CSLSdCi0i4iI1FZdDcMWwOs7IXUoLHoCviuD/nPhy+PXH19UDD+ZDYUn4cUkSHsYtuyFh16AC5X2fWe/Dh8Vwo9jnHMtItIkOC205+TkYDAY6n3k5uY6a2oApk+fTkZGhlPnaCz/+Mc/GDhwIMHBwXh5eREREcEvf/lLzp496+rSRESapv5zYOwf6z+ekw/mL2B1Ksx7FCYNhQ/SwdMD5mVf//wvboYfzsO2F2DyMJg5EjZOhX1fwer37fse+ROc+DOsf/ZWrkhEmjin/7jSgAEDSEhIcGjv2rWrU+fNysoiIiKCGTNmOHWeW7Vnzx4GDBhAVVUViYmJ3HvvvezevZvXXnuNTz75hI8++ggPD30gIiJyW+XkQ3gIPBJ/pS0sGBL7wPodYKkEo1f94zfvhn99ENqEXWkb+AB0aAUbzZAy6Er7PS0bvXwRaXqcHtrj4uJIS0tz9jS3lcViobKykoCAgFs+17PPPsu5c+f4n//5H4YPH25rf+aZZ1i2bBm/+93vmtzfT0TE7X16BLrdC7VvmvS8D1a+C/88Dl3a1j322GnrUpoH2zke63kf5O5t/HpFpMlzi1u4S5cupVOnTvj4+GA0GunQoQPLly+vs198fDyhoaF4eXkRGBhInz592LFjh10/g8FAcXExBw8etFuSc/DgQdvxIUOGOJw/IyMDg8FATk6OrS0lJQWDwYDZbGb06NGYTCZ8fX3Jy8sDoKKigtTUVKKjo/Hy8sLPz49evXqxbdu2Bl37P/7xDyIjI+0CO1jDPMC6desadB4REWlEJ0ogsrlj++W242euPfbqvrXHnzlrvVMvInIDnH6nvaKigqKiIrs2X19fWrRoAcBTTz1FVlYW3bt3Z/LkyXh6erJlyxZSU1M5ceIECxYssI3LzMwkODiYxMREIiMjKSwsZPPmzQwaNAiz2Uy3bt0AWLx4Menp6QQGBjJlyhTb+NatW9/0dSQlJeHt7c24ceMwGAy0bdsWi8VCfHw8hw4dYtCgQSQnJ1NaWkp2djZDhw7l7bff5qGHHrrmeSsrKzEajQ7tQUFBAHzxxRdUV1driYyIyM2qvAhlFY5tlkooLrdvNwVY766fuwDGOv4T6eNt/ffchfrnu3ysruUzPl5X+lxreY2ISC1OT4KZmZlER0fbPUaOHAnA1q1bycrK4sknn+Tjjz9m0aJFZGRksH//fnr37s2SJUs4ffq07Vy7du3CbDazYsUK5syZw5o1a3jvvfeoqqpi/vz5tn5paWkYjUZMJhNpaWm2R0hIyE1fR0BAAAcOHGDx4sUsWrSIHj16MHv2bA4cOEBWVhZbtmwhPT2dpUuX8tlnnxEYGMhzzz133fO2adOGoqIi/u///s+u/S9/+QtgXYpz8uTJm65bRKQpu3DBPjzv3r2bqqoq2+uCggK+z/vIum3j1Q/zF7Dh747tXxdjNpvB1xssFwGsry87b52vyruZ3RwlJSW219+Wl1qfXLqbXl5ebvukl/PWtvx/2C+RsZujnuu4eo6ioiK+/vpr22u7Oeo5p+bQHJrDvea4UYaampqaWzpDPXJychg1ahTDhw9nzJgxdsfatGlD3759GTNmDNnZ2Xz44YdERkba9VmzZg2zZ89m/fr1JCUl2R2rrq7mzJkznD9/HoC+ffvSrFkzDh++sn9uWFgYERERHDhwwKE2g8HA4MGDbUtcLsvIyGDmzJls2rTJ9sYiJSWFVatWkZmZSUpKil3/du3acf78eT788EOHOcaPH09eXh6lpaUEBgbW+3f63e9+x9SpU7nvvvvIyMigY8eObN26lblz53L27Fmqqqr4/PPPuf/+++s9x2Xl5eUEBwdT1j6FoMJT1+0vInJHuz/KujtLK9O1+5Wchb219lefuhoimsO0X9i3/8uPrHfT75sE90VC7mz746+9B0+vgP0vX3tNe+tfwsLHYfq/2x97/A/WNe2n1zqOKy63vnGYlwi/GX3taxKRu47Tl8e0b9/eIbRfVlhYSE1NDT179qx3/LFjx2zPt23bxowZM9i3bx8Wi8WuX1hYWO2hjapLly4ObUVFRVy4cIHo6Oh6xx07doyOHTvWe/y5557j1KlT/OEPf7C9UWjWrBlPPfUU27Zto7Cw0LaUSEREbkLzAOvOLbXbIps7tl8Wdw/sPGTdr/3q5Ykffgl+RusuMPWJagFhQfBxHT/EtOdLiNN+7CJy45we2q+lpqYGg8HA2rVr8fT0rLPP5UB/6NAhEhIS8PPzY/z48cTGxhIQEIDBYOD555/n3Llzt1zPxYsX6z1W193ympoa2rRpw0svvVTvuIaso8/IyGDOnDns2rWLc+fO0atXL8LDwwkKCiIkJITQ0NCGXYCIiDSOkb2t2z6+uRtG9rG2FZfDJjMMf9B+PfrhS0sY20VcaRvRG9a8D98UQ/Sl/w3fut+668wU+40HREQawqWhPSYmhr1799K+fXvi4+Ov2Xf16tVYLBbWrVvHqFGj7I5NnDgRLy/7L/QYDIZ6z+Xv709paalD+9XLaxoiMjKSsrIyEhMT633T0VB+fn52X1p99913+f777+vc415ERJxsZG+I7wDJy6CgCEIDYUUeVFXDC7WWrvx8nvXfrzKvtM0cYQ34A+bCr4fB2fOw+H+sS2qSf2Y/ft0HcPQUVFz6BHlHASzYZH3++E+hrfZxFxEXb/n49NNPAzB16lQqKx23vzpy5Ijt+eVQXHsJfnp6OmVlZQ5jfXx8KC8vd2gHiIqKoqCgwO74yZMneeutt26o/hEjRlBWVsa0adPqPH51/Tfi7Nmz/PrXv6ZZs2bMmzfvps4hIiK3wNPTup790b6wdAtMWwuhQdY19PdHXX98dChsn2+9+/6f62HRW5DQDd6d57hrzGtbYc4bkPGm9fX7B62v57wBR75r9EsTkTuTS++0Dx48mJSUFFauXEm7du1ISEggKiqK48ePs2/fPvbs2WNbspKYmMiSJUuYMGECO3fuxGQyYTabyc/PJzw83O4bvmD9xdXc3FzGjRtHbGwsHh4ejB07lpCQEFJSUkhLS6NHjx6MGjWKkpISNm7cSHh4eJ1vAOqTkZHB9u3befnll9m5cyf9+
vUjODiYo0ePsmvXLoxGI/v377/mOcxmM0888QQ///nPiY6O5uTJk7z55pt8++23ZGRkXHO9v4iI3KQP5l+/T/MAeHWS9XEtV99hv1psG3hnbuPUIiJ3PZeGdrBuCRkfH8+yZctYv349FouFoKAgYmJimDVrlq1fXFwcGzZsYNasWaxatQoPDw86d+7MO++8w4QJExy2RVy+fDlJSUlkZ2dTUVFBTU0NAwcOJCQkhKlTp1JUVMS6detYuHAhLVu25Nlnn8XDw4OZM2c2uHaj0Uh+fj7z5s1j06ZNth+EMplMxMbGkpycfN1zREZGEh4ezqZNmygvL8fPz4+uXbvy5z//uc4fgBIRERGRu4/TtnyU209bPorIXaWhWz6KiDQB+plNERERERE3p9AuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibs7l+7SLE9wTCp7erq5CRMS5Ylq6ugIRkdtGob0pWvZLCAxydRUiIs7nb3R1BSIit4VCe1MUaYIghXYRERGRpkJr2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJurpmrCxAnOHEGzl50dRUicrP8jRDs7+oqRETEjSi0N0Wpq+CbcldXISI3I6YlvDZJoV1EROwotDdFXxVD4SlXVyEiIiIijURr2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiNxNSn+AlFcgbCz4j4EBc+GTww0ff6gIhqRDwH+A6Ql4/A9wqsyx329z4OEXITwZDI/AbzY02iWIiNyNFNpFRO4W1dUwbAG8vhNSh8KiJ+C7Mug/F748fv3xRcXwk9lQeBJeTIK0h2HLXnjoBbhQad939uvwUSH8OMY51yIicpdxWmjPycnBYDDU+8jNzXXW1ABMnz6djIwMp87RmL744guGDx9OaGgoXl5ehISE8OCDD5Kfn+/q0kTkTtF/Doz9Y/3Hc/LB/AWsToV5j8KkofBBOnh6wLzs65//xc3ww3nY9gJMHgYzR8LGqbDvK1j9vn3fI3+CE3+G9c/eyhWJiMglTv9xpQEDBpCQkODQ3rVrV6fOm5WVRUREBDNmzHDqPI1h27ZtPPzww/j6+vLII4/Qtm1bTp8+zaeffsrx4w24+yUi0hA5+RAeAo/EX2kLC4bEPrB+B1gqwehV//jNu+FfH4Q2YVfaBj4AHVrBRjOkDLrSfk/LRi9fRORu5vTQHhcXR1pamrOnua0sFguVlZUEBATc8rnOnj1LUlISYWFhfPTRR4SGhjZChSIidfj0CHS7Fzxqfcja8z5Y+S788zh0aVv32GOnrUtpHmzneKznfZC7t/HrFRERG7dY07506VI6deqEj48PRqORDh06sHz58jr7xcfH25aQBAYG0qdPH3bs2GHXz2AwUFxczMGDB+2W5Bw8eNB2fMiQIQ7nz8jIwGAwkJOTY2tLSUnBYDBgNpsZPXo0JpMJX19f8vLyAKioqCA1NZXo6Gi8vLzw8/OjV69ebNu2rUHXvmLFCk6ePMns2bMJDQ2loqKCioqKBv/tREQa7EQJRDZ3bL/cdvzMtcde3bf2+DNnrXfqRUTEKZx+p72iooKioiK7Nl9fX1q0aAHAU089RVZWFt27d2fy5Ml4enqyZcsWUlNTOXHiBAsWLLCNy8zMJDg4mMTERCIjIyksLGTz5s0MGjQIs9lMt27dAFi8eDHp6ekEBgYyZcoU2/jWrVvf9HUkJSXh7e3NuHHjMBgMtG3bFovFQnx8PIcOHWLQoEEkJydTWlpKdnY2Q4cO5e233+ahhx665nn/9re/AdCiRQtiY2M5dOgQNTU13HPPPcyfP5/HHnvspmsWkSas8iKUVTi2WSqhuNy+3RRgvbt+7gIY6/iffR9v67/nLtQ/3+VjdS2f8fG60uday2tEROSmOf1Oe2ZmJtHR0XaPkSNHArB161aysrJ48skn+fjjj1m0aBEZGRns37+f3r17s2TJEk6fPm07165duzCbzaxYsYI5c+awZs0a3nvvPaqqqpg/f76tX1paGkajEZPJRFpamu0REhJy09cREBDAgQMHWLx4MYsWLaJHjx7Mnj2bAwcOkJWVxZYtW0hPT2fp0qV89tlnBAYG8txzz133vF999RUATzzxBAEBAfz+979n7ty5lJeX88QTT/DGG2/cdM0icme6cME+PO/evZuqqirb64KCAr7P+8i6bePVD/MXsOHvju1fF2M2m8HXGywXAayvLztvna/Ku5ndHCUlJbbX35aXWp9cupteXl5u+/SS89a2/H/YL5Gxm6Oe67h6jqKiIr7++mvba7s56jmn5tAcmkNz3Klz3ChDTU1NzS2doR45OTmMGjWK4cOHM2bMGLtjbdq0oW/fvowZM4bs7Gw+/PBDIiMj7fqsWbOG2bNns379epKSkuyOVVdXc+bMGc6fPw9A3759adasGYcPX9lrOCwsjIiICA4cOOBQm8FgYPDgwbYlLpdlZGQwc+ZMNm3aZHtjkZKSwqpVq8jMzCQlJcWuf7t27Th//jwffvihwxzjx48nLy+P0tJSAgMD6/07RURE8O2339KpUycOHDiAx6W1pp988gk9evSgXbt2/POf/6x3/NXKy8sJDg6mrH0KQYWnGjRGRNzM/VHW3Vlama7dr+Qs7K21v/rU1RDRHKb9wr79X35kvZt+3yS4LxJyZ9sff+09eHoF7H/52mvaW/8SFj4O0//d/tjjf7CuaT+91nFccbn1jcO8RPjN6Gtfk4iI1Mvpy2Pat2/vENovKywspKamhp49e9Y7/tixY7bn27ZtY8aMGezbtw+LxWLXLywsrPbQRtWlSxeHtqKiIi5cuEB0dHS9444dO0bHjh3rPe7tbf1YevTo0bbADtCtWzd+9KMfUVBQQGlp6S19SiAiTVDzAOvOLbXbIps7tl8Wdw/sPGTdr/3qL6N++CX4Ga27wNQnqgWEBcHHdfwQ054vIU77sYuIOJPTQ/u11NTUYDAYWLt2LZ6ennX2uRzoDx06REJCAn5+fowfP57Y2FgCAgIwGAw8//zznDt37pbruXjxYr3H6rpbXlNTQ5s2bXjppZfqHXe9dfQtW7bkm2++ISoqyuFYWFgYNTU1FBcXK7SLyK0b2du67eObu2FkH2tbcTlsMsPwB+3Xox8+af23XcSVthG9Yc378E0xRF/a6WrrfuuuM1OG355rEBG5S7k0tMfExLB3717at29PfHz8NfuuXr0ai8XCunXrGDVqlN2xiRMn4uVl/+Ung8FQ77n8/f0pLS11aL96eU1DREZGUlZWRmJiYr1vOq7nxz/+MXv37uXo0aMOx7799ls8PDyIiIioY6SIyA0a2RviO0DyMigogtBAWJEHVdXwQq2lKz+fZ/33q8wrbTNHWAP+gLnw62Fw9jws/h/rkprkn9mPX/cBHD0FFZc+Fd1RAAs2
WZ8//lNoq33cRURuhEu3fHz66acBmDp1KpWVjluFHTlyxPb8ciiuvQQ/PT2dsrIyh7E+Pj6Ul5c7tANERUVRUFBgd/zkyZO89dZbN1T/iBEjKCsrY9q0aXUev7r++qSkpODh4cF///d/2335bPv27Xz++ec88MADjbIfvIgInp7W9eyP9oWlW2DaWggNsq6hv9/x0z4H0aGwfb717vt/rodFb0FCN3h3nuOuMa9thTlvQMab1tfvH7S+nvMGHPmu0S9NRKSpc+md9sGDB5OSksLKlStp164dCQkJREVFcfz4cfbt28eePXtsS1YSExNZsmQJEyZMYOfOnZhMJsxmM/n5+YSHh9t9wxesv7iam5vLuHHjiI2NxcPDg7FjxxISEkJKSgppaWn06NGDUaNGUVJSwsaNGwkPD6/zDUB9MjIy2L59Oy+//DI7d+6kX79+BAcHc/ToUXbt2oXRaGT//v3XPEePHj14/PHHWbNmDQ888AD/9m//xpkzZ/jv//5vvL29+f3vf3/Df1cRuUt9MP/6fZoHwKuTrI9rufoO+9Vi28A7cxunFhERaTCXhnawbgkZHx/PsmXLWL9+PRaLhaCgIGJiYpg1a5atX1xcHBs2bGDWrFmsWrUKDw8POnfuzDvvvMOECRM4efKk3XmXL19OUlIS2dnZVFRUUFNTw8CBAwkJCWHq1KkUFRWxbt06Fi5cSMuWLXn22Wfx8PBg5syZDa7daDSSn5/PvHnz2LRpk+0HoUwmE7GxsSQnJzfoPKtXryYmJoasrCz+67/+C29vb3784x+zaNEi+vTp0+B6RERERKRpctqWj3L7actHkSagoVs+iojIXcWla9pFREREROT6FNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibk6hXURERETEzbl8n3ZxgntCwdPb1VWIyM2IaenqCkRExA0ptDdFy34JgUGurkJEbpa/0dUViIiIm1Fob4oiTRCk0C4iIiLSVGhNu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJurpmrCxAnOHEGzl50dRVyN/A3QrC/q6sQERFp8hTam6LUVfBNuaurkKYupiW8NkmhXURE5DZQaG+KviqGwlOurkJEREREGonWtIuIiIiIuDmFdhERERERN6fQLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0i4n5Kf4CUVyBsLPiPgQFz4ZPDrq5KRETEZRTaRcS9VFfDsAXw+k5IHQqLnoDvyqD/XPjyuKurExERcQmnhfacnBwMBkO9j9zcXGdNDcD06dPJyMhw6hyN4eDBg/X+jdq0aePq8kQaX/85MPaP9R/PyQfzF7A6FeY9CpOGwgfp4OkB87JvX50iIiJuxOm/iDpgwAASEhIc2rt27erUebOysoiIiGDGjBlOnaex9O3bl3/7t3+zazOZTK4pRsSVcvIhPAQeib/SFhYMiX1g/Q6wVILRy2XliYiIuILTQ3tcXBxpaWnOnua2slgsVFZWEhAQ0Gjn7NSpU5P7O4nclE+PQLd7waPWB4E974OV78I/j0OXtq6pTURExEXcYk370qVL6dSpEz4+PhiNRjp06MDy5cvr7BcfH09oaCheXl4EBgbSp08fduzYYdfPYDBQXFzssPTk4MGDtuNDhgxxOH9GRgYGg4GcnBxbW0pKCgaDAbPZzOjRozGZTPj6+pKXlwdARUUFqampREdH4+XlhZ+fH7169WLbtm03/Hc4e/Ys5eXlNzxOpEk5UQKRzR3bL7cdP3N76xEREXEDTr/TXlFRQVFRkV2br68vLVq0AOCpp54iKyuL7t27M3nyZDw9PdmyZQupqamcOHGCBQsW2MZlZmYSHBxMYmIikZGRFBYWsnnzZgYNGoTZbKZbt24ALF68mPT0dAIDA5kyZYptfOvWrW/6OpKSkvD29mbcuHEYDAbatm2LxWIhPj6eQ4cOMWjQIJKTkyktLSU7O5uhQ4fy9ttv89BDDzXo/GvXruXVV1+lpqYGk8nEv//7v7N06VL8/PxuumYRl6u8CGUVjm2WSiiu9QbVFGC9u37uAhjr+J8mH2/rv+cuOKdWERERN+b0O+2ZmZlER0fbPUaOHAnA1q1bycrK4sknn+Tjjz9m0aJFZGRksH//fnr37s2SJUs4ffq07Vy7du3CbDazYsUK5syZw5o1a3jvvfeoqqpi/vz5tn5paWkYjUZMJhNpaWm2R0hIyE1fR0BAAAcOHGDx4sUsWrSIHj16MHv2bA4cOEBWVhZbtmwhPT2dpUuX8tlnnxEYGMhzzz133fN6enrStWtXpkyZQmZmJvPnz+eee+7htddeo2/fvlRWVt50zSK3S3l5ue2TrMvMZjPs+ty6bePVD/MXsOHvDu1fbs2npKQEfL3BcpGioiK+/vpr2/kqzpRan/h6289Re86r7N69m6qqKtvrgoIC6xyX1J6j3uvQHJpDc2gOzaE5GnmOG2WoqampuaUz1CMnJ4dRo0YxfPhwxowZY3esTZs29O3blzFjxpCdnc2HH35IZGSkXZ81a9Ywe/Zs1q9fT1JSkt2x6upqzpw5w/nz5wHrlzibNWvG4cNX9nEOCwsjIiKCAwcOONRmMBgYPHiwbYnLZRkZGcycOZNNmzbZ3likpKSwatUqMjMzSUlJsevfrl07zp8/z4cffugwx/jx48nLy6O0tJTAwMDr/bkcDB8+nLfffpv/+q//YurUqQ0aU15eTnBwMGXtUwgqPHXDc4rckPujYNsL0OoaX5guOQt7a+2vPnU1RDSHab+wb/+XH1nvpt83Ce6LhNzZ9sdfew+eXgH7X9aadhERues4fXlM+/btHUL7ZYWFhdTU1NCzZ896xx87dsz2fNu2bcyYMYN9+/ZhsVjs+oWFhTVOwfXo0qWLQ1tRUREXLlwgOjq63nHHjh2jY8eONzzfSy+9xNtvv82WLVsaHNpF3E7zABj4gGNbZHPH9svi7oGdh6z7tV/9ZdQPvwQ/I3Ro5bRyRURE3JXTQ/u11NTUYDAYWLt2LZ6ennX2uRzoDx06REJCAn5+fowfP57Y2FgCAgIwGAw8//zznDt37pbruXjxYr3H6rpbXlNTQ5s2bXjppZfqHXez6+g7duyIh4cHpaWlNzVe5I41srd128c3d8PIPta24nLYZIbhD2q7RxERuSu5NLTHxMSwd+9e2rdvT3x8/DX7rl69GovFwrp16xg1apTdsYkTJ+LlZf8fcoPBUO+5/P396wzDVy+vaYjIyEjKyspITEys903Hzdq/fz/V1dW2L+yK3DVG9ob4DpC8DAqKIDQQVuRBVTW8MNrV1YmIiLiES7d8fPrppwGYOnVqnV+4PHLkiO355VBcewl+eno6ZWVlDmN9fHzq3T4xKiqKgoICu+MnT57krbfeuqH6R4wYQVlZGdOmTavz+NX11+fq5T+XVVVV2b7EOnz48BuqSeSO5+l
pXc/+aF9YugWmrYXQIOv6+fujXF2diIiIS7j0TvvgwYNJSUlh5cqVtGvXjoSEBKKiojh+/Dj79u1jz549tiUriYmJLFmyhAkTJrBz505MJhNms5n8/HzCw8PtvuEL1l9czc3NZdy4ccTGxuLh4cHYsWMJCQkhJSWFtLQ0evTowahRoygpKWHjxo2Eh4fX+QagPhkZGWzfvp2XX36ZnTt30q9fP4KDgzl69Ci7du3CaDSyf//+a57j0Ucf5ezZszz44IO0adOGU6dO8be//Y3Dhw/Tu3dvJk2adON/WBF39sH86/dpHgCvTrI+RERExLWhHaxbQsbHx7Ns2TLWr1+PxWIhKCiImJgYZs2aZesXFxfHhg0bmDVrFqtWrcLDw4POnTvzzjvvMGHCBE6ePGl33uXLl5OUlER2djYVFRXU1NQwcOBAQkJCmDp1KkVFRaxbt46FCxfSsmVLnn32WTw8PJg5c2aDazcajeTn5zNv3jw2bdpk+0Eok8lEbGwsycnJ1z3HkCFDyM7OJicnh7Nnz9KsWTPatm3L7Nmz+c1vftPoy25ERERE5M7jtC0f5fbTlo9yWzVky0cRERFpFC5d0y4iIiIiIten0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuzuX7tIsT3BMKnt6urkKaupiWrq5ARETkrqHQ3hQt+yUEBrm6Crkb+BtdXYGIiMhdQaG9KYo0QZBCu4iIiEhToTXtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIiIiLi5Zq4uQJzgxBk4e9HVVdyZ/I0Q7O/qKkRERETsKLQ3Ramr4JtyV1dx54lpCa9NUmgXERERt6PQ3hR9VQyFp1xdhYiIiIg0Eq1pFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVul9IfIOUVCBsL/mNgwFz45LCrqxIREZE7gEK7yO1QXQ3DFsDrOyF1KCx6Ar4rg/5z4cvjrq5ORERE3JzTQntOTg4Gg6HeR25urrOmBmD69OlkZGQ4dY7GsHXrVv7jP/6De++9F39/f/z9/Wnfvj1z5szBYrG4ujxpqP5zYOwf6z+ekw/mL2B1Ksx7FCYNhQ/SwdMD5mXfvjpFRETkjuT0X0QdMGAACQkJDu1du3Z16rxZWVlEREQwY8YMp85zqxYsWMDHH3/MT37yEx577DEuXrzIO++8w4IFC8jNzeWjjz7Cw0MfiNzxcvIhPAQeib/SFhYMiX1g/Q6wVILRy2XliYiIiHtzemiPi4sjLS3N2dPcVhaLhcrKSgICAm75XFOmTOFnP/uZ3blefPFFBg4cyNatW1m9ejVPPfXULc8jLvbpEeh2L9R+A9bzPlj5LvzzOHRp65raRERExO25xS3cpUuX0qlTJ3x8fDAajXTo0IHly5fX2S8+Pp7Q0FC8vLwIDAykT58+7Nixw66fwWCguLiYgwcP2i3JOXjwoO34kCFDHM6fkZGBwWAgJyfH1paSkoLBYMBsNjN69GhMJhO+vr7k5eUBUFFRQWpqKtHR0Xh5eeHn50evXr3Ytm1bg6794YcfrjP8jxkzBoB//OMfDTqPuLkTJRDZ3LH9ctvxM7e3HhEREbmjOP1Oe0VFBUVFRXZtvr6+tGjRAoCnnnqKrKwsunfvzuTJk/H09GTLli2kpqZy4sQJFixYYBuXmZlJcHAwiYmJREZGUlhYyObNmxk0aBBms5lu3boBsHjxYtLT0wkMDGTKlCm28a1bt77p60hKSsLb25tx48ZhMBho27YtFouF+Ph4Dh06xKBBg0hOTqa0tJTs7GyGDh3K22+/zUMPPXRT8x09ehSAiIiIm65ZnKTyIpRVOLZZKqG43L7dFGC9u37uAhjr+H83H2/rv+cuOKdWERERaRKcfqc9MzOT6Ohou8fIkSMB65cws7KyePLJJ/n4449ZtGgRGRkZ7N+/n969e7NkyRJOnz5tO9euXbswm82sWLGCOXPmsGbNGt577z2qqqqYP3++rV9aWhpGoxGTyURaWprtERISctPXERAQwIEDB1i8eDGLFi2iR48ezJ49mwMHDpCVlcWWLVtIT09n6dKlfPbZZwQGBvLcc8/d1FwlJSX86U9/wtfXl7Fjx950zXLjLlZV2Z6Xl5fbPp25zGw2w67Prds2Xv0wfwEb/u7Q/uXWfEpKSsDXGywXKSoq4uuvv7adr+JMqfWJr7f9HLXnvMru3bupuqrOgoIC6xyX1J6j3uvQHJpDc2gOzaE5NIfL5rhRhpqamppbOkM9cnJyGDVqFMOHD7ct9bisTZs29O3blzFjxpCdnc2HH35IZGSkXZ81a9Ywe/Zs1q9fT1JSkt2x6upqzpw5w/nz5wHo27cvzZo14/DhK3teh4WFERERwYEDBxxqMxgMDB482LbE5bKMjAxmzpzJpk2bbG8sUlJSWLVqFZmZmaSkpNj1b9euHefPn+fDDz90mGP8+PHk5eVRWlpKYGDg9f5cNpWVlfz0pz8lPz+f3/3ud3afFFxPeXk5wcHBlLVPIajwVIPHySX3R8G2F6CV6dr9Ss7C3lr7q09dDRHNYdov7Nv/5UfWu+n3TYL7IiF3tv3x196Dp1fA/pe1pl1ERETq5fTlMe3bt3cI7ZcVFhZSU1NDz5496x1/7Ngx2/Nt27YxY8YM9u3b57AdYlhYWOMUXI8uXbo4tBUVFXHhwgWio6PrHXfs2DE6duzYoDmqqqr413/9V/Lz85k0adINBXa5jZoHwMAHHNsimzu2XxZ3D+w8ZN2v/eovo374JfgZoUMrp5UrIiIidz6nh/ZrqampwWAwsHbtWjw9PevscznQHzp0iISEBPz8/Bg/fjyxsbEEBARgMBh4/vnnOXfu3C3Xc/HixXqP1XW3vKamhjZt2vDSSy/VO66h6+gvB/b/9//+H7/85S9ZtmxZg8bJHWJkb+u2j2/uhpF9rG3F5bDJDMMf1HaPIiIick0uDe0xMTHs3buX9u3bEx8ff82+q1evxmKxsG7dOkaNGmV3bOLEiXh52Yceg8FQ77n8/f0pLS11aL96eU1DREZGUlZWRmJiYr1vOhricmDPy8vjqaeeYuXKlTd9LnFTI3tDfAdIXgYFRRAaCCvyoKoaXhjt6upERETEzbl0y8enn34agKlTp1JZWelw/MiRI7bnl0Nx7SX46enplJWVOYz18fGhvLzcoR0gKiqKgoICu+MnT57krbfeuqH6R4wYQVlZGdOmTavz+NX116e6upqHH36YvLw8xo4dy2uvvXZDNcgdwtPTup790b6wdAtMWwuhQdY19PdHubo6ERERcXMuvdM+ePBgUlJSWLlyJe3atSMhIYGoqCiOHz/Ovn372LNnj23JSmJiIkuWLGHChAns3LkTk8mE2WwmPz+f8PBwu2/4gvUXV3Nzcxk3bhyxsbF4eHgwduxYQkJCSE
lJIS0tjR49ejBq1ChKSkrYuHEj4eHhdb4BqE9GRgbbt2/n5ZdfZufOnfTr14/g4GCOHj3Krl27MBqN7N+//5rneOyxx8jNzeWee+4hNjaW//qv/7I7/qMf/Yhhw4Y1uCZxkQ/mX79P8wB4dZL1ISIiInIDXBrawbolZHx8PMuWLWP9+vVYLBaCgoKIiYlh1qxZtn5xcXFs2LCBWbNmsWrVKjw8POjcuTPvvPMOEyZM4OTJk3bnXb58OUlJSWRnZ1NRUUFNTQ0DBw4kJCSEqVOnUlRUxLp161i4cCEtW7bk2WefxcPDg5kzZza4dqPRSH5+PvPmzWPTpk22H4QymUzExsaSnJx83XNc3t3mq6++qvOO/eDBgxXaRURERO5yTtvyUW4/bfl4ixq65aOIiIjIbebSNe0iIiIiInJ9Cu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmFdhERERERN6fQLiIiIiLi5ly+T7s4wT2h4Ont6iruPDEtXV2BiIiISJ0U2puiZb+EwCBXV3Fn8je6ugIRERERBwrtTVGkCYIU2kVERESaCq1pFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNNXN1AeIEJ87A2YuuruLG+Rsh2N/VVYiIiIi4HYX2pih1FXxT7uoqbkxMS3htkkK7iIiISB0U2puir4qh8JSrqxARERGRRqI17SIiIiIibk6hXURERETEzSm0i4iIiIi4OYV2ERERERE3p9AuIiIiIuLmFNpFRERERNycQrs0faU/QMorEDYW/MfAgLnwyWFXVyUiIiLSYArt0rRVV8OwBfD6TkgdCouegO/KoP9c+PK4q6sTERERaRCnhvacnBwMBkO9j9zcXGdOz/Tp08nIyHDqHI2loqKCp59+mpYtW+Ll5UV4eDi/+tWvsFgsri7NvfWfA2P/WP/xnHwwfwGrU2HeozBpKHyQDp4eMC/79tUpIiIicgtuyy+iDhgwgISEBIf2rl27OnXerKwsIiIimDFjhlPnaQwDBw4kPz+fhIQE4uPj2b17N6+88gr/93//R15enqvLu3Pl5EN4CDwSf6UtLBgS+8D6HWCpBKOXy8oTERERaYjbEtrj4uJIS0u7HVPdNhaLhcrKSgICAm75XKtXryY/P5/Ro0fzxhtv2NrHjBnDhg0b+N///V+GDx9+y/PclT49At3uBY9aHyr1vA9Wvgv/PA5d2rqmNhEREZEGcps17UuXLqVTp074+PhgNBrp0KEDy5cvr7NffHw8oaGheHl5ERgYSJ8+fdixY4ddP4PBQHFxMQcPHrRbknPw4EHb8SFDhjicPyMjA4PBQE5Ojq0tJSUFg8GA2Wxm9OjRmEwmfH19bXfAKyoqSE1NJTo6Gi8vL/z8/OjVqxfbtm1r0LWvX78egHnz5tm1X3795z//uUHnkTqcKIHI5o7tl9uOn7m99YiIiIjchNtyp72iooKioiK7Nl9fX1q0aAHAU089RVZWFt27d2fy5Ml4enqyZcsWUlNTOXHiBAsWLLCNy8zMJDg4mMTERCIjIyksLGTz5s0MGjQIs9lMt27dAFi8eDHp6ekEBgYyZcoU2/jWrVvf9HUkJSXh7e3NuHHjMBgMtG3bFovFQnx8PIcOHWLQoEEkJydTWlpKdnY2Q4cO5e233+ahhx665nkLCgpo3rw5HTt2tGvv2LEjzZs358CBAzddc5NSeRHKKhzbLJVQXG7fbgqw3l0/dwGMdfyfuY+39d9zF5xTq4iIiEgjui132jMzM4mOjrZ7jBw5EoCtW7eSlZXFk08+yccff8yiRYvIyMhg//799O7dmyVLlnD69GnbuXbt2oXZbGbFihXMmTOHNWvW8N5771FVVcX8+fNt/dLS0jAajZhMJtLS0myPkJCQm76OgIAADhw4wOLFi1m0aBE9evRg9uzZHDhwgKysLLZs2UJ6ejpLly7ls88+IzAwkOeee+665z1z5gyhoaF1HmvRooXd9TdlNTU1tucFBQWUlJTYXhcVFfHtm9ut2zZe/TB/ARv+7tj+dTFmsxl8vcFyEYDdu3dTVVVlPeF5a1j//uIFuzm+/vpr2+vy8nLbJzOXmc3ma762m6Oe69AcmkNzaA7NoTk0h+a4UYaaq5NSI8vJyWHUqFEMHz6cMWPG2B1r06YNffv2ZcyYMWRnZ/Phhx8SGRlp12fNmjXMnj2b9evXk5SUZHesurqaM2fOcP78eQD69u1Ls2bNOHz4yv7bYWFhRERE1Hmn2mAwMHjwYIcveWZkZDBz5kw2bdpke2ORkpLCqlWryMzMJCUlxa5/u3btOH/+PB9++KHDHOPHjycvL4/S0lICAwPr/Tt5eHjwox/9iM8++8zhWGxsLEeOHKGioqKOkfbKy8sJDg6mrH0KQYWnrtvfrdwfBdtegFam+vuUnIW9tfZXn7oaIprDtF/Yt//Lj6x30++bBPdFQu5s++OvvQdPr4D9L2tNu4iIiLi927I8pn379g6h/bLCwkJqamro2bNnveOPHTtme75t2zZmzJjBvn37HLZDDAsLa5yC69GlSxeHtqKiIi5cuEB0dHS9444dO+aw9OVq3t7eVFZW1nnswoULGI3GGy+2KWoeAAMfcGyLbO7YflncPbDzkHW/9qu/jPrhl+BnhA6tnFauiIiISGO5LaH9WmpqajAYDKxduxZPT886+1wO9IcOHSIhIQE/Pz/Gjx9PbGwsAQEBGAwGnn/+ec6dO3fL9Vy8eLHeY3XdLa+pqaFNmza89NJL9Y673jp6k8lEcXFxncdOnz5tW/svN2Fkb+u2j2/uhpF9rG3F5bDJDMMf1HaPIiIickdweWiPiYlh7969tG/fnvj4+Gv2Xb16NRaLhXXr1jFq1Ci7YxMnTsTLyz6AGQyGes/l7+9PaWmpQ/vVy2saIjIykrKyMhITE+t903E9nTp1YuvWrXz++ed2d+Q///xzSkpK+MlPfnJT5xWsoT2+AyQvg4IiCA2EFXlQVQ0vjHZ1dSIiIiIN4vItH59++mkApk6dWucSkSNHjtieXw7FtZfhp6enU1ZW5jDWx8eH8vJyh3aAqKgoCgoK7I6fPHmSt95664bqHzFiBGVlZUybNq3O41fXX5/L6/VfeOEFu/bLr5OTk2+oJrmKp6d1PfujfWHpFpi2FkKDrOvn749ydXUiIiIiDeLyO+2DBw8mJSWFlStX0q5dOxISEoiKiuL48ePs27ePPXv22JasJCYmsmTJEiZMmMDOnTsxmUyYzWby8/MJDw+3+5YvWH9xNTc3l3HjxhEbG4uHhwdjx44lJCSElJQU0tLS6NGjB6NGjaKkpISNGzcSHh5e5xuA+mRkZLB9+3Zefvlldu7cSb9+/QgODubo0aPs2rULo9HI/v37r3mO5ORk/vSnP7FhwwbKy8vp3bs3+fn55ObmMnDgQH7xi19cc/xd7YP51+/TPABenWR9iIiIiNyBXB7awbolZHx8PMuWL
WP9+vVYLBaCgoKIiYlh1qxZtn5xcXFs2LCBWbNmsWrVKjw8POjcuTPvvPMOEyZM4OTJk3bnXb58OUlJSWRnZ1NRUUFNTQ0DBw4kJCSEqVOnUlRUxLp161i4cCEtW7bk2WefxcPDg5kzZza4dqPRSH5+PvPmzWPTpk22H4QymUzExsY2+C751q1bmTx5Mv/7v//L//t//4/mzZszYcIEfv/73ze4FhERERFpmpy65aPcXk1+y0cRERGRu5TL17SLiIiIiMi1KbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm3OLfdqlkd0TCp7erq7ixsS0dHUFIiIiIm5Lob0pWvZLCAxydRU3zt/o6gpERERE3JJCe1MUaYKgOzC0i4iIiEidtKZdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYRERERETfXzNUFiBOcOANnL7q6Cnv+Rgj2d3UVIiIiInckhfamKHUVfFPu6iquiGkJr01SaBcRERG5SQrtTdFXxVB4ytVViIiIiEgj0Zp2ERERERE3p9AuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibk6hXe58pT9AyisQNhb8x8CAufDJYVdXJSIiItJoFNrlzlZdDcMWwOs7IXUoLHoCviuD/nPhy+Ourk5ERESkUTgttOfk5GAwGOp95ObmOmtqAKZPn05GRoZT52gMQ4YMuebfKTIy0tUlulb/OTD2j/Ufz8kH8xewOhXmPQqThsIH6eDpAfOyb1+dIiIiIk7k9F9EHTBgAAkJCQ7tXbt2deq8WVlZREREMGPGDKfOc6ueeeYZBg4c6NC+detW8vLy+OlPf+qCqu4gOfkQHgKPxF9pCwuGxD6wfgdYKsHo5bLyRERERBqD00N7XFwcaWlpzp7mtrJYLFRWVhIQEHDL5xo2bBjDhg1zaH/jjTcAmDx58i3P0aR9egS63QsetT406nkfrHwX/nkcurR1TW0iIiIijcQt1rQvXbqUTp064ePjg9FopEOHDixfvrzOfvHx8YSGhuLl5UVgYCB9+vRhx44ddv0MBgPFxcUcPHjQbqnJwYMHbceHDBnicP6MjAwMBgM5OTm2tpSUFAwGA2azmdGjR2MymfD19SUvLw+AiooKUlNTiY6OxsvLCz8/P3r16sW2bdtu+u/x2Wef8emnn9KhQwf69Olz0+e5K5wogcjmju2X246fub31iIiIiDiB0++0V1RUUFRUZNfm6+tLixYtAHjqqafIysqie/fuTJ48GU9PT7Zs2UJqaionTpxgwYIFtnGZmZkEBweTmJhIZGQkhYWFbN68mUGDBmE2m+nWrRsAixcvJj09ncDAQKZMmWIb37p165u+jqSkJLy9vRk3bhwGg4G2bdtisViIj4/n0KFDDBo0iOTkZEpLS8nOzmbo0KH/v717j4qy2v8H/h4GGRCGgQlQvIGCCke8VBbebyksjY6K4VftgpqZHH8dLaGTxgHTjpJZXtLW8SCigqwID2omXlKwzFuJZWZgXssLJnJNwRFk//7gzMQ4M/oAg/Oo79das4T97GdfPjNsPzzseQZffPEFhg0bVu++li1bBiEEXnrppQaP94FUVQ2UVZiW6aqAa+XG5VqX2qvrlbcAlZmXsaND7b+Vt5pmrERERET3k2giGRkZAoDZx6BBg4QQQuzevVsAEJGRkSbn9+7dWzg6Oopr164ZykpKSkzqHTx4UNjb24tRo0YZlXt4eIigoCCzYwMgQkNDTcoXLFggAIiMjAxD2auvvioAiKCgIKHT6YzqR0dHCwAiJSXFqLywsFA89thjFvu/m+rqavHYY48JlUoliouL63VuWVmZACDK/KcKgdHyeXT+f0J3rsBorPv37zf6/uDBg6J69zHJbZb/eFocP35cCOfxQkxeYdrmtiO1dXccNe6jutrw/YkTJ4xifOHCBfHrr78axfP48eN3HbfZebAP9sE+2Af7YB/sg33co4/6UgghRFP8MrBx40ZERETgueeew/jx442OtWvXDn379sX48eORnp6Ow4cPm9wlZd26dYiNjUVqaipeeOEFo2M1NTUoLi7GzZs3AQB9+/aFvb09zpz5897cnp6eaNmyJY4fP24yNoVCgdDQUMMWF72FCxdizpw5yMjIwPPPPw+gdntMYmIiVq1ahalTpxrV9/Pzw82bN3H48GGTPl577TXs2LEDpaWlUKvV9wqXQWpqKl566SWMGDEC27Ztk3weAJSXl0Oj0aDMfypcTxfW69wm1bk1kP0u0Ep793ol14HcO+6vPmst0NIdiBlpXN4vsPZqesfpQEdvICvW+HjSbmDKJ8CPS7innYiIiB54Tb49xt/f3yRp1zt9+jSEEHj66actnn/p0iXD19nZ2Zg9ezaOHTsGnU5nVM/T09M6A7aga9euJmUXL17ErVu30LZtW4vnXbp0CQEBAZL7SUxMBABMnz69/oN80Lm7AEO7m5Z5u5uW6/XwBfbl1d6vve6bUQ+fApqrgE6tmmy4RERERPdLkyftdyOEgEKhwPr166FUKs3W0Sf0eXl5GDFiBJo3b47XXnsNXbp0gYuLCxQKBf7xj3+gsrKy0eOprq62eMzc1XIhBNq1a4eEhASL59VnH/2lS5dw4MABtG3b1uxtMsmM53vX3vYx8xDw/P/etHutHMg4ADzXk7d7JCIiooeCTZP29u3bIzc3F/7+/ujVq9dd665duxY6nQ4pKSmIiIgwOhYVFYVmzYyTM4VCYbEtZ2dnlJaWmpTX3V4jhbe3N8rKyjB27FiLv3TUx9KlS1FdXY1x48Y1uq1HxvO9gV6dgEkrgJ8vAh5q4JMdwO0a4F3GkYiIiB4ONr3l45QpUwAAs2bNQlVVlcnxc+fOGb7WJ8V3bsGfN28eysrKTM51dHREeXm5STkAtG7dGj///LPR8StXrmDz5s31Gv+YMWNQVlaGmJgYs8frjl+K9PR02NvbY8aMGfU675GmVNbuZ/+/vsDybUDMesDDtXYPfefWth4dERERkVXY9Ep7aGgopk6div/85z/w8/PDiBEj0Lp1a1y+fBnHjh3Dt99+a9iyMnbsWHz44YeYNm0a9u3bB61WiwMHDuDgwYNo0aIFbt++bdR2t27dkJWVhVdeeQVdunSBnZ0dJk6cCDc3N0ydOhXR0dF46qmnEBERgZKSEnz22Wdo0aKF2V8ALFm4cCG++uorLFmyBPv27UP//v2h0Wjw66+/Yv/+/VCpVPjxxx8ltbV9+3ZcuHABAwcOROvWTDYN9s6/dx13F2D19NoHERER0UPIpkk7UHvv9V69emHFihVITU2FTqeDq6sr2rdvj3feecdQr0ePHvj000/xzjvvIDExEXZ2dggKCsLOnTsxbdo0XLlyxajdlStX4oUXXkB6ejoqKioghMDQoUPh5uaGWbNm4eLFi0hJScH7778PLy8vzJw5E3Z2
dpgzZ47ksatUKhw8eBDx8fHIyMgwfCCUVqtFly5dMGnSJMltffLJJwBgcocaIiIiIqImu+Uj3X8P/C0fiYiIiMgsm+5pJyIiIiKie2PSTkREREQkc0zaiYiIiIhkjkk7EREREZHMMWknIiIiIpI5m9/ykZqArwegdLD1KP7U3svWIyAiIiJ6oDFpfxiteBVQu9p6FMacVbYeAREREdEDi0n7w8hbC7jKLGknIiIiogbjnnYiIiIiIplj0k5EREREJHNM2omIiIiIZI5JOxERERGRzDFpJyIiIiKSOSbtREREREQyx6SdiIiIiEjmmLQTEREREckck3YiIiIiIplj0k5EREREJHNM2omIiIiIZI5JOxERERGRzDFpJyIiIiKSOSbtREREREQyx6SdiIiIiEjmmLQTEREREckck3YiIiIiIplj0k5EREREJHNM2omIiIiIZI5JOxERERGRzDFpJyIiIiKSOSbtREREREQyx6SdiIiIiEjmmLQTEREREcmcva0HQNYjhAAAlJeX23gkRERERHQ3arUaCoVCcn0m7Q+RoqIiAEDbtm1tPBIiIiIiupuysjK4urpKrs+k/SGi1WoBAL/99hs0Go2NRyN/5eXlaNu2LS5cuFCvH5pHEWMlHWMlHWMlHWMlHWNVP4yXdNaOlVqtrld9Ju0PETu72rcoaDQa/uDVg6urK+MlEWMlHWMlHWMlHWMlHWNVP4yXdLaKFd+ISkREREQkc0zaiYiIiIhkjkn7Q0SlUiE+Ph4qlcrWQ3kgMF7SMVbSMVbSMVbSMVbSMVb1w3hJZ+tYKYT+PoFERERERCRLvNJORERERCRzTNqJiIiIiGSOSTsRERERkcwxaZeB/Px8DBs2DM7OzmjZsiXeeust3Lp1657nCSGQkJCAdu3awcnJCb1798ahQ4dM6l2+fBljxoyBWq2GVqvFlClTUF5eblJv69at6N69OxwdHdGpUyckJydbZX7WZOtY3b59G4sWLcKAAQPg4eEBrVaLwYMHY9++fVadpzXYOlZ3ys3NhVKphIuLS6Pm1RTkEqubN28iLi4O7du3h0qlQrt27RATE2OVOVqLHGKl/zkMCAhA8+bN0aFDB8TExOD69etWm6e1NGW8CgsLMWPGDAQHB0OlUt31Z+tRX9+lxIrre636vK70HtX1vT6xssr6LsimiouLhbe3txgwYIDYsWOHSEpKEhqNRkyfPv2e5y5cuFA4ODiIjz76SOzevVuMHj1aqNVqcebMGUOdW7duiaCgIBEUFCQ+//xz8emnn4o2bdqIZ5991qitffv2CaVSKV577TWRnZ0tYmNjhUKhEBkZGVafc0PJIVZ//PGHcHNzEzNnzhRffPGF2L59uxg9erRQKpViz549TTLvhpBDrOqqqakRvXr1Ei1atBDOzs5Wm6c1yCVWt2/fFiEhIcLf318kJyeLvXv3inXr1ok5c+ZYfc4NJZdYvfvuu8Le3l4sXLhQZGdni+XLlwsXFxcxYcIEq8+5MZo6Xt9//73w8vISYWFhok+fPhZ/tri+S4sV1/daUl9Xeo/y+i41VtZa35m029iCBQuEs7OzKCoqMpStWrVKKJVKcenSJYvnVVZWCldXVzF79mxDmU6nEz4+PiIqKspQlpaWJhQKhcjPzzeU7dy5UwAQhw8fNpSFhISIPn36GPUxfvx4ERgY2Kj5WZMcYlVdXS2Ki4uN2q+urhYBAQEiLCys0XO0FjnEqq6kpCTh7+8vZs+eLbtFXS6xWr16tdBoNOLy5cvWmprVySVWnTt3FpGRkUZ9xMXFCZVKJaqqqhozRatq6njdvn3b8HV8fLzFny2u79JixfW9ltTXld6jvL5LjZW11nduj7Gx7du3Y+jQodBqtYaysWPHoqamBrt27bJ43oEDB1BeXo6xY8cayhwcHBAeHo6srCyj9rt164bOnTsbyoYNGwatVmuop9PpkJOTg4iICKM+xo0bh7y8PJw/f76x07QKOcRKqVTC3d3dqH2lUolu3brh8uXLjZ6jtcghVnqlpaV4++23sWTJEjg4OFhjelYll1glJiYiIiIC3t7e1pqa1cklVlVVVdBoNEZ9aDQa1NTUNGp+1tbU8bKzu/d/4Vzfa0mJFdf3WlJipfeor+9SY2Wt9Z1Ju43l5+cjICDAqMzNzQ3e3t7Iz8+/63kATM4NDAzEb7/9hsrKSovtKxQKBAQEGNo4c+YMqqqqzLZVty9bk0OszKmursahQ4cM8ZIDOcUqNjYWTz75JMLCwho8n6Ykh1hVVVXh6NGj8PHxwcsvvwxnZ2eo1WqMHTsWV65cafQcrUUOsQKAKVOmICUlBdnZ2bh+/Tq+/fZbfPzxx5g2bRrs7e0bNUdraup4ScH1XXqszHkU1/f6eNTXdymsub7LZ3V7RJWUlMDNzc2k3N3dHcXFxXc9T6VSwdHR0eQ8IQRKSkrg5OQkqf2SkhIAMKmnv+Jwt3HcT3KIlTmLFi3CpUuX8MYbb0ieS1OTS6x++OEHJCUl4fvvv2/wXJqaHGJVVFSEqqoqvP/++xgwYAA2bdqEwsJCvPXWWwgPD8eBAwcaNUdrkUOsAGD27NnQ6XQYOnQoxP8+H/DFF1/E0qVLGzSvptLU8ZI6BoDre0M9iuu7VFzfpcXKmus7k3aiRvjyyy8RHx+PuLg4PPnkk7YejqwIITB9+nT87W9/M7laQcb02zrUajUyMzMNH5HdokULDBs2DNnZ2RgyZIgthygrK1aswLJly7BkyRI8/vjjOHHiBP75z3/i9ddfx8qVK209PHpIcH23jOu7dNZc37k9xsbc3d1RVlZmUl5SUmK0B8vceTqdDjdv3jQ5T6FQGK6iSGlfX/fOevorNHcbx/0kh1jVdfToUYwZMwYTJkxAXFxcfafTpOQQq/T0dOTl5eHvf/87SktLUVpaami37te2JodYubm5QaFQoE+fPoYFHQAGDRoEpVKJEydONGhu1iaHWBUVFSE6Ohrz5s3DjBkzMGDAAERFRWHZsmX45JNP8MsvvzRmilbV1PGSOgaA63t9PcrruxRc36XHyprrO5N2GzO3B7isrAwFBQV3/e1Vf+zkyZNG5fn5+YZ7ilpqXwiBkydPGtrw8/NDs2bNTOpZ2tNlK3KIld7p06cxfPhw9OnTB6tXr27wnJqKHGKVn5+PkpIS+Pr6wt3dHe7u7nj//fdx48YNuLu7Y+7cuY2dplXIIVbNmzeHr6+vxb7k8h+gHGJ15swZ6HQ69OjRw6je448/bjguF00dLym4vtd/a8yjvr5LwfVdeqysub4zabex4cOHY/fu3SgtLTWUZWRkwM7ODiEhIRbP69OnD1xdXZGRkWEoq6qqQmZmJkaMGGHU/rFjx3Dq1ClD2Z49e1BUVGSop1KpMHjwYGzcuNGoj/T0dAQGBt71xXY/ySFWAFBQUICQkBC0a9cOGzduRLNmzaw0Q+uRQ6wmTpyInJwco0dkZCQcHR2Rk5ODqVOnWnHGDSeHWAFAWFgY9u/fb7SAZ2dn4/bt27L507wcYuXj4wOg9kpoXbm5uQAgm/UKaPp4ScH1vX6
4vkvD9b1+rLa+N+qGkdRo+hv/Dxw4UOzcuVOsWbNGuLm5mdz4f8iQIcLPz8+obOHChUKlUomlS5eKPXv2iDFjxlj8sJKuXbuKrVu3ivT0dNG2bVuLH64UFRUlcnJyRFxcnFAoFOKzzz5rusnXkxxiVVFRIbp37y7UarX4/PPPxcGDBw2Po0ePNm0A6kEOsTJHyj1/7ze5xOq3334Tbm5uIiQkRGzbtk2sXbtWtGzZUvTr10/U1NQ0XQDqQS6xGjVqlFCr1WLx4sUiOztbfPzxx0Kr1YqhQ4c23eQboKnjJYQQGRkZIiMjQ0RERAhHR0fD9+fPnzfU4fpe616x4vr+Jymvqzs9iuu7ENJiZa31nUm7DPz888/imWeeEU5OTsLLy0tER0cLnU5nVGfgwIHCx8fHqKympkYsWLBAtGnTRqhUKhEcHCwOHDhg0v7FixdFeHi4cHFxEW5ubmLy5MmirKzMpN6WLVtE165dhYODg/D39xdJSUlWnac12DpW586dEwDMPu7s09ZsHStz5LioCyGfWH3//fdi4MCBwtHRUWi1WjF58mRRUlJizak2mhxiVVZWJqKjo4Wfn59wdHQU7du3F6+//rrJB+PIQVPHy9J6lJycbFSP6/u9Y8X1/U9SX1d1Parru9RYWWN9V/yvQyIiIiIikinuaSciIiIikjkm7UREREREMseknYiIiIhI5pi0ExERERHJHJN2IiIiIiKZY9JORERERCRzTNqJiIiIiGSOSTsRERERkcwxaSciIiNXr16FRqNBYmKiUfnEiRPh6+trm0E9JObOnQuFQoHz58/fl/7Wrl1r0l9lZSVatWqFd999976MgYisg0k7EREZiY2NhaenJyZNmiSp/pUrVxAdHY2goCCo1Wq4urqiY8eOGDduHDIzM43qDho0CC4uLhbb0ie1R44cMXu8pKQETk5OUCgUSElJsdiOr68vFAqF4eHg4ABfX19MmTIFFy5ckDSvh5WTkxPefvttfPDBBygoKLD1cIhIIibtRERkcPHiRaxZswavv/467O3t71n/119/Rffu3bFy5Ur06tULCQkJWLhwIcLCwpCfn4/k5GSrjm/Dhg3Q6XRo37491qxZc9e6bdq0QUpKClJSUrBs2TIEBwdjzZo1CA4OxrVr16w6rgfNK6+8AoVCgY8++sjWQyEiie69IhMR0SNj1apVUCgUGD9+vKT6ixcvxtWrV7F582aMHDnS5PiVK1esOr6kpCQMHjwYI0eOxMyZM3H27Fl06NDBbF2NRoMXX3zR8H1UVBS8vLywYsUKJCcnIyYmxqpje5A4OzsjPDwca9euxXvvvQeVSmXrIRHRPfBKOxFRI+j3DO/Zswfz5s2Dj48PnJycEBwcjEOHDgEAvvrqK/Tr1w/Ozs7w9vbG/PnzzbZ15MgRjB49Gh4eHlCpVOjcuTP+9a9/obq62qjet99+i4kTJ6JTp05o3rw51Go1+vbti02bNpm0OXHiRCgUCpSVlRmSVkdHR/Tt2xeHDx82qZ+RkYGePXvCy8tL0vxPnToFAHjmmWfMHm/ZsqWkdqQ4evQofvjhB0RGRmLChAmwt7e/59X2O4WGhgIATp8+bbHO9u3boVAosHz5crPHe/fuDU9PT1RVVQGo3/Nhjv45MkehUGDixIkm5enp6ejXrx/UajWaN2+O4OBgbNy4UVJ/esOHD8e1a9eQk5NTr/OIyDaYtBMRWcHbb7+NzZs3Y8aMGYiPj8fZs2cREhKCzZs3Izw8HP3798fixYsREBCAuLg4pKamGp2/bds29O3bF7/88gtmzZqF5cuXo3fv3oiLizO56r1p0ybk5+dj7NixWLZsGd555x0UFxcjPDwcaWlpZscXGhqKixcvIi4uDrNnz8ZPP/2EZ599Fn/88Yehzu+//46TJ0/i6aefljxvPz8/AEBiYiKEEJLPu3btmtlHRUWFxXOSkpLg4uKCMWPGwMPDA2FhYVi3bh1qamok96v/JcPDw8NinZCQELRs2RLr1683e/6hQ4cwYcIENGvWDEDDno/GiI2Nxbhx46BWqzF//nwkJCSgefPmiIiIwMqVKyW307t3bwDA3r17rT5GImoCgoiIGiw5OVkAEI8//rjQ6XSG8i1btggAwt7eXnz33XeGcp1OJ1q2bCl69eplKKusrBQtWrQQ/fv3F1VVVUbtf/TRRwKAyMnJMZRdv37dZBw3btwQnTp1EoGBgUblkZGRAoCIiooyKv/ss88EAPHvf//bUJadnS0AiGXLlpmda2RkpPDx8TEqO3PmjHB1dRUARNu2bcWECRPEkiVLxJEjR8y2MXDgQAHgno+6MdPHyM3NTURGRhrKNm/eLACIrKwsk358fHxEQECAKCwsFIWFheLs2bNizZo1QqPRCHt7e3H8+HGz49OLjo4WAMSJEyeMymNjYwUAkZubayirz/MRHx8vAIhz584ZyvTPkTkAjOacm5srAIjZs2eb1B05cqRQq9WivLzcUKZ/fdbtry57e3sRFhZm9hgRyQuvtBMRWUFUVBQcHBwM3/fv3x8AEBwcjJ49exrKHRwc8PTTTxuu+ALAl19+id9//x2TJk1CaWmp0ZXnESNGAAB27dplqO/s7Gz4uqKiAkVFRaioqMCQIUOQl5eH8vJyk/G98cYbRt8PGTIEAIzGUVhYCADQarWS592hQwccO3YM06dPBwCkpaXhjTfeQM+ePdGtWzfk5uaanOPo6Igvv/zS7OOll14y209mZiZKS0sRGRlpKBsxYgQ8PT0tbpHJz8+Hp6cnPD090aFDB0yePBkeHh7YsmULgoKC7jovfT91r7YLIZCamoqgoCA88cQThvKGPB8NtWHDBigUCkRGRpr8leKvf/0r/vjjDxw8eFBye1qtFlevXrXa+Iio6fCNqEREVnDnmyHd3d0BAO3btzep6+7ujqKiIsP3eXl5AIDJkydbbP/33383fH316lXExsZiy5YtZhOu0tJSuLq63nV8jz32GAAYjUO/r1rUY5sLUHt7xRUrVmDFihUoKCjAN998g5SUFGzduhVhYWE4ceKE0S8CSqUSQ4cONdvWN998Y7Y8KSkJnp6eaNOmjdF+9JCQEGRkZODatWsmW158fX0N95p3cHBAq1at4O/vL2lO+sR8w4YNWLBgAezs7PD111/j/PnzWLRokVHdhjwfDZWXlwchBAICAizWqftauRchhMX99EQkL0zaiYisQKlU1qu8Ln2S/MEHH6BHjx5m67Rq1cpQNyQkBHl5eZgxYwZ69uwJjUYDpVKJ5ORkpKWlmd3jbWkcdRN0T09PAEBxcfE9x2yJt7c3IiIiEBERgRdeeAFpaWnIysoyuotLfZ07dw45OTkQQqBTp05m66SmpmLmzJlGZc7OzhZ/OZDi5ZdfxsyZM5GdnY2hQ4di/fr1UCqVRnNp6PNRl6Wk+c43IOv7UygU2L59u8XntEuXLpLnWFJSYnjeiUjemLQTEdlYx44dAUhLMn/88UccO3YMcXFxJp9ouXr16kaNQ5/s1d0y0xi9evVCWloaLl261Kh2kpOTIYRAYmIi3NzcTI
7HxsZizZo1Jkl7Y02YMAExMTFYv349+vbti40bN2LYsGHw9vY21LHG86H/K0RxcbHRXyTOnj1rUrdjx47YsWMH2rVrh8DAwIZMy+D8+fOorq6+51YhIpIH7mknIrKx0NBQeHl5ISEhwexV7srKSsNdXvRXV+/cwvLTTz9JvsWgJZ6enujSpYvhVpVS7N27F5WVlSblNTU12Lp1KwDgL3/5S4PHVFNTg7Vr16Jr166YMmUKnn/+eZPH+PHjcfz4cXz33XcN7sccT09PDB8+HJmZmdiwYQPKy8uN9tQD1nk+9H892L17t1H5hx9+aFJXv+d/zpw5uH37tsnx+myN0T/PAwcOlHwOEdkOr7QTEdmYs7Mz1q9fj1GjRqFz586YPHky/P39UVpaivz8fGRmZmLTpk0YNGgQAgMD0aVLFyxatAgVFRXo3LkzfvnlF6xatQpdu3Y1+8bP+oiIiMD8+fNRUFBgdEXZksWLF2P//v147rnn8MQTT0Cj0eDKlSv473//i9zcXAwePBjPPvtsg8eza9cuXLhwAa+88orFOmPGjMHcuXORlJSEp556qsF9mRMZGYnPP/8cs2bNgkajwahRo4yOW+P5GD9+PObMmYOpU6ciPz8fWq0WO3bsMPuprU899RTmzp2LuXPnokePHoiIiECrVq1QUFCA3NxcZGVl4datW5LmlpWVBQ8PDwwePFhSfSKyLSbtREQyEBoaiu+++w4JCQlITU1FYWEh3N3d4efnhzfffBPdunUDUHtld9u2bYiOjsa6detw48YNBAUFYd26dTh27Fijk/ZXX30V7733HtLS0jBr1qx71o+NjUVGRga+/vpr7Ny5E8XFxXB2dkZgYCA+/PBDTJ8+HXZ2Df+jblJSEgAgPDzcYp2goCB06tQJn376KZYsWQInJ6cG93ensLAwaLVaFBcXY8qUKXB0dDQ6bo3nw9XVFVlZWXjzzTexYMECuLi4IDw8HKmpqYY3NNcVHx+Pnj17Yvny5Vi6dClu3LgBLy8vBAUFWfxAqDvduHEDmZmZiIqK4qehEj0gFKK+twkgIqKH2rRp07Br1y6cPHnS8AFCQO0nd+7duxfnz5+33eCoXtauXYtJkybh3Llz8PX1NZTrPwTq1KlTkv6iQkS2xz3tRERkZN68eSgqKkJycrKth0JNoLKyEgkJCYiJiWHCTvQA4fYYIiIy4uXlhbKyMlsPg5qIk5MTCgoKbD0MIqonXmknIiIiIpI57mknIiIiIpI5XmknIiIiIpI5Ju1ERERERDLHpJ2IiIiISOaYtBMRERERyRyTdiIiIiIimWPSTkREREQkc0zaiYiIiIhkjkk7EREREZHM/X/kPkeWlQT/HQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot the SHAP values\n", + "shap.plots.bar(explanation, max_display=15)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA2MAAAI4CAYAAADnDzGJAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAnSpJREFUeJzs3XtclHXe//HXCMMgoICAiIDH0NQOrLneoGW6q20Juq6Zp9LVX6W10nGR9bZWRV3XzdJsN3UtCw+3h9Ry7/JQ4rFEvEVMDQ0PYYqaioIoynl+f0yMDMjBRC/F9/PxuB4x3+v7vb6fGZmJz3wPl8lqtVoRERERERGRW6qO0QGIiIiIiIjcjZSMiYiIiIiIGEDJmIiIiIiIiAGUjImIiIiIiBhAyZiIiIiIiIgBlIyJiIiIiIgYQMmYiIiIiIiIAZSMiYiIiIiIGEDJmIiIiACQnJyMyWQiOTnZ6FBERO4KSsZEREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKBkTEREROz8/f2xWCxGhyEiclcwWa1Wq9FBiIiIiPH27t2LKfsKrYOb4WI2Gx2OiNRW7hbwdDc6ituCs9EBiIiIyO3ByckJb1d3XF6YC2lnjA5HRGqj5g1h3iglYz9TMiYiIiKO0s5A6gmjoxARqfW0ZkxERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkRERERERAygZExERERERO58WTkwYjb4DQP3QdBtHCQfqX77A+nw+ETwGAwNhsKQmXD2Qvl6f1sBvaeA/3Aw9YUJS39xyErGRERERETkzlZcDBGTYfHXEPUEvDUUzlyAruPg0Mmq26dnQJc34fBPMOVpiO4Nq3dBj1jIL3Cs++Zi2HkYftX8hsNWMnYb2717N+vXr2fJkiXMnTuXxYsXGx0Sqamp9OrVC5PJdM3D1dX1pva/adMmXn31VTZv3nxT+7lRly9fZvny5fTt25e2bdvi5uaGyWRiwoQJRocmIiIicufp+lcY9s+Kz6/YDgmpEBcF4wfAqCdg80RwqgPjl1V9/SkrIScXNsbCyxEwth988mfYcxTiNjnWTZsDpz6CRa/eyDMCwPmGryA3zc6dO7FYLPj6+pKfn290OOW88sorBAUFOZQ1atTopva5fft2Zs6ciZeXF127dr2pfd2IrKwsVq9ezapVqwgICKBJkyakpqYaHZaIiIhI7bRiO/h7Qd+wq2V+ntC/EyzaCnkFYDFX3H5lIkR2gCZ+V8u6PwitGsMnCTDisavlzRrWWNhKxm5jAwcOpH79+gAsX76cgoKCKlrcWs888wwdOnQwOowaU1hYSE5ODp6enjd8LV9fX/72t7/x7rvv4urqysSJE/n73/9eA1GKiIiISDm706B9C6hTZuJfxxCYux4OnoT7m1677YlztimNHVqWP9cxBNbsqvl4f6ZpirexkkTsTmS1Wvn22295+umnCQwMxGw24+HhQY8ePdi9e7dD3X379jF06FDuvfdePD09cXZ2xt/fn+eee45z584BcPHiRf785z/zxhtvABAbG2ufGtmsWTP7eZPJRFxcnMP1k5KS6NSpEyaTyV62efNmWrduTdOmTVm8eDEdO3bE09MTLy8vAPLz81m9ejU9evTAx8cHZ2dnfH19GTZsGGfOnKny+bu4uBAYGGi/noiIiIjcRKcyIcC7fHlJ2cnzlbctXbds+/OXbCNrN4FGxmq53Nzcatd1cXGhTtlvEypx9uxZ0tPT7Y+dnZ3x8vLC1dWV9evX89JLL5GWlkavXr1o06YNR48e5csvv6RTp058/fXX9lG15ORkPv30U7p3707v3r2xWq0kJiYSFxdHYmIiycnJuLq60q9fP44fP87y5cvp0aMHjz1mGy6+5557qh1zWZmZmbzyyit06NCBV155BScnJ/Lz85k3bx5jx44FoF+/fvj7+/Ptt9+ybNkyvv76a5KTk2tkBE1EREREyigohAuXy5flFUBGtmN5Aw/baNiVfLBcI7VxdbH990olS35Kzl1rGqOr+WqdyqY5/kIaGavlFixYUO3jp59+uq5r9+zZk+DgYPsREBBAXFwcaWlpfPjhhxw8eJDPP/+clStXMnnyZBYsWMCMGTNwd3cnOjrafp1BgwZx9uxZVq1axVtvvcW0adNYunQpgwcPJiUlhVWrVmE2mwkPDyc0NBSATp06ER0dTXR0NH369PnFr8/FixcZOXIka9euZcqUKUyaNImkpCTmzp2Ls7Mz+/bt44MPPmDy5MksWbKE0aNHc/ToUWbMmPGL+/wl9u/fT2Zmpv1xeno6x44dsz/Ozs7mu+++c2iTkJBQ6ePExESKiorUh/pQH+rD/vjy5TJ//IiI3CRlZ0o5fF5t+962PX3pIyEVln5TvvxYhq1NXRdOHz/hcM2EhATIzbefr/Azsa4tYTt/6nS5z92z6Sft7e3XrEEmq9VqrdEryk1RsmZs8ODB19Wu9MhVVfz8/LBYLJXWSU1NJTo6mi+++ILJkyfTokUL+zk3Nzc6dOhASkoKI0aMwNXVlfj4+HLtx48fT2JiIhcvXqRu3br2c1arldzcXC5cuEBhYSFLliwhJiaG119/nXfeeQeAKVOm8MYbbzB+/HiHnQkvXrzIhAkTmD59Oh9//DHDhg2zn0tKSuLll19m+/btlPy6b968mZEjR3Lw4EEyMzPt0wmtVitz5swhKiqKoUOHMmnSJIf4t27dSnR0NM2aNav2mzE3N9e+Zqxs3CIit5OUlBS8r1hp/My/IfVE1Q1ERK5X60DbjoWNG1RcJ/MS7Cpzf7A/x0Ejbxj9e8fyh9vYRr9CRkFIAKx50/H8vHh4bhbsnVH5mrGg5+EfQyDmD47nhsy0rRk7t6B8u4xsW0I4vj9MGFjx86mEpinWcmV3O6xJv/vd7665gce2bds4efIkBQUFBAcHV9g+IyOD4OBgTpw4wXvvvcfy5cs5duyYwzcWgMO3uDXNz8/PYV1Xbm4ux44do7i4mLi4uHLrz0qUTiJFRER
EpAZ5e9h2MixbFuBdvrxEaDP4+oDtfmOll93sOARuFtuuiBUJ9AG/+pB0jRtE/98hCL3x+4lVRMlYLXc9U04sFgtOTk411ve9997LuHHjrnnO09MTPz8/zpw5w+TJk5kzZw733XcfEyZMICgoCIvFwq5du3jnnXcoLi6uVn+lN+gozWq1lkvwSri5uZWrW6Jv377069fvmu2aNGlSrZhERERE5BboF27b3v7TROjXyVaWkQ3LE6BXB8f1Xkd+XprTstQtmZ4Mh/mb4HgGBPvayjbste3C+Fqvmxa2krFabtGiRdWuGxkZSePGlXxrUE3169enYcOGZGVlMWDAgEo3BUlOTiYxMRFXV1d27NhhT44KCgo4dOhQufoVJVwWiwV3d3cAzp933C0nOzubjIyMasVet25dAgMDMZlMODs7M2jQoGq1ExERERED9QuHsFYw/F+wPx1868GsdVBUDLFlphD+drztv0f/fbVs7JO2xK3bOHglAi7lwrT/2KY2Dv+NY/uFm+HHs3A5z/Z4636YvNz285BHoWn170OmZKyW69mzZ7Xr+vj41EifrVq1IiwsjJUrVzJ9+nSHzTrANlp38eJF/P39qVOnDnXq1MFkMjmMgCUnJ7N69epy1/bw8ADKJ1wuLi4EBQXh5OREfHw8r7/+OmBLxOLj40lLS6tW7CaTidDQUO677z4+/fRTEhMTCQsLc6hz+fJlcnJy8PPzq+AqIiIiInJLOTnZ1ouNng/vrbbtfvjreyDuJds6taoE+8KWSfB6HIxZBC7OEPEQvDOs/C6K8zbAlpSrjzd9ZzvAtoZNyVjtcPDgQS5dugTY1jIVFRWRnJwM2JKSVq1aVXmNm7lmrCItWrTghRde4MCBA4wePZpVq1YRHh6O2Wzmhx9+YNeuXQQFBbFp0yaaNWtG+/btSU5OJiwsjN69e3Pu3Dm2bt1KXl7eNa/dsGFDFi1aRP369fH19aVly5b06tWLhx56iPDwcNauXUvPnj0JDQ1l9+7dfPPNNwQGBlZ7M5OOHTsyatQoxo4dyyOPPEKvXr1o27YtV65c4fvvv2f37t2MGDGiyo04tmzZwrJlyyguLub7778HYPXq1Zw5c4Z69erx9NNP88ADD1z36ysiIiJy19k8qeo63h7w4SjbUZnSI2KltWsCX157ic11x1JNSsZuY6mpqZw6dcqhLCkpCYCAgIBqJWNG6d69O8uXL2fGjBnEx8fz3nvvYTKZ8PHxoUOHDowaZXuTNGrUiDfeeANnZ2f+85//MG3aNOrXr0/v3r0JDw9n5MiRDtd9+OGH+ctf/sLs2bOZNm0a+fn5NG3alF69ehEaGkpsbCwTJ05k69atbNiwgQcffJB3332XefPmVTsZc3Fx4dlnn+Wee+5h5syZJCQk8Pnnn+Pi4oK/vz8RERH079+/yuts27aN2bNnO5QlJSXZ/w3btGmjZExERETkLqat7UVERATQ1vYicgtUZ2v7u4hu+iwiIiIiImIAJWMiIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIA3fRZREREHDVvaHQEIlJb6fPFgZIxERERAaCoqIhzuVfwnTMCF7PZ6HBEpLZytxgdwW1DyZiIiIgAUFhYSM9+v2fDhg20a9fO6HBERGo9rRkTERERu9OnT5OXl2d0GCIidwUlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiIiIiIGUDImIiIiIiJiACVjIiIiAoDFYsHf39/oMERE7homq9VqNToIEbk7Xci1klNodBQiUqIgv4DjP3xPfRd44IEHjA5HRKTWczY6ABG5e+UUwrNfFpN2Qd8JidwOmns6Mel+H5wKsowORUTkrqBkTEQMlXbBSup5o6MQERt9MSIicitpzZiIiIiIiIgBlIyJiIiIiIgYQMmYiIiIiIiIAZSMiYiIiIiIGEDJmIiIiIiIiAGUjImIiIiIiBhAW9uL1FJZuVZithbz2SErlwugY4CJd7rWob2/qVrtD5yz8tqmYr45YcXFCSJamJjetQ5+bo7t/5ZYzI5TVnacsnLmMowPNzGhs9PNeEoiIiIitYpGxkRqoWKrlYhPi1h8wErUr+rw1qN1OHPZStdlRRzKrPo+QukXrXRZWsThLCtTHqlDdIc6rP7BSo/lReQXObZ/85tidv5k5VcNq5fkiYiIiIiNkrHb2O7du1m/fj1Llixh7ty5LF682OiQiIqKwmQyXfMwm81cvnz5pvWdlZXFiBEjWLp06U3ro6ZMnz6dhx56iAYNGmA2m6lXrx6tW7dm8uTJ5OXl3fT+V6RaSTgJcY/XYXynOoz6VR02D3DCyQTjtxVX2X5KYjE5BbCxvxMvt6/D2LA6fNKrDnvOQtx3jslY2vNOnHrRmUU99XEiIiIicj00TfE2tnPnTiwWC76+vuTn5xsdjoPnnnuO1q1bO5Q5OTnh4uJy0/rMysrigw8+ID8/n4EDB960fmpCUlIS9erVY/DgwTRq1IhLly6xfv16/vrXv7J161a+/PJLTKabN5K04qAVfzfo2+pqH35uJvq3NrFov5W8QisW54r7X3nISmQLE03qX63TvWkdWnkX80mqlREPXq3bzFMjYiIiIiK/hJKx29jAgQOpX78+AMuXL6egoMDgiK6KiIigT58+RodRozIzM/H09KROnRsf4bnWKObUqVMJDw8nPj6elJQU7rvvvhvupyK7z1hp72+iTpmEr2OAibl7rRzMhPv9rt32xEXb2q8OjconWR0DTKz5oeppjiIiIiJSNc0ruo2VJGJ3otzcXF599VWCg4Mxm824ubnRsWNHvvrqK4d6xcXFvPzyy9x33314eXnh7OyMj48PvXv35ujRo/Z6a9asoXnz5gDMnz/fPjXS19fXft5kMjF+/PhysURGRmIymbh48aK9LDQ0FF9fX/bv38+jjz6Kh4cHDRo0ICcnB4Aff/yRvn374uvri7OzM97e3kRERHDkyJEbel2Cg4OxWq1kZGTc0HWqcioHAtzLl5eUnbxUcUJ1Ksexbtn253Mhr1AJmYiIiMiN0shYLZebm1vtui4uLtUeFcrKyiI9Pd2hzMvLCw8PDwoKCggLCyMlJYXu3bszbNgwsrOzWbZsGZGRkaxYsYLevXsDkJ+fz7x58+jSpQuPPfYY9erVIzk5mTVr1rB7925SUlKoX78+DzzwAGPHjmXKlCmEhYXx5JNPAuDm5lbt51dWbm4uXbp0oV27drz22mtkZGRgNps5fPgwHTt2pKCggD59+hASEsKRI0f45JNP6Ny5M0lJSQQFBVWrj7Nnz5KXl8fZs2f57LPP+Oyzz2jUqBEdO3asdpzFViv5RdWra3ECk8nElULbz2W5/vyOv1JY8TVKzl2zvdPVOhZ9eoiIiIjcEI2M1XILFiyo9vHTTz9V+7rDhw8nODjY4Zg0aRIAsbGx7Nmzh1mzZrF27VomTZ
rEzJkz+f777/H09GTMmDFYrbaRFYvFwqlTp1i7di3Tp08nNjaWzz//nKlTp5Kens6cOXMACAoK4vnnnwegdevWREdHEx0dzZ/+9Kdf/Nrk5OTw+OOPs2XLFiZNmsTs2bNxdXVl+PDhFBYWkpCQwMKFCxk3bhzz589n3bp1nD17ljfffLPafXTu3Jng4GDat2/P5MmTadeuHf/7v/97XUnk1uNW6r5bVK0j9bytTV1nOH7qjMN1EhISyC28ej4xMZGioqtZ3v79+8nMzKTuz0nWqbPnOXbsmP18dnY26acz7O1LrlmZivooUfqciNyeyr7Pyz6u6n2enp5e7rPku+++Ux/qQ32oj1rZx/UyWUv+KpbbWsmascGDB19Xu7KjV5Xx8/PDYrFUWicqKor333+fmJgYQkNDHc6FhobSpk0bQkJCyMnJYceOHeU2qRg1ahRffPEF6enpBAQEOJwrLCzk/Pnz5Ofnc+nSJdq0aUPfvn1ZuXIlAEePHqV58+b88Y9/JC4uzqHtmjVriIiIYNy4ccTGxjqci4yMZPXq1WRnZ1OvXj17rHv27OHw4cO0bNnSXjcrK4sGDRrQo0cP5s2bV+75h4eH4+rqyqFDhyp9nUqsX7+ejIwMTpw4wdq1azlz5gz/+Mc/6NmzZ7XaA/yUY2VdWvXepn8IMeFpMRHyYSEh3ibWPOk4vDVvXzHPfVnM3j86cb/ftTfeOHHRStC/i/hHlzrEdHT8vmbImiLW/GDlXFT5YbGMy1b8ZhVd133GTl6y8ptPriaRImKs1g1g0cOnqVuQRbt27YwOR0Sk1tNEo1quutPprld4eHiFG3gcO3aM/Px8mjRpUmH7EydO2JOxOXPmMGPGDI4cOVJupCQ7O7vGYi6rXr16NG3a1KFs7969WK1WvvrqK4KDg6/ZrmSdWnX06NHD/nN0dDTPP/88f/jDH9iyZQthYWHVukYjdxPD7ru+HQtDG5r4Ot1KsdXqsInHjlNW3JyhlXfFbQPrmfCrC0k/lU8A/++UlVDdT0xERESkRigZq+Wu575fFosFJ6fqjWhUxmq1EhQUxFtvvVVhnZCQEAA+/vhjXnzxRVq2bMmYMWNo3rw5bm5uFBUVMWTIEIqLq74nFlDpNvEVTYVzcXHB2dnxLVAyUNytWzf7tMiyXF1dqxXTtbz88st8+OGHvP/++9VOxn6Jfq1MrDho5dODVvq1tr02GZetLE+10qulyWFb+yNZtufc0utq2ZOtTMxPsXI820rwz9vbb/ixmIOZ8NpDSsZEREREaoKSsVpu0aJF1a4bGRlJ48aNb7jPwMBAzp8/T9++fauc9hgXF4fZbOabb76hUaNG9vJ9+/aVq1tZwuXv7w/A+fPl57uVnvtblXbt2mEymcjLy2PQoEHVblddJclx6fnJN0O/VibCAmD4umL2nwPfujDr22KKrBDb2XHq4W8/sSWrR0dc/TgY+191WH6wiG6fFPFK+zpcKoBpO4u53xeGlxmlW5hSzI/ZcPnnHRa3psPk7bYkekhbE011HzIRERGRa1IyVstdz9okHx+fGulz4MCBTJ06lddff53333+/3Pm0tDT7NvUlI3GlR8CsVisxMTHl2nl4eADXTmRatWqFk5MT27Ztw2q12hO39evXc+DAgWrH7uvrS1hYGImJiaxcudK+a2Pp2I4fP17pFMzCwkJOnTp1zWmOf/vb3wD4r//6r2rH9Es41bGtFxu9pZj3dhdzpQB+3QjinnCidYOqk6Pg+ia2DHDi9c3FjNlajIsTRLQw8U7XOuVuFj1vXzFbSi1N3HTcyqbjtsTs4cA6SsZEREREKqBk7DZ28OBBLl26BNi2YS8qKiI5ORmwJSatWrWq8ho3a81YZSZOnEh8fDyzZs0iMTGRLl264OXlxbFjx9i2bRtOTk6kpKQA0L9/fzZt2kSXLl146qmnKCoqYt26dfb7fZXm4+NDQEAAmzZtIiYmhsaNG1O3bl1GjhyJh4cHvXr1YtWqVXTt2pVu3bpx+PBhVq1aRdOmTR3uWVaVhQsXEhYWxoABA+jRowft27fHarWSlpbGxo0beeKJJ8ptIFLapUuXaNmyJZ07d6Zt27YEBATw008/8dVXX3Ho0CHuv/9+oqOjr/dlvW7eriY+/J0TH/6u8nqlR8RKa+dr4st+VU9b3TxQHyMiIiIiv4T+irqNpaamcurUKYeypKQkAAICAqqVjBnBbDazfft2JkyYwLJly5g1axYA3t7etG3blqFDh9rrvvDCC2RmZjJr1izefvtt3N3d6dy5M7Nnzy63uQbYpl1GRUUxc+ZM8vPz8fHxYeTIkYDtZtDPPPMMGzduJDExkRYtWrBgwQI++uij60rGWrZsyb59+4iJiSE+Pp74+HjMZjO+vr488sgjPPvss5W2d3NzY8CAAezYsYOkpCQuX76Mq6srTZo04S9/+Qvjxo2jbt261Y5HRERERGonbW0vIobR1vYitxdtbS8icmvpps8iIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiIiIiIG0E2fRcRQzT1NgG53KHI7sL0fRUTkVlEyJiKGcXeGeb/TAL3I7aIgv4DjP5zDxcXoSERE7g5KxkTEMJ6uJjyNDkJE7FJSDtEv8jHWrFljdCgiIncFfSUtIiIiAOTl5XH69GmjwxARuWsoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERERERAygZExERETt/f38sFovRYYiI3BVMVqvVanQQIiIiYry9e/diyr5C6+BmuJjNRocjcvdwt4Cnu9FRiAGcjQ5AREREbg9OTk54u7rj8sJcSDtjdDgid4fmDWHeKCVjdyklYyIiIuIo7QyknjA6ChGRWk9rxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkREREREaqOsHBgxG/yGgfsg6DYOko9Uv/2BdHh8IngMhgZDYchMOHuhfL2/rYDeU8B/OJj6woSlNfYUajslYyIiIiIitU1xMURMhsVfQ9QT8NZQOHMBuo6DQyerbp+eAV3ehMM/wZSnIbo3rN4FPWIhv8Cx7puLYedh+FXzm/NcajElY7fY7t27Wb9+PUuWLGHu3LksXrzY6JCIiorCZDJd8zCbzVy+fPmm9Z2VlcWIESNYuvT2/wbl22+/5emnn+bBBx/E3d0dk8nEsGHDKqz/2muv8eijj+Lv74/JZMLX1/fWBSsiIiK1W9e/wrB/Vnx+xXZISIW4KBg/AEY9AZsnglMdGL+s6utPWQk5ubAxFl6OgLH94JM/w56jELfJsW7aHDj1ESx69Uae0V3J2egA7jY7d+7EYrHg6+tLfn6+0eE4eO6552jdurVDmZOTEy4uLjetz6ysLD744APy8/MZOHDgTeunJmzatIklS
5bQsGFDQkJC2LNnT6X13333Xdzd3bnnnnvIycm5RVGKiIiIYEvG/L2gb9jVMj9P6N8JFm2FvAKwmCtuvzIRIjtAE7+rZd0fhFaN4ZMEGPHY1fJmDWs8/LuFkrFbbODAgdSvXx+A5cuXU1BQUEWLWyciIoI+ffoYHUaNyszMxNPTkzp1bnwQeNCgQQwcOJCAgAC2bdvGww8/XGn9PXv2cP/992MymWjWrBmXLl264RhEREREqmV3GrRvAWX/BuoYAnPXw8GTcH/Ta7c9cc42pbFDy/LnOobAml01H+9dStMUb7GSROxOlJuby6uvvkpwcDBmsxk3Nzc6duzIV1995VCvuLiYl19+mfvuuw8vLy+cnZ3x8fGhd+/eHD161F5vzZo1NG9um1s8f/58+9TIkul8a9aswWQyMX78+HKxREZGYjKZuHjxor0sNDQUX19f9u/fz6OPPoqHhwcNGjSwj0r9+OOP9O3bF19fX5ydnfH29iYiIoIjR6q3kLVRo0YEBARU+/V64IEHMJlM1a4vIiIiUmNOZUKAd/nykrKT5ytvW7pu2fbnL9lG1uSGaWTsDpSbm1vtui4uLtUeFcrKyiI9Pd2hzMvLCw8PDwoKCggLCyMlJYXu3bszbNgwsrOzWbZsGZGRkaxYsYLevXsDkJ+fz7x58+jSpQuPPfYY9erVIzk5mTVr1rB7925SUlKoX78+DzzwAGPHjmXKlCmEhYXx5JNPAuDm5lbt51dWbm4uXbp0oV27drz22mtkZGRgNps5fPgwHTt2pKCggD59+hASEsKRI0f45JNP6Ny5M0lJSQQFBf3ifkVERERumoJCuHC5fFleAWRkO5Y38LCNhl3JB8s1/tR3/Xn5yZVKlsuUnLvWNEZX89U6lU1zlGrRyNgdaMGCBdU+fvrpp2pfd/jw4QQHBzsckyZNAiA2NpY9e/Ywa9Ys1q5dy6RJk5g5cybff/89np6ejBkzBqvVCoDFYuHUqVOsXbuW6dOnExsby+eff87UqVNJT09nzpw5AAQFBfH8888D0Lp1a6Kjo4mOjuZPf/rTL35tcnJyePzxx9myZQuTJk1i9uzZuLq6Mnz4cAoLC0lISGDhwoWMGzeO+fPns27dOs6ePcubb775i/u8Ffbv309mZqb9cXp6OseOHbM/zs7O5rvvvnNok5CQUOnjxMREioqK1If6UB/qw/74Zm7YJCJVq/B9vu172/b0pY+EVFj6TbnyQxu2297ndV0gr7D8+/x8lu2Hui6OfZSy5+D3th9+Hv1y+CzJtZVl5l5dD1+2j7y8vFrxmfhL+rheJmvJX9Byy5WsGRs8ePB1tSs7elUZPz8/LBZLpXWioqJ4//33iYmJITQ01OFcaGgobdq0ISQkhJycHHbs2FFu6t2oUaP44osvSE9PLzeNr7CwkPPnz5Ofn8+lS5do06YNffv2ZeXKlQAcPXqU5s2b88c//pG4uDiHtmvWrCEiIoJx48YRGxvrcC4yMpLVq1eTnZ1NvXr17LHu2bOHw4cP07Ll1TnOWVlZNGjQgB49ejBv3rxyzz88PBxXV1cOHTpU6etUWsmasWvFfS0la8YyMjKq3YeIyK2WkpKC9xUrjZ/5N6SeMDockbtD60DbjoWNG1RcJ/MS7CqzrOLPcdDIG0b/3rH84Ta20a+QURASAGvKfOE8Lx6emwV7Z1S+ZizoefjHEIj5g+O5ITNta8bOLSjfLiPblhSO7w8Tbu+N2W4XmqZ4B7pZ0+nCw8Mr3MDj2LFj5Ofn06RJkwrbnzhxwp6MzZkzhxkzZnDkyBGHbyDA9i3DzVKvXj2aNnX8YNm7dy9Wq5WvvvqK4ODga7bTtvMiIiJy2/L2sO1kWLYswLt8eYnQZvD1Adv9xkovWdlxCNwstl0RKxLoA371Ieka6+r/7xCE6n5iNUXJ2B3oeqaRWCwWnJycbrhPq9VKUFAQb731VoV1QkJCAPj444958cUXadmyJWPGjKF58+a4ublRVFTEkCFDKC4urlaflW1+UTbBK+Hi4oKzs+Ovdcngb7du3ezTIstydXWtVkwiIiIid4R+4bbt7T9NhH6dbGUZ2bA8AXp1cFzvdeTnZS0tG10tezIc5m+C4xkQ/POX1hv22nZhfK3XrXkOdwElY3egRYsWVbtuZGQkjRtX8s1HNQUGBnL+/Hn69u1b5bTHuLg4zGYz33zzDY0aXX1T79u3r1zdyhIuf39/AM6fL7/bT+n5vFVp164dJpOJvLw8Bg0aVO12IiIiInesfuEQ1gqG/wv2p4NvPZi1DoqKIbbMFMLf/rxz9dF/Xy0b+6Qtces2Dl6JgEu5MO0/tqmNw3/j2H7hZvjxLFzOsz3euh8mL7f9PORRaKr7kFVEydgdqGfPntWu6+PjUyN9Dhw4kKlTp/L666/z/vvvlzuflpZm36a+ZCSu9AiY1WolJiamXDsPDw8Ah8WTJVq1aoWTkxPbtm3DarXaE7f169dz4MCBasfu6+tLWFgYiYmJrFy50r5rY+nYjh8/XukUTBEREZE7ipOTbb3Y6Pnw3mrb7oe/vgfiXrKtU6tKsC9smQSvx8GYReDiDBEPwTvDyu+iOG8DbEm5+njTd7YDbGvYlIxVSMnYLXbw4EH7zX9zc3MpKioiOTkZsCUmrVq1qvIaRmzBPnHiROLj45k1axaJiYl06dIFLy8vjh07xrZt23ByciIlxfYm7N+/P5s2baJLly489dRTFBUVsW7dOvv9vkrz8fEhICCATZs2ERMTQ+PGjalbty4jR47Ew8ODXr16sWrVKrp27Uq3bt04fPgwq1atomnTpg73LKvKwoULCQsLY8CAAfTo0YP27dtjtVpJS0tj48aNPPHEE1VuxHHhwgX++7//G4AzZ84AsGvXLvvuj08++SS//e1v7fVnzJhh3xQkKyuLgoICe93g4GD7tURERESu2+ZJVdfx9oAPR9mOypQeESutXRP4clzNxCLXpGTsFktNTeXUqVMOZUlJSQAEBARUKxkzgtlsZvv27UyYMIFly5Yxa9YsALy9vWnbti1Dhw61133hhRfIzMxk1qxZvP3227i7u9O5c2dmz55dbnMNsE27jIqKYubMmeTn5+Pj48PIkSMB282gn3nmGTZu3EhiYiItWrRgwYIFfPTRR9eVjLVs2ZJ9+/YRExNDfHw88fHxmM1mfH19eeSRR3j22WervEZmZiazZ892KPvuu+/sW576+fk5JGPz589nz549DvVL2rdp00bJmIiIiMhdTlvbi4iICKCt7UUMUZ2t7aXW0k2fRUREREREDKBkTERERERExABKxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKCbPouIiIij5g2NjkDk7qH3211NyZiIiIgAUFRUxLncK/jOGYGL2Wx0OCJ3D3eL0RGIQZSMiYiICACFhYX07Pd7NmzYQLt27YwOR0Sk1tOaMREREbE7ffo0eXl5RochInJXUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIA
JWMiIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiJ2/v7+WCwWo8MQEbkrmKxWq9XoIERERMR4e/fuxZR9hdbBzXAxm40OR6R2c7eAp7vRUYjBnI0OQERERG4PTk5OeLu64/LCXEg7Y3Q4IrVX84Ywb5SSMVEyJiIiImWknYHUE0ZHISJS62nNmIiIiIiIiAGUjImIiIiIiBhAyZiIiIiIiIgBlIyJiIiIiIgYQMmYiIiIiIiIAZSMiYiIiIiIGEDJmIiIiIhIbZGVAyNmg98wcB8E3cZB8pHqtz+QDo9PBI/B0GAoDJkJZy+Ur/e3FdB7CvgPB1NfmLC0xp7C3UTJmIiIiIhIbVBcDBGTYfHXEPUEvDUUzlyAruPg0Mmq26dnQJc34fBPMOVpiO4Nq3dBj1jIL3Cs++Zi2HkYftX85jyXu4SSsdvY7t27Wb9+PUuWLGHu3LksXrzY6JBITU2lV69emEymax6urq43tf9Nmzbx6quvsnnz5pvaT024dOkS8+fPp3Pnzvj6+uLi4oK/vz99+vRhz549RocnIiIid5quf4Vh/6z4/IrtkJAKcVEwfgCMegI2TwSnOjB+WdXXn7IScnJhYyy8HAFj+8Enf4Y9RyFuk2PdtDlw6iNY9OqNPKO7nrPRAUjFdu7cicViwdfXl/z8fKPDKeeVV14hKCjIoaxRo0Y3tc/t27czc+ZMvLy86Nq1603t60bk5OTw7rvvMmnSJDw9PRk0aBDe3t58++23xMfHs27dOrZv386vfvUro0MVERGR2mLFdvD3gr5hV8v8PKF/J1i0FfIKwGKuuP3KRIjsAE38rpZ1fxBaNYZPEmDEY1fLmzWs8fDvRkrGbmMDBw6kfv36ACxfvpyCgoIqWtxazzzzDB06dDA6jBpTWFhITk4Onp6eN3ytgwcP8tVXX5Gfn8/69et58MEHAbhw4QITJkzg3XffZf78+UrGREREpObsToP2LaBOmclvHUNg7no4eBLub3rttifO2aY0dmhZ/lzHEFizq+bjFU1TvJ2VJGJ3IqvVyrfffsvTTz9NYGAgZrMZDw8PevTowe7dux3q7tu3j6FDh3Lvvffi6emJs7Mz/v7+PPfcc5w7dw6Aixcv8uc//5k33ngDgNjYWPvUyGbNmtnPm0wm4uLiHK6flJREp06dMJlM9rLNmzfTunVrmjZtyuLFi+nYsSOenp54eXkBkJ+fz+rVq+nRowc+Pj44Ozvj6+vLsGHDOHPmTJXPv6CggNzcXAAaN25sL3dzc7P34e7ufl2vqYiIiEilTmVCgHf58pKyk+crb1u6btn25y/ZRtakRmlkrJYrSQiqw8XFhTplv0mpxNmzZ0lPT7c/dnZ2xsvLC1dXV9avX89LL71EWloavXr1ok2bNhw9epQvv/ySTp068fXXX9tH1ZKTk/n000/p3r07vXv3xmq1kpiYSFxcHImJiSQnJ+Pq6kq/fv04fvw4y5cvp0ePHjz2mG2o/J577ql2zGVlZmbyyiuv0KFDB1555RWcnJzIz89n3rx5jB07FoB+/frh7+/Pt99+y7Jly/j6669JTk6udAQtKCiItm3bsnPnTvr378/48ePx9vZm/fr1LFy4EH9/f1544YVfHLeIiIjUcgWFcOFy+bK8AsjIdixv4GEbDbuSD5Zr/Hnv6mL775VKlr2UnLvWNEZX89U6lU1zlOumkbFabsGCBdU+fvrpp+u6ds+ePQkODrYfAQEBxMXFkZaWxocffsjBgwf5/PPPWblyJZMnT2bBggXMmDEDd3d3oqOj7dcZNGgQZ8+eZdWqVbz11ltMmzaNpUuXMnjwYFJSUli1ahVms5nw8HBCQ0MB6NSpE9HR0URHR9OnT59f/PpcvHiRkSNHsnbtWqZMmcKkSZNISkpi7ty5ODs7s2/fPj744AMmT57MkiVLGD16NEePHmXGjBmVXrdx48ZER0fz+9//nu3bt9OtWzdCQ0MZPXo0np6eJCcnExwcXO049+/fT2Zmpv1xeno6x44dsz/Ozs7mu+++c2iTkJBQ6ePExESKiorUh/pQH+rD/vjy5TJ/+InITfXTTz9V/D7f9r1te/rSR0IqLP2mXHlR2mlbm7ouZJ4+W+59fubYCfv5Cj9L6v6csOUVlPssOfnDj/b2UP6zBGxLMa75PEr3Ucnj2/Ez8Zf0cb1MVqvVekNXkFuiZM3Y4MGDr6td6ZGrqvj5+WGxWCqtk5qaSnR0NF988QWTJ0+mRYsW9nNubm506NCBlJQURowYgaurK/Hx8eXajx8/nsTERC5evEjdunXt56xWK7m5uVy4cIHCwkKWLFlCTEwMr7/+Ou+88w4AU6ZM4Y033mD8+PFMmDDB3vbixYtMmDCB6dOn8/HHHzNs2DD7uaSkJF5++WW2b99Oya/75s2bGTlyJAcPHiQzM9M+ddBqtTJnzhyioqIYOnQokyZNcoh/69atREdH06xZsyrffD/88APvvPMO3377LY8//jj16tVj69atrFu3jjZt2rBx48YaWZ8mIlJTUlJS8L5ipfEz/4bUE0aHI1J7tQ607VjYuEHFdTIvwa4y9wf7cxw08obRv3csf7iNbfQrZBSEBMCaNx3Pz4uH52bB3hmVrxkLeh7+MQRi/uB4bshM25qxcwvKt8vItiWF4/vDhIEVPx+5Jk1TrOXK7nZYk373u99dcwOPbdu2cfLkSQoKCiod/cnIyCA4OJgTJ07w3nvvsXz5co4dO+bwjQVQ7puXmuTn52dPxMA2rfPYsWMUFxcTFxdXbv1ZidJJ5LUcOHCA6OhovvnmG1JTU+27TA4dOpTY2Fjee+89pk2bxuTJk2vqqYiIiEht4u1h28mwbFmAd/nyEqHN4OsDtvuNlV56suMQuFlsuyJWJNAH/OpD0jVuEP1/hyBU9xO7GZSM1XLXM+XEYrHg5ORUY33fe++9jBs37prnPD098fPz48yZM0yePJk5c+Zw3333MWHCBIKCgrBYLOzatYt33nmH4uLiavVXeoOO0qxWa7kEr4Sbm1u5uiX69u1Lv379rtmuSZMmlcaSlJTE+vXr+d3vfuew3X+DBg14+OGHmTt3Llu2bKn0GiIiIiLXpV+4bXv7TxOhXydbWUY2LE+AXh0c13sd+Xl5SstStyV6Mhzmb4LjGRDsayvbsNe2C+NrvW7Nc7jLKBmr5RYtWlTtupGRkQ47//1S9evXp2HDhmRlZTFgwIBKNwVJTk4mMTERV1dXduzYYU+OCgoKOHToULn6FSVcFovFvjvh+fOOOwVlZ2eTkZFRrdjr1q1LYGAgJpMJZ2dnBg0aVK12ZZ0+fZqCgoJrJoHFxcUUFxdTWFj4i64tIiIick39wiGsFQz/F+xPB996MGsdFBVDbJkphL8db/vv0X9fLRv7pC1x6zYOXomAS7kw7T+2qY3Df+PYfuFm+PEsXM6zPd66HyYvt/085FFoqvuQVYeSsVquZ8+e1a7r4+NTI322atWKsLAwVq5cyfTp0x026wDbaN3Fixfx9/enTp061KlTB5PJ5DA
ClpyczOrVq8td28PDAyifcLm4uBAUFISTkxPx8fG8/vrrgC0Ri4+PJy0trVqxm0wmQkNDue+++/j0009JTEwkLCzMoc7ly5fJycnBz8+vgqtA06ZNcXd3Z8uWLaSlpdG8uW1o//Tp02zYsIH8/Hx+/etfVysmERERkWpxcrKtFxs9H95bbdv98Nf3QNxLtnVqVQn2hS2T4PU4GLMIXJwh4iF4Z1j5XRTnbYAtKVcfb/rOdoBtDZuSsWpRMnYbO3jwIJcuXQJsa5mKiopITk4GbElJq1atqrzGzVwzVpEWLVrwwgsvcODAAUaPHs2qVasIDw/HbDbzww8/sGvXLoKCgti0aRPNmjWjffv2JCcnExYWRu/evTl37hxbt24lLy/vmtdu2LAhixYton79+vj6+tKyZUt69erFQw89RHh4OGvXrqVnz56Ehoaye/duvvnmGwIDA6u9mUnHjh0ZNWoUY8eO5ZFHHqFXr160bduWK1eu8P3337N7925GjBjhsIFIWb/+9a/p1asXS5cu5aGHHrLfwHvHjh18/fXX+Pj4lEtSRURERCq1eVLVdbw94MNRtqMypUfESmvXBL689jKT645FqqRk7DaWmprKqVOnHMqSkpIACAgIqFYyZpTu3buzfPlyZsyYQXx8PO+99x4mkwkfHx86dOjAqFG2D4hGjRrxxhtv4OzszH/+8x+mTZtG/fr16d27N+Hh4YwcOdLhug8//DB/+ctfmD17NtOmTSM/P5+mTZvSq1cvQkNDiY2NZeLEiWzdupUNGzbw4IMP8u677zJv3rxqJ2MuLi48++yz3HPPPcycOZOEhAQ+//xzXFxc8Pf3JyIigv79+1d6jWbNmvH222/TokULVq5cyccff0xBQQGenp48+eSTTJ06tcp1ZyIiIiJSu2lrexEREQG0tb3ILVOdre3lrqCbPouIiIiIiBhAyZiIiIiIiIgBlIyJiIiIiIgYQMmYiIiIiIiIAZSMiYiIiIiIGEDJmIiIiIiIiAGUjImIiIiIiBhAN30WERERR80bGh2BSO2m95j8TMmYiIiIAFBUVMS53Cv4zhmBi9lsdDgitZu7xegI5DagZExEREQAKCwspGe/37NhwwbatWtndDgiIrWe1oyJiIiI3enTp8nLyzM6DBGRu4KSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERsfP398disRgdhojIXcFktVqtRgchIiIixtu7dy+m7Cu0Dm6Gi9lsdDgiN87dAp7uRkchUiFnowMQERGR24OTkxPeru64vDAX0s4YHY7IjWneEOaNUjImtzUlYyIiIuIo7QyknjA6ChGRWk9rxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkRERERErkdWDoyYDX7DwH0QdBsHyUeq3/5AOjw+ETwGQ4OhMGQmnL1Qvt7fVkDvKeA/HEx9YcLSGnsKcntQMiYiIiIiUl3FxRAxGRZ/DVFPwFtD4cwF6DoODp2sun16BnR5Ew7/BFOehujesHoX9IiF/ALHum8uhp2H4VfNb85zEcMpGbuN7d69m/Xr17NkyRLmzp3L4sWLjQ6JqKgoTCbTNQ+z2czly5dvWt9ZWVmMGDGCpUvvvG+Fli1bZn+dVq1aZXQ4IiIiUpGuf4Vh/6z4/IrtkJAKcVEwfgCMegI2TwSnOjB+WdXXn7IScnJhYyy8HAFj+8Enf4Y9RyFuk2PdtDlw6iNY9OqNPCO5jTkbHYBUbOfOnVgsFnx9fcnPzzc6HAfPPfccrVu3dihzcnLCxcXlpvWZlZXFBx98QH5+PgMHDrxp/dS0S5cu8dJLL2GxWMjLyzM6HBEREbkRK7aDvxf0Dbta5ucJ/TvBoq2QVwAWc8XtVyZCZAdo4ne1rPuD0KoxfJIAIx67Wt6sYY2HL7cXJWO3sYEDB1K/fn0Ali9fTkFBQRUtbp2IiAj69OljdBg1KjMzE09PT+rUqdkB4xdffJHCwkL69evH//zP/9TotUVEROQW250G7VtA2b8XOobA3PVw8CTc3/TabU+cs01p7NCy/LmOIbBmV83HK7c1TVO8jZUkYnei3NxcXn31VYKDgzGbzbi5udGxY0e++uorh3rFxcW8/PLL3HfffXh5eeHs7IyPjw+9e/fm6NGj9npr1qyheXPbfOn58+fbp/z5+vraz5tMJsaPH18ulsjISEwmExcvXrSXhYaG4uvry/79+3n00Ufx8PCgQYMG5OTkAPDjjz/St29ffH19cXZ2xtvbm4iICI4cuY7FucDWrVtZvHgxY8eOxcvL67raioiIyG3oVCYEeJcvLyk7eb7ytqXrlm1//pJtZE3uGhoZq+Vyc3OrXdfFxaXao0JZWVmkp6c7lHl5eeHh4UFBQQFhYWGkpKTQvXt3hg0bRnZ2NsuWLSMyMpIVK1bQu3dvAPLz85k3bx5dunThscceo169eiQnJ7NmzRp2795NSkoK9evX54EHHmDs2LFMmTKFsLAwnnzySQDc3Nyq/fzKys3NpUuXLrRr147XXnuNjIwMzGYzhw8fpmPHjhQUFNCnTx9CQkI4cuQIn3zyCZ07dyYpKYmgoKAqr19YWMjw4cN58MEHef3113n55Zd/cawiIiJyExQUwoXL5cvyCiAj27G8gYdtNOxKPliu8Se0689LNa5UsrSk5Ny1pjG6mq/WqWyao9QqGhmr5RYsWFDt46effqr2dYcPH05wcLDDMWnSJABiY2PZs2cPs2bNYu3atUyaNImZM2fy/fff4+npyZgxY7BarQBYLBZOnTrF2rVrmT59OrGxsXz++edMnTqV9PR05syZA0BQUBDPP/88AK1btyY6Opro6Gj+9Kc//eLXJicnh8cff5wtW7YwadIkZs+ejaurK8OHD6ewsJCEhAQWLlzIuHHjmD9/PuvWrePs2bO8+eab1br+2LFjOX78OHPmzLnhqY/79+8nMzPT/jg9PZ1jx47ZH2dnZ/Pdd985tElISKj0cWJiIkVFRepDfagP9WF/fDM3YRIx2jXfH1v327anL30kpMLSb8qVX0xJs70H67pAXmH5a+b+nGjVdXHso9R78IdTJ2w//Dz65fAezLWVpfxwqNK474TPkru5j+tlspb8VSy3tZI1Y4MHD76udmVHryrj5+eHxWKptE5UVBTvv/8+MTExhIaGOpwLDQ2lTZs2hISEkJOTw44dOzCZTA51Ro0axRdffEF6ejoBAQEO5woLCzl//jz5+flcunSJNm3a0LdvX1auXAnA0aNHad68OX/84x+Ji4tzaLtmzRoiIiIYN24csbGxDuciIyNZvXo12dnZ1KtXzx7rnj17OHz4MC1bXp23nZWVRYMGDejRowfz5s0r9/zDw8NxdXXl0KFD5c6Vtn//fn71q1/xxz/+kblz5zq8dp999lmtW28nIrVDSkoK3l
esNH7m35B6wuhwRG5M60DbjoWNG1RcJ/MS7CqzBOHPcdDIG0b/3rH84Ta20a+QURASAGvKfDk7Lx6emwV7Z1S+ZizoefjHEIj5g+O5ITNta8bOLSjfLiPblhSO7w8T7pxNzKRqmqZYy1VnOt0vER4eXmFCcezYMfLz82nSpEmF7U+cOGFPxubMmcOMGTM4cuSIw7cVYPtG4mapV68eTZs6flju3bsXq9XKV199RXBw8DXblaxTq8zQoUPx8/Nj+vTpNRKriIiI3ATeHradDMuWBXiXLy8R2gy+PmC731jpmS87DoGbxbYrYkUCfcCvPiRdYw36/x2CUN1P7G6jZKyWu54pJxaLBScnpxvu02q1EhQUxFtvvVVhnZCQEAA+/vhjXnzxRVq2bMmYMWNo3rw5bm5uFBUVMWTIEIqLi6vVZ9kRuNLKJnglXFxccHZ2fAuUDBR369bNPi2yLFdX10pjmT9/Prt27WL8+PEOI2jnzp0DbJuD7N69m/vuuw+zWXPCRURE7ij9wm3b23+aCP062coysmF5AvTq4Lje68jPS0BaNrpa9mQ4zN8ExzMg+OcveDfste3C+FqvW/Mc5LahZKyWW7RoUbXrRkZG0rhxJd/mVFNgYCDnz5+nb9++VU57jIuLw2w2880339Co0dUPqn379pWrW1nC5e/vD8D58+V3MCo997cq7dq1w2QykZeXx6BBg6rdrrSSHRdjY2PLTZkEePXVVwE4cOAA99577y/qQ0RERAzSLxzCWsHwf8H+dPCtB7PWQVExxJaZQvjbn3d5Pvrvq2Vjn7Qlbt3GwSsRcCkXpv3HNrVx+G8c2y/cDD+ehcs/36d0636YvNz285BHoanuQ3anUzJWy/Xs2bPadX18fGqkz4EDBzJ16lRef/113n///XLn09LS7NvUl4zElR4Bs1qtxMTElGvn4eEB4LDQskSrVq1wcnJi27ZtWK1We+K2fv16Dhw4UO3YfX19CQsLIzExkZUrV9p3bSwd2/HjxyudgtmvXz97cljaypUr2bRpE6NGjaJNmzYEBgZWOy4RERG5TTg52daLjZ4P76227X7463sg7iXbOrWqBPvClknwehyMWQQuzhDxELwzrPwuivM2wJaUq483fWc7wLaGTcnYHU/J2G3s4MGDXLp0CbBtw15UVERycjJgS0xatWpV5TVu1pqxykycOJH4+HhmzZpFYmIiXbp0wcvLi2PHjrFt2zacnJxISbF9sPTv359NmzbRpUsXnnrqKYqKili3bp39fl+l+fj4EBAQwKZNm4iJiaFx48bUrVuXkSNH4uHhQa9evVi1ahVdu3alW7duHD58mFWrVtG0aVOHe5ZVZeHChYSFhTFgwAB69OhB+/btsVqtpKWlsXHjRp544olyG4iU9sADD/DAAw+UKz9w4ACbNm2ie/fu2sBDRETkdrV5UtV1vD3gw1G2ozKlR8RKa9cEvhxXM7HIHU3J2G0sNTWVU6dOOZQlJSUBEBAQUK1kzAhms5nt27czYcIEli1bxqxZswDw9vambdu2DB061F73hRdeIDMzk1mzZvH222/j7u5O586dmT17drnNNcA27TIqKoqZM2eSn5+Pj48PI0eOBGxrtZ555hk2btxIYmIiLVq0YMGCBXz00UfXlYy1bNmSffv2ERMTQ3x8PPHx8ZjNZnx9fXnkkUd49tlnb+wFEhERERFBW9uLiIjIz7S1vdQq1dnaXsRguumziIiIiIiIAZSMiYiIiIiIGEDJmIiIiIiIiAGUjImIiIiIiBhAyZiIiIiIiIgBlIyJiIiIiIgYQPcZExEREUfNGxodgciN0++x3AGUjImIiAgARUVFnMu9gu+cEbiYzUaHI3Lj3C1GRyBSKSVjIiIiAkBhYSE9+/2eDRs20K5dO6PDERGp9bRmTEREROxOnz5NXl6e0WGIiNwVlIyJiIiIiIgYQMmYiIiIiIiIAZSMiYiIiIiIGEDJmIiIiIiIiAGUjImIiIiIiBhAyZiIiIiIiIgBlIyJiIiIiIgYQMmYiIiIiIiIAZSMiYiIiIiIGEDJmIiIiIiIiAGUjImIiIiIiBhAyZiIiIjY+fv7Y7FYjA5DROSuYLJarVajgxARERHj7d27F1P2FVoHN8PFbDY6HLkbuVvA093oKERuGWejAxAREZHbg5OTE96u7ri8MBfSzhgdjtxtmjeEeaOUjMldRcmYiIiIOEo7A6knjI5CRKTW05oxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxEREREandsnJgxGzwGwbug6DbOEg+Uv32B9Lh8YngMRgaDIUhM+HshfL1/rYCek8B/+Fg6gsTltbYU5DaScmYiIiIiNRexcUQMRkWfw1RT8BbQ+HMBeg6Dg6drLp9egZ0eRMO/wRTnobo3rB6F/SIhfwCx7pvLoadh+FXzW/Oc5FaR8nYLbZ7927Wr1/PkiVLmDt3LosXLzY6JKKiojCZTNc8zGYzly9fvml9Z2VlMWLECJYuvf2/Ofr22295+umnefDBB3F3d8dkMjFs2LAK6xcXFzN27FgCAwMxm834+PgwYMAAMjIybl3QIiIitV3Xv8Kwf1Z8fsV2SEiFuCgYPwBGPQGbJ4JTHRi/rOrrT1kJObmwMRZejoCx/eCTP8OeoxC3ybFu2hw49REsevVGnpHcRZyNDuBus3PnTiwWC76+vuTn5xsdjoPnnnuO1q1bO5Q5OTnh4uJy0/rMysrigw8+ID8/n4EDB960fmrCpk2bWLJkCQ0bNiQkJIQ9e/ZUWn/IkCEsXryYsLAwRo4cSWpqKsuWLSMlJYVdu3ZhsVhuUeQiIiJ3sRXbwd8L+oZdLfPzhP6dYNFWyCsAi7ni9isTIbIDNPG7Wtb9QWjVGD5JgBGPXS1v1rDGw5faTcnYLTZw4EDq168PwPLlyykoKKiixa0TERFBnz59jA6jRmVmZuLp6UmdOjc+CDxo0CAGDhxIQEAA27Zt4+GHH66wbnJyMkuWLCEsLIxt27bZ+2/VqhUTJkzgnXfeYezYsTcck4iIiFRhdxq0bwFl/xboGAJz18PBk3B/02u3PXHONqWxQ8vy5zqGwJpdNR+v3FU0TfEWK0nE7kS5ubm8+uqrBAcHYzabcXNzo2PHjnz11VcO9YqLi3n55Ze577778PLywtnZGR8fH3r37s3Ro0ft9dasWUPz5rY51fPnz7dPjfT19bWfN5lMjB8/vlwskZGRmEwmLl68aC8LDQ3F19eX/fv38+ijj+Lh4UGDBg3IyckB4Mcff6Rv3774+vri7OyMt7c3ERERHDlSvQW8jRo1IiAgoFp1586di9Vq5aWXXnJIBP/yl79gsVhYvnx5ta4jIiIiN+hUJgR4ly8vKTt5vvK2peuWbX/+km1kTeQX0sjYHSg3N7fadV1cXKo9KpSVlUV6erpDmZeXFx4eHhQUFBAWFkZKSgrdu3dn2LBhZGdns2zZMiIjI1mxYgW9e/cGID8/n3nz5tGlSxcee+wx6tWrR3JyMmvWrGH37
t2kpKRQv359HnjgAcaOHcuUKVMICwvjySefBMDNza3az6+s3NxcunTpQrt27XjttdfIyMjAbDZz+PBhOnbsSEFBAX369CEkJIQjR47wySef0LlzZ5KSkggKCvrF/ZaVnJyMyWSie/fuDuWurq7cc889HD58mIKCAszmSqZFiIiIiKOCQrhwuXxZXgFkZDuWN/CwjYZdyQfLNf7kdf15GcaVSpaNlJy71jRGV/PVOpVNcxSphEbG7kALFiyo9vHTTz9V+7rDhw8nODjY4Zg0aRIAsbGx7Nmzh1mzZrF27VomTZrEzJkz+f777/H09GTMmDFYrVYALBYLp06dYu3atUyfPp3Y2Fg+//xzpk6dSnp6OnPmzAEgKCiI559/HoDWrVsTHR1NdHQ0f/rTn37xa5OTk8Pjjz/Oli1bmDRpErNnz8bV1ZXhw4dTWFhIQkICCxcuZNy4ccyfP59169Zx9uxZ3nzzzV/c57WcPXvWPjJXlr+/P5cuXSIzM7Pa19u/f79D/fT0dI4dO2Z/nJ2dzXfffefQJiEhodLHiYmJFBUVqQ/1oT7Uh/3xzdywSeR6VPi7u+172/b0pY+EVFj6TbnyXZ99aWtc1wXyCsu9P77/du/V81z7/fHThZ9HzfIKyr8Hcwsc2kP592B6evpt9z5XHze3j+tlspb8BS23XMmascGDB19Xu7KjV5Xx8/OrcqOIqKgo3n//fWJiYggNDXU4FxoaSps2bQgJCSEnJ4cdO3ZgMpkc6owaNYovvviC9PT0ctP4CgsLOX/+PPn5+Vy6dIk2bdrQt29fVq5cCcDRo0dp3rw5f/zjH4mLi3Nou2bNGiIiIhg3bhyxsbEO5yIjI1m9ejXZ2dnUq1fPHuuePXs4fPgwLVtendudlZVFgwYN6NGjB/PmzSv3/MPDw3F1deXQoUOVvk6llawZu1bcAAEBAeTl5XHu3Llyr1dERARr1qwpF6eIiNFSUlLwvmKl8TP/htQTRocjd5vWgbYdCxuX/yLTLvMS7CqzvODPcdDIG0b/3rH84Ta20a+QURASAGvKfPE6Lx6emwV7Z1S+ZizoefjHEIj5g+O5ITNta8bOLSjfLiPblhSO7w8Tbu8NysRYmqZ4B6rJ6XSlhYeHV7iBx7Fjx8jPz6dJkyYVtj9x4oQ9GZszZw4zZszgyJEjDt9AgO1bhpulXr16NG3q+IG6d+9erFYrX331FcHBwddsV7JOraa4urqSk5NDUVERzs6Ob7OSaaYlSaSIiIhUk7eHbSfDsmUB3uXLS4Q2g68P2O43Vnrpxo5D4Gax7YpYkUAf8KsPSddYX/5/hyBU9xOTG6Nk7A50PdNILBYLTk5ON9yn1WolKCiIt956q8I6ISEhAHz88ce8+OKLtGzZkjFjxtC8eXPc3NwoKipiyJAhFBcXV6vPsiNKpZVN8Eq4uLiUS35KBn+7detmnxZZlqura7Viqi4/Pz9+/PFHzp8/T8OGjtvcnj59Gg8PD7y9r7EYWERERGpWv3Db9vafJkK/TrayjGxYngC9Ojiu9zry8/KOlo2ulj0ZDvM3wfEMCP75y9sNe227ML7W69Y8B6m1lIzdgRYtWlTtupGRkTRuXMk3PtUUGBjI+fPn6du3b5XTHuPi4jCbzXzzzTc0anT1w2zfvn3l6laWcPn7+wNw/nz5XY5Kz+etSrt27TCZTOTl5TFo0KBqt7sR7du3Z+fOncTHxztMQ83NzeXw4cO0adNGm3eIiIjcCv3CIawVDP8X7E8H33owax0UFUNsmSmEv/15B+ej/75aNvZJW+LWbRy8EgGXcmHaf2xTG4f/xrH9ws3w41m4nGd7vHU/TP55B+Uhj0JT3YdMHCkZuwP17Nmz2nV9fHxqpM+BAwcydepUXn/9dd5///1y59PS0uzb1JeMxJUeAbNarcTExJRr5+HhAXDNzSxatWqFk5MT27Ztw2q12hO39evXc+DAgWrH7uvrS1hYGImJiaxcudK+a2Pp2I4fP17pFMzr9eyzzzJ37lz++c9/MnDgQPuOlv/4xz/Iy8srF4OIiIjcJE5OtvVio+fDe6ttux/++h6Ie8m2Tq0qwb6wZRK8HgdjFoGLM0Q8BO8MK7+L4rwNsCXl6uNN39kOsK1hUzImZSgZu8UOHjzIpUuXANsoSVFREcnJyYAtMWnVqlWV17hZa8YqM3HiROLj45k1axaJiYl06dIFLy8vjh07xrZt23ByciIlxfbh079/fzZt2kSXLl146qmnKCoqYt26dfb7fZXm4+NDQEAAmzZtIiYmhsaNG1O3bl1GjhyJh4cHvXr1YtWqVXTt2pVu3bpx+PBhVq1aRdOmTR3uWVaVhQsXEhYWxoABA+jRowft27fHarWSlpbGxo0beeKJJ665EUdpFy5c4L//+78BOHPmDAC7du2y7/745JNP8tvf/haAX//61wwYMIClS5fSuXNnevbsyffff8+yZcto27Yto0ePrnbsIiIiUonNk6qu4+0BH46yHZUpPSJWWrsm8OW4molFpBQlY7dYamoqp06dcihLSkoCbDvwVScZM4LZbGb79u1MmDCBZcuWMWvWLAC8vb1p27YtQ4cOtdd94YUXyMzMZNasWbz99tu4u7vTuXNnZs+eXW5zDbBNu4yKimLmzJnk5+fj4+PDyJEjAdvNoJ955hk2btxIYmIiLVq0YMGCBXz00UfXlYy1bNmSffv2ERMTQ3x8PPHx8ZjNZnx9fXnkkUd49tlnq7xGZmYms2fPdij77rvv7Fue+vn52ZOxkufVpEkTFi5cyMSJE/Hw8OAPf/gDs2bNqnKqp4iIiIjUftraXkRERABtbS8Gq87W9iK1jG76LCIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBtB9xkRERMRR84ZGRyB3I/3eyV1IyZiIiIgAUFRUxLncK/jOGYGL2Wx0OHI3crcYHYHILaVkTERERAAoLCykZ7/fs2HDBtq1a2d0OCIitZ7WjImIiIjd6dOnycvLMzoMEZG7gpIxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjERERGx8/f3x2KxGB2GiMhdwWS1Wq1GByEiIiLG27t3L6bsK7QOboaL2Wx0OHK3cbeAp7vRUYjcUs5GByAiIiK3BycnJ7xd3XF5YS6knTE6HLmbNG8I80YpGZO7jpIxERERcZR2BlJPGB2FiEitpzVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiK1V1YOjJgNfsPAfRB0GwfJR6rf/kA6PD4RPAZDg6EwZCacvVC+3t9WQO8p4D8cTH1hwtIaewpSeykZExEREZHaqbgYIibD4q8h6gl4ayicuQBdx8Ghk1W3T8+ALm/C4Z9gytMQ3RtW
74IesZBf4Fj3zcWw8zD8qvnNeS5SKykZu43t3r2b9evXs2TJEubOncvixYuNDomoqChMJtM1D7PZzOXLl29a31lZWYwYMYKlS++Mb5o+++wzfvWrX+Hm5obFYqFly5b87W9/o7i42OjQREREaoeuf4Vh/6z4/IrtkJAKcVEwfgCMegI2TwSnOjB+WdXXn7IScnJhYyy8HAFj+8Enf4Y9RyFuk2PdtDlw6iNY9OqNPCO5yzgbHYBUbOfOnVgsFnx9fcnPzzc6HAfPPfccrVu3dihzcnLCxcXlpvWZlZXFBx98QH5+PgMHDrxp/dSEWbNmERUVhZ+fH88++yz16tVj9erVvPnmmxw9epQPPvjA6BBFRERqvxXbwd8L+oZdLfPzhP6dYNFWyCsAi7ni9isTIbIDNPG7Wtb9QWjVGD5JgBGPXS1v1rDGw5faT8nYbWzgwIHUr18fgOXLl1NQUFBFi1snIiKCPn36GB1GjcrMzMTT05M6dW5swLigoID//u//pl69euzcuZMmTZoA8Le//Y2wsDA+/vhjnnvuOf7rv/6rJsIWERGRiuxOg/YtoOz/2zuGwNz1cPAk3N/02m1PnLNNaezQsvy5jiGwZlfNxyt3HU1TvI2VJGJ3otzcXF599VWCg4Mxm824ubnRsWNHvvrqK4d6xcXFvPzyy9x33314eXnh7OyMj48PvXv35ujRo/Z6a9asoXlz2xzs+fPn26dG+vr62s+bTCbGjx9fLpbIyEhMJhMXL160l4WGhuLr68v+/ft59NFH8fDwoEGDBuTk5ADw448/0rdvX3x9fXF2dsbb25uIiAiOHKl6wW9ycjLZ2dl07tzZnogBmEwmhg8fTlFREXPnzq3+iykiIiK/zKlMCPAuX15SdvJ85W1L1y3b/vwl28iayA3QyFgtl5ubW+26Li4u1R4VysrKIj093aHMy8sLDw8PCgoKCAsLIyUlhe7duzNs2DCys7NZtmwZkZGRrFixgt69ewOQn5/PvHnz6NKlC4899hj16tUjOTmZNWvWsHv3blJSUqhfvz4PPPAAY8eOZcqUKYSFhfHkk08C4ObmVu3nV1Zubi5dunShXbt2vPbaa2RkZGA2mzl8+DAdO3akoKCAPn36EBISwpEjR/jkk0/o3LkzSUlJBAUFVXjdK1euAFC3bt1y59zd3QHbekARERG5DgWFcOFy+bK8AsjIdixv4GEbDbuSD5Zr/Lnr+vOyiiuVLAMpOXetaYyu5qt1KpvmKFIFjYzVcgsWLKj28dNPP1X7usOHDyc4ONjhmDRpEgCxsbHs2bOHWbNmsXbtWiZNmsTMmTP5/vvv8fT0ZMyYMVitVgAsFgunTp1i7dq1TJ8+ndjYWD7//HOmTp1Keno6c+bMASAoKIjnn38egNatWxMdHU10dDR/+tOffvFrk5OTw+OPP86WLVuYNGkSs2fPxtXVleHDh1NYWEhCQgILFy5k3LhxzJ8/n3Xr1nH27FnefPPNSq/7wAMPUKdOHXbt2lVuaumXX34JwNmzZ68r1v3795OZmWl/nJ6ezrFjx+yPs7Oz+e677xzaJCQkVPo4MTGRoqIi9aE+1If6sD++mZswiVyPa/3uXly307Y9fekjIRWWflO+/FiG7f1Q1wXyCoEy749cW6JV5OLs0Efp98fp7CzbDz+Pfjm8B3NtZdu/dZyqeCe8z9XHze3jepmsJX8Vy22tZM3Y4MGDr6td2dGryvj5+WGxWCqtExUVxfvvv09MTAyhoaEO50JDQ2nTpg0hISHk5OSwY8cOTCaTQ51Ro0bxxRdfkJ6eTkBAgMO5wsJCzp8/T35+PpcuXaJNmzb07duXlStXAnD06FGaN2/OH//4R+Li4hzarlmzhoiICMaNG0dsbKzDucjISFavXk12djb16tWzx7pnzx4OHz5My5ZX54JnZWXRoEEDevTowbx588o9//DwcFxdXTl06FClr1Pfvn357LPP+O1vf8sbb7yBl5cX//M//8M///lP8vPz8fPz48yZM5VeQ0TkVktJScH7ipXGz/wbUk8YHY7cTVoH2nYsbNyg4jqZl2BXmeUCf46DRt4w+veO5Q+3sY1+hYyCkABYU+aL1Hnx8Nws2Duj8jVjQc/DP4ZAzB8czw2ZaVszdm5B+XYZ2baEcHx/mHB7bzgmxtM0xVqusul0NyI8PLzCDTyOHTtGfn6+w3qpsk6cOGFPxubMmcOMGTM4cuSIw7cVYPtG4mapV68eTZs6fgDv3bsXq9XKV199RXBw8DXblaxTq8z//M//8PTTT/O///u/bNiwwd7f3/72N0aPHn1D0ytFRETuSt4etp0My5YFeJcvLxHaDL4+YLvfWOmlGDsOgZvFtitiRQJ9wK8+JF1jvfj/HYJQ3U9MbpySsVrueqacWCwWnJycbrhPq9VKUFAQb731VoV1QkJCAPj444958cUXadmyJWPGjKF58+a4ublRVFTEkCFDqn1PrrIjcKWVTfBKuLi44Ozs+BYoGSju1q2bfVpkWa6urlXGU7duXT799FPOnDnDjh07MJvNdOnSxT6iVlmiKiIiIjWkX7hte/tPE6FfJ1tZRjYsT4BeHRzXex35eblGy0ZXy54Mh/mb4HgGBP/8ZeyGvbZdGF/rdWueg9RqSsZquUWLFlW7bmRkJI0bV/INUTUFBgZy/vx5+vbtW+W0x7i4OMxmM9988w2NGl398Nu3b1+5upUlXP7+/gCcP19+V6TSc3+r0q5dO0wmE3l5eQwaNKja7SrSsGFDevW6+mFdcsPqxx57rKImIiIiUlP6hUNYKxj+L9ifDr71YNY6KCqG2DJTCH/7847MR/99tWzsk7bErds4eCUCLuXCtP/YpjYO/41j+4Wb4cezcDnP9njrfpi83PbzkEehqe5DJuUpGavlevbsWe26Pj4+NdLnwIEDmTp1Kq+//jrvv/9+ufNpaWn2bepLRuJKj4BZrVZiYmLKtfPw8ABwWGhZolWrVjg5ObFt2zasVqs9cVu/fj0HDhyoduy+vr6EhYWRmJjIypUr7bs2lo7t+PHjv2hk6/vvv+f999+nUaNGvPTSS9fdXkRERK6Tk5Ntvdjo+fDeatvuh7++B+Jesq1Tq0qwL2yZBK/HwZhF4OIMEQ/BO8PK76I4bwNsSbn6eNN3tgNsa9iUjMk1KBm7jR08eJBLly4Btm3Yi4qKSE5OBmyJSatWraq8xs1aM1aZiRMnEh8fz6xZs0hMTKRLly54eXlx7Ngxtm3bhpOTEykptg+r/v37s2nTJrp06cJTTz1FUVER69ats9/vqzQfHx8CAgLYtGkTMTExNG7cmLp16zJy5Eg8PDzo1asXq1atomvXrnTr1o3Dhw+zatUqmjZt6nDPsqosXLiQsLAwBgwYQI8ePWjfvj1Wq5W0tDQ2btzIE088UW4DkbL+/ve/89lnnxEeHk7Dhg1JTU1l5cqV1KlThwULFuDp6Xk9L6mIiIhcy+ZJVdfx9oAPR9mOypQeESutXRP4clzNxCJShpKx21hqaiqnTp1yKEtKSgIgICCgWsmYEcxmM9u3b2fChAksW7a
MWbNmAeDt7U3btm0ZOnSove4LL7xAZmYms2bN4u2338bd3Z3OnTsze/bscptrgG3aZVRUFDNnziQ/Px8fHx9GjhwJ2G4G/cwzz7Bx40YSExNp0aIFCxYs4KOPPrquZKxly5bs27ePmJgY4uPjiY+Px2w24+vryyOPPMKzzz5b5TUefPBBli5dyscff8yVK1fw9PSke/fuTJ06lTZt2lQ7FhERERGpvbS1vYiIiADa2l4MVJ2t7UVqId30WURERERExABKxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKD7jImIiIij5g2NjkDuNvqdk7uUkjEREREBoKioiHO5V/CdMwIXs9nocORu424xOgKRW07JmIiIiABQWFhIz36/Z8OGDbRr187ocEREaj2tGRMRERG706dPk5eXZ3QYIiJ3BSVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiJi5+/vj8ViMToMEZG7gslqtVqNDkJERESMt3fvXkzZV2gd3AwXs9nocORmcbeAp7vRUYgI4Gx0ACIiInJ7cHJywtvVHZcX5kLaGaPDkZuheUOYN0rJmMhtQsmYiIiIOEo7A6knjI5CRKTW05oxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERERERAygZExEREZGakZUDI2aD3zBwHwTdxkHykeq3P5AOj08Ej8HQYCgMmQlnL5Sv97cV0HsK+A8HU1+YsLTGnoLIraRkTERERERuXHExREyGxV9D1BPw1lA4cwG6joNDJ6tun54BXd6Ewz/BlKchujes3gU9YiG/wLHum4th52H4VfOb81xEbhElY7ex3bt3s379epYsWcLcuXNZvHix0SGRmppKr169MJlM1zxcXV1vav+bNm3i1VdfZfPmzTe1n5qSmZnJP//5T9q3b0+9evVwcXGhUaNGPPXUU+Tn5xsdnoiISPV1/SsM+2fF51dsh4RUiIuC8QNg1BOweSI41YHxy6q+/pSVkJMLG2Ph5QgY2w8++TPsOQpxmxzrps2BUx/Boldv5BmJGM7Z6ACkYjt37sRiseDr63tb/uH+yiuvEBQU5FDWqFGjm9rn9u3bmTlzJl5eXnTt2vWm9nWjjh8/zrhx45g/fz73338/f/rTn3B1dSUtLY309HQKCwtxcXExOkwREZGasWI7+HtB37CrZX6e0L8TLNoKeQVgMVfcfmUiRHaAJn5Xy7o/CK0awycJMOKxq+XNGtZ4+CJGUDJ2Gxs4cCD169cHYPny5RQUFFTR4tZ65pln6NChg9Fh1JjCwkJycnLw9PS84WtduXKFhQsXsmjRIoYPH86HH36IyWSqgShFRERuU7vToH0LqFNm4lXHEJi7Hg6ehPubXrvtiXO2KY0dWpY/1zEE1uyq+XhFbgOapngbK0nE7kRWq5Vvv/2Wp59+msDAQMxmMx4eHvTo0YPdu3c71N23bx9Dhw7l3nvvxdPTE2dnZ/z9/Xnuuec4d+4cABcvXuTPf/4zb7zxBgCxsbH2qZHNmjWznzeZTMTFxTlcPykpiU6dOjkkQ5s3b6Z169Y0bdqUxYsX07FjRzw9PfHy8gIgPz+f1atX06NHD3x8fHB2dsbX15dhw4Zx5syZKp///v37Wb16NSaTiWnTplFUVMTFixcpLCy8gVdVRETkNnYqEwK8y5eXlJ08X3nb0nXLtj9/yTayJlLLaGSslsvNza12XRcXF+qU/TarEmfPniU9Pd3+2NnZGS8vL1xdXVm/fj0vvfQSaWlp9OrVizZt2nD06FG+/PJLOnXqxNdff20fVUtOTubTTz+le/fu9O7dG6vVSmJiInFxcSQmJpKcnIyrqyv9+vXj+PHjLF++nB49evDYY7bpCvfcc0+1Yy4rMzOTV155hQ4dOvDKK6/g5OREfn4+8+bNY+zYsQD069cPf39/vv32W5YtW8bXX39NcnJypSNox48fJyUlhXvuuYepU6cSFxfH2bNncXZ25tFHH2X27NmEhIT84rhFRERuqoJCuHC5fFleAWRkO5Y38LCNhl3JB8s1/rR0/XlK/pVKllyUnLvWNEZX89U6lU1zFLkDaWSslluwYEG1j59++um6rt2zZ0+Cg4PtR0BAAHFxcaSlpfHhhx9y8OBBPv/8c1auXMnkyZNZsGABM2bMwN3dnejoaPt1Bg0axNmzZ1m1ahVvvfUW06ZNY+nSpQwePJiUlBRWrVqF2WwmPDyc0NBQADp16kR0dDTR0dH06dPnF78+Fy9eZOTIkaxdu5YpU6YwadIkkpKSmDt3Ls7Ozuzbt48PPviAyZMns2TJEkaPHs3Ro0eZMWNGpdc9efIkFy5c4NixY0yfPp3IyEimT59OREQEW7ZsoXPnztf1eu/fv5/MzEz74/T0dI4dO2Z/nJ2dzXfffefQJiEhodLHiYmJFBUVqQ/1oT7Uh/3x5ctl/viWWqmw1O9Ehb9X2763bU9f+khIhaXflCs/tGG77feqrgvkFZb/vTqfZfuhrotjH6XsOfi97YefR78cfndzbWWZuTn2+mX7yMvLqxXvQfVx5/dxvUxWq9V6Q1eQW6JkzdjgwYOvq13pkauq+Pn5YbFYKq2TmppKdHQ0X3zxBZMnT6ZFixb2c25ubnTo0IGUlBRGjBiBq6sr8fHx5dqPHz+exMRELl68SN26de3nrFYrubm5XLhwgcLCQpYsWUJMTAyvv/4677zzDgBTpkzhjTfeYPz48UyYMMHe9uLFi0yYMIHp06fz8ccfM2zYMPu5pKQkXn75ZbZv307Jr/vmzZsZOXIkBw8eJDMz0z490Wq1MmfOHKKiohg6dCiTJk1yiH/r1q1ER0fTrFmzSt98b731Fn/5y18AeOONN5g8eTJgS9LGjh3L/PnziYmJ4R//+Eelr7eIyK2UkpKC9xUrjZ/5N6SeMDocuRlaB9p2K2zcoPJ6mZdgV5n7g/05Dhp5w+jfO5Y/3MY2+hUyCkICYM2bjufnxcNzs2DvjMrXjAU9D/8YAjF/cDw3ZKZtzdi5BeXbZWTbksLx/WHCwMqfk8htSNMUa7myux3WpN/97nfX3MBj27ZtnDx5koKCAoKDgytsn5GRQXBwMCdOnOC9995j+fLlHDt2zOEbC8DhG4ua5ufnZ0/EwDat89ixYxQXFxMXF1du/VmJ0klkVedLJ4aNGzfmN7/5DQsXLrxjtucXEZG7kLeHbSfDsmUB3uXLS4Q2g68P2O43VnrZw45D4Gax7YpYkUAf8KsPSde4QfT/HYJQ3U9MaiclY7Xc9Uw5sVgsODk51Vjf9957L+PGjbvmOU9PT/z8/Dhz5gyTJ09mzpw53HfffUyYMIGgoCAsFgu7du3inXfeobi4uFr9VbRbodVqLZfglXBzcytXt0Tfvn3p16/fNds1ad
Kk0lgaNmyIi4sL+fn55bb7r1+/Ph4eHjc1yRQREbnl+oXbtrf/NBH6dbKVZWTD8gTo1cFxvdeRn6fqtyz1/8gnw2H+JjieAcG+trINe227ML7W69Y8B5FbTMlYLbdo0aJq142MjKRx40q+taqm+vXr07BhQ7KyshgwYEClm4IkJyeTmJiIq6srO3bssCdHBQUFHDp0qFz9ihIui8WCu7s7AOfPO+7WlJ2dTUZGRrVir1u3LoGBgZhMJpydnRk0aFC12pXVsGFDmjZtyqFDh0hPT+fee++1n8vKyuLixYvcf//9v+jaIiIit6V+4RDWCob/C/ang289mLUOioohtswUwt+Ot/336L+vlo190pa4dRsHr0TApVyY9h/b1Mbhv3Fsv3Az/HgWLufZHm/dD5OX234e8ig01X3I5M6gZKyW69mzZ7Xr+vj41EifrVq1IiwsjJUrVzJ9+nSHzTrANlp38eJF/P39qVOnDnXq1MFkMjmMgCUnJ7N69epy1/bw8ADKJ1wuLi4EBQXh5OREfHw8r7/+OmBLxOLj40lLS6tW7CaTidDQUO677z4+/fRTEhMTCQsLc6hz+fJlcnJy8PPzq+AqEBISQlhYGIcOHWL27NnMnDkTgB9//JF169ZhtVqv699GRETktufkZFsvNno+vLfatvvhr++BuJdsa9WqEuwLWybB63EwZhG4OEPEQ/DOsPK7KM7bAFtSrj7e9J3tANsaNiVjcodQMnYbO3jwIJcuXQJsa5mKiopITk4GbElJq1atqrzGzVwzVpEWLVrwwgsvcODAAUaPHs2qVasIDw/HbDbzww8/sGvXLoKCgti0aRPNmjWjffv2JCcnExYWRu/evTl37hxbt24lLy/vmtdu2LAhixYton79+vj6+tKyZUt69erFQw89RHh4OGvXrqVnz56Ehoaye/duvvnmGwIDA6u9mUnHjh0ZNWoUY8eO5ZFHHqFXr160bduWK1eu8P3337N7925GjBjhsIFIWUFBQTzzzDNs376d9957jyNHjtCuXTt27tzJli1baNOmDS+//PIvfYlFRERuvc2Tqq7j7QEfjrIdlSk9IlZauybw5bWXOFx3LCJ3ACVjt7HU1FROnTrlUJaUlARAQEBAtZIxo3Tv3p3ly5czY8YM4uPjee+99zCZTPj4+NChQwdGjbJ9SDdq1Ig33ngDZ2dn/vOf/zBt2jTq169P7969CQ8PZ+TIkQ7Xffjhh/nLX/7C7NmzmTZtGvn5+TRt2pRevXoRGhpKbGwsEydOZOvWrWzYsIEHH3yQd999l3nz5lU7GXNxceHZZ5/lnnvuYebMmSQkJPD555/j4uKCv78/ERER9O/fv1qvwYIFC5g6dSrbtm1j3bp1eHl58dxzzzF16lT7KJ+IiIiI3J20tb2IiIgA2tr+rlDdre1F5JbQTZ9FREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkRERERERAygZExERERERMQAus+YiIiIOGre0OgI5GbRv63IbUXJmIiIiABQVFTEudwr+M4ZgYvZbHQ4crO4W4yOQER+pmRMREREACgsLKRnv9+zYcMG2rVrZ3Q4IiK1ntaMiYiIiN3p06fJy8szOgwRkbuCkjEREREREREDKBkTERERERExgJIxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERERERAygZExERERERMYCSMREREbHz9/fHYrEYHYaIyF3BZLVarUYHISIiIsbbu3cvpuwrtA5uhovZbHQ4dxZ3C3i6Gx2FiNxhnI0OQERERG4PTk5OeLu64/LCXEg7Y3Q4d47mDWHeKCVjInLdlIyJiIiIo7QzkHrC6ChERGo9rRkTERERERExgJIxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERuV1l5cCI2eA3DNwHQbdxkHzE6KhEpIYoGRMRERG5HRUXQ8RkWPw1RD0Bbw2FMxeg6zg4dNLo6ESkBigZu43t3r2b9evXs2TJEubOncvixYuNDomoqChMJtM1D7PZzOXLl29a31lZWYwYMYKlS5fetD5qyowZMyp8nXr37m10eCIicjvo+lcY9s+Kz6/YDgmpEBcF4wfAqCdg80RwqgPjl926OEXkpnE2OgCp2M6dO7FYLPj6+pKfn290OA6ee+45Wrdu7VDm5OSEi4vLTeszKyuLDz74gPz8fAYOHHjT+qlJzzzzDA8++KBDWdu2bQ2KRkRE7igrtoO/F/QNu1rm5wn9O8GirZBXABazYeGJyI1TMnYbGzhwIPXr1wdg+fLlFBQUGBzRVREREfTp08foMGpUZmYmnp6e1KlTcwPGPXr0YOjQoTV2PRERuYvsToP2LaDs/5c6hsDc9XDwJNzf1JjYRKRGaJribawkEbsT5ebm8uqrrxIcHIzZbMbNzY2OHTvy1VdfOdQrLi7m5Zdf5r777sPLywtnZ2d8fHzo3bs3R48etddbs2YNzZs3B2D+/Pn2KX++vr728yaTifHjx5eLJTIyEpPJxMWLF+1loaGh+Pr6sn//fh599FE8PDxo0KABOTk5APz444/07dsXX19fnJ2d8fb2JiIigiNHrn/R9Pnz52/q9E0REamlTmVCgHf58pKyk+dvbTwiUuM0MlbL5ebmVruui4tLtUeFsrKySE9Pdyjz8vLCw8ODgoICwsLCSElJoXv37gwbNozs7GyWLVtGZGQkK1assK+bys/PZ968eXTp0oXHHnuMevXqkZyczJo1a9i9ezcpKSnUr1+fBx54gLFjxzJlyhTCwsJ48sknAXBzc6v28ysrNzeXLl260K5dO1577TUyMjIwm80cPnyYjh07UlBQQJ8+fQgJCeHIkSN88skndO7cmaSkJIKCgqrVx8iRI8nNzcVkMhEcHMwLL7zAmDFjMJlMvzhuERG5AxUUwoXL5cvyCiAj27G8gYdtNOxKPliu8aea689LAq7cXksYROT6KRmr5RYsWFDtupGRkTRu3LhadYcPH16uLCYmhn/84x/ExsayZ88e5s6dy/PPP28/HxsbS0hICGPGjKFXr16YTCYsFgunTp0qNwr49ttvM3r0aObMmUNMTAxBQUE8//zzTJkyhdatWxMdHV3t51WRnJwc+vTpw6JFi8o9t8LCQhISErj//vvt5f/v//0/fvOb3/Dmm28SFxdX6bXd3d3p1q0b3bp1o3Hjxhw5coT58+czduxYDh48yMcff3zD8YuIyB1k2/e2benLSkiFpd84lqXNgWYNoa4L5BWWb5P7cxJW9+at0xaRW0PTFGu5nj17Vvvw8fGp9nVjYmJYvHixwzFs2DAAli1bRkBAAI8//jjp6en249KlS3Tq1InU1FR++uknAEwmkz0RKyws5MyZM6SnpxMZGQnAjh07avYFKSM2NtbhcVZWFtu2bSM8PBxvb2+H+Fu2bEnjxo3Ztm1bldcdMWIEGzdu5K9//SvPPvssU6ZM4ciRI
zRt2pQFCxawc+fO64pz//79ZGZm2h+np6dz7Ngx++Ps7Gy+++47hzYJCQmVPk5MTKSoqEh9qA/1oT7sjzWl+sbs3r3b4bHDv8eDzdg/sz9F6/4K68fD+vHktmpEQbd29sdnF4/i9P+8CI28ACjy9+TiwWPlr3nq53/Dxg3uiN8r9aE+7qY+rpfJarVab+gKckuUbOAxePBgQ+OIiori/fff57PPPqtwAw+LxVLl7o87d+6kQ4cOAMyZM4cZM2Zw5MgRhzcIQPfu3Vm/fj0AR48epXnz5vzxj38sNzK1Zs0aIiIiGDduXLkEKzIyktWrV5OdnU29evUA25qxH374gfPnz+PsfHWAeOvWrTz66KOVxu7r68vZs2crrVORd999l9dee40xY8bw97///RddQ0TkZklJScH7ipXGz/wbUk8YHc6do3UgbIyFxg2ur13Xv9pGwOJeuvb5p6bB1wfg5IeOm3iMmA3/sxXOL9BuiiJ3OE1TrOWu51tOi8WCk5PTDfdptVoJCgrirbfeqrBOSEgIAB9//DEvvvgiLVu2ZMyYMTRv3hw3NzeKiooYMmQIxcXF1eqzsjVYZRO8Ei4uLg6JWEnsAN26dXOYYlmaq6trtWK6lpLbAWRkZPzia4iIyF2iX7hte/tPE6FfJ1tZRjYsT4BeHZSIidQCSsZqubLroSpzPWvGKhMYGMj58+fp27cvFoul0rpxcXGYzWa++eYbGjVqZC/ft29fubqVJVz+/v6AbefCskoPN1elXbt2mEwm8vLyGDRoULXbVVfJUHdJvCIiIhXqFw5hrWD4v2B/OvjWg1nroKgYYu+M+22KSOWUjNVyPXv2rHbd61kzVpmBAwcydepUXn/9dd5///1y59PS0uzb1JeMxJUeAbNarcTExJRr5+HhAeAwt7dEq1atcHJyYtu2bVitVnvitn79eg4cOFDt2H19fQkLCyMxMZGVK1fad20sHdvx48dp0qRJpdc5ceIEgYGBDmXnz59nxowZODs7069fv2rHJCIidyknJ1jzJoyeD++ttu2e+Ot7bNMaWwdW3V5EbntKxm5jBw8e5NKlS4BtG/aioiKSk5MBW2LSqlWrKq9R3S3Ya9LEiROJj49n1qxZJCYm0qVLF7y8vDh27Bjbtm3DycmJlJQUAPr378+mTZvo0qULTz31FEVFRaxbt85+v6/SfHx8CAgIYNOmTcTExNC4cWPq1q3LyJEj8fDwoFevXqxatYquXbvSrVs3Dh8+zKpVq2jatKnDPcuqsnDhQsLCwhgwYAA9evSgffv2WK1W0tLS2LhxI0888USVuynef//9PPDAA7Rr145GjRpx9OhRPvvsMzIzM4mKiiI0NPQ6XlEREamVNk+quo63B3w4ynaISK2jZOw2lpqayqlTpxzKkpKSAAgICKhWMmYEs9nM9u3bmTBhAsuWLWPWrFkAeHt707ZtW4YOHWqv+8ILL5CZmcmsWbN4++23cXd3p3PnzsyePZumTZuWu/aiRYuIiopi5syZ5Ofn4+Pjw8iRIwHbzaCfeeYZNm7cSGJiIi1atGDBggV89NFH15WMtWzZkn379hETE0N8fDzx8fGYzWZ8fX155JFHePbZZ6u8RkREBAkJCSQlJXHlyhXc3Nxo3bo1o0aNuuZtAURERETk7qPdFEVERATQboq/2C/dTVFE7nq6z5iIiIiIiIgBlIyJiIiIiIgYQMmYiIiIiIiIAZSMiYiIiIiIGEDJmIiIiIiIiAGUjImIiIiIiBhAyZiIiIiIiIgBdNNnERERcdS8odER3Fn0eonIL6RkTERERAAoKiriXO4VfOeMwMVsNjqcO4u7xegIROQOpGRMREREACgsLKRnv9+zYcMG2rVrZ3Q4IiK1ntaMiYiIiN3p06fJy8szOgwRkbuCkjEREREREREDKBkTERERERExgJIxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERERERAygZExERERERMYCSMREREbHz9/fHYrEYHYaIyF3BZLVarUYHISIiIsbbu3cvpuwrtA5uhovZbHQ41eNuAU93o6MQEflFnI0OQERERG4PTk5OeLu64/LCXEg7Y3Q4VWveEOaNUjImIncsJWMiIiLiKO0MpJ4wOgoRkVpPa8ZEREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkRERERERAygZExERERERMQASsZEREREysrKgRGzwW8YuA+CbuMg+YjRUYlILaNk7Br27t1Lp06dqF+/PiaTiccff9zokKrlu+++w2QyMWLECKNDsbtTX0sREbmLFRdDxGRY/DVEPQFvDYUzF6DrODh00ujoRKQWue5kbM+ePfTq1YvAwEAsFgvu7u4EBgbyu9/9juXLl9+MGG+5wYMHs2fPHp577jmmTZvGSy+9VGHdnTt30r17dwIDA3Fzc8NisRAYGMjAgQM5cqTmv0E7ffo0I0aMYMWKFTV+7Zvhel7LmhATE8Pf//73m9qHiIjc4br+FYb9s+LzK7ZDQirERcH4ATDqCdg8EZzqwPhlty5OEan1nK+n8vr164mMjMTJyYmIiAjatWvH5cuXOXz4MNu3b2fp0qU89dRTNyvWW+Ly5cvs37+fJ598kunTp1dZ/4cffuDs2bP06NGD4OBgnJ2d2bdvH6tWreLLL7/k22+/pWnTpjUW39mzZ/nggw8A6NevX41d92a43teyJnz88cc0atSI//7v/74l/YmISC20Yjv4e0HfsKtlfp7QvxMs2gp5BWAxGxaeiNQe15WMvfnmm+Tn57N582YeffTRcud/+OGHGgvMKD/++CNWqxVvb+9q1R8wYAADBgwoV/6vf/2Ll156iRkzZvDuu+/WcJS3h4sXL2KxWHBxcbnm+et9LW93eXl5FBQU4OHhYXQoIiJyM+1Og/YtoE6ZCUQdQ2Duejh4Eu6vuS9aReTudV3TFNPT0/Hw8LhmIgbQokUL+8+VrV8aMWIEJpOJ7777zl72+OOPYzKZOHHiBI8//jj16tXD1dWVTp062ZO82NhYAgMDMZvNBAYGMnfu3OuKvW/fvvj4+ODs7IyPjw99+/blxIkTDjG0bdsWgA8++ACTyYTJZPpFUwLvueceADIzM6tVPz8/n1deeYWgoCDMZjMeHh507tyZb775xl5nxYoV3H///eXi8/PzK3e9jz76iHvuuQez2YyXlxdPP/00+fn55eolJyfTo0cPvLy8cHZ2xs/Pj6effpqsrCyHeiX/PseOHeOJJ57A09MTT09PDh48eM3nU53X8r333qNt27a4urpisVho1aoV77//frlrvffee4SFheHr64vZbKZevXp06tSJrVu3OtQzmUxkZGTYf/dKjpLfs4rWrP39738vF1vJ72hCQgIDBw6kQYMG1K1bl3Xr1gG2Ub+oqCiCg4Mxm824ubnxX//1X2zcuNHh2kVFRcTExNC0aVNcXV2pW7cujRs3JiIigry8vGu+diIiYrBTmRBwjS8SS8pOnr+18YhI
rXVdI2OBgYGcPHmS2bNn8+KLL96UgB599FH8/f159dVXOXToEMuXL+eJJ56gZ8+e9mmQrq6uxMXF8eKLLxIWFsYDDzxQ6TXPnj3Lr3/9a06fPs0TTzxB+/bt2b17N6tWrWLHjh3s3bsXHx8fXnrpJR544AGmTZtG586d6dOnDwAPPfRQlXFfunSJrKwscnJySEpK4s033wTgD3/4Q7We92OPPcaWLVv41a9+xZAhQ/jpp59YtmwZ3bt3Z82aNfzmN7/hoYceYvTo0eXiq1+/vsO1Nm3axNKlS+nfvz+NGzdm9erVLF68GG9vb/71r3/Z68XHx9O7d2/c3Nzo378/QUFB7Nmzh08++YRdu3axZ88eLBaLw7UfffRRGjRowKhRo8jJycHLy+uaz6eq1/L//b//x8cff8xDDz3Eyy+/jJOTE6tXryYqKopTp04xefJk+7X+/e9/4+npSf/+/QkICODw4cOsXLmSxx57jISEBNq3bw/AtGnTmDhxIvXq1eO1116zt///7d17WFTV/j/w9wDOgDDcBC/IxbvAgNrxKGCKmgp5wRteUk/glczsWGkXU7OrpJV6tJNZgRfUQswyTAVE5TEh/GXm0SOal1DRLEUYVBBI1u8PvuzjODMw3NwDvF/PM0/N2mutvfZnL3n4sPde293d3aRzYMiUKVOgVCoxY8YMKBQKeHl5obi4GIGBgcjKykJISAimTZuG/Px8xMfHY+jQodi9ezeGDBkCAJg7dy4+/fRTBAQEICIiApaWlrh48SIOHDiAwsJCvfgSEVEdK/0L0BbqlxWXAjcLdMud7cqvhhWVACoDvyJZ/9+dIEX6f9wkIqoRUQ27d+8WlpaWAoBo3bq1ePLJJ8WSJUtERkaGXt2TJ08KAGLWrFl622bNmiUAiJMnT0ploaGhAoAYM2aMTt2JEycKAKJFixbixo0bUnlaWpoAICIiIqoc99SpUwUAsXjxYp3yRYsWCQBi+vTpJo27MgsXLhQApI+rq6v48MMPTWq7detWAUD0799f3L9/Xyo/dOiQsLCwEL6+viaNr2KbUqnUie39+/eFh4eHcHBw0Knv5eUl3NzcxM2bN3XK161bJwCIZcuWSWUV52fQoEEmHVNlY92/f78AICIjI/XaBAUFCWtra50x5eXl6dXLyMgQVlZWYvTo0TrlLi4uws/Pz+B4AIjQ0FC98mXLlgkAIiEhQSqrmKN+fn6iuLhYp/6CBQsEABEXF6dTfuPGDdGiRQud/bdv3160bdvW4HiIiMzNqVOnxNX/d1KIrnOFwBjz/3SdK8TV3MoP6uBJ0/v77Y/yNraThJj+sX5f3/9UXm/fz3UffCJqkqp1m+Lw4cNx6NAhhIaGorCwEPv27cM777yDoKAgaDQanDhxotbJYcUVpQoDBgwAAIwcORIuLi5SeXBwMGxsbEx6Ti0lJQVqtRpLlizRKX/jjTegVquRkpJS63FHRkZi27ZtWL9+PWbOnAlbW1v8+eefJrWtWIXy7bffhsUD96f3798fgYGBOH36NC5fvmzyWPr16wc/Pz/pu4WFBXr37g2tVivdNvnDDz/g0qVLCAsLQ1FREXJycqTPsGHDoFKpDMZl0aJFJo/DmC+++AIKhQLPPfeczn5zcnIwfPhw3Lt3T7odEIB09a2srAw3b95ETk4O3N3d4ebmhv/85z+1Hk9lnn/+eb1n4nbu3Ak3NzcMGDBAZ+z37t1DQEAATp8+jdu3bwMA7OzscOvWLezatatW4zh9+rTOLa85OTk6c6KgoEDntl8ASE9Pr/T7jz/+iPv373Mf3Af3wX1I3wsLH7qC1EBcv37deKy6twNSluK/q8cDKUvLP928kNe73f++pyzF6X9NwH1XdXmbNk64fe6KXqxyT/3fKsluzo3mnHMf3Af3Ubf7qLbaZHKnTp0Sy5cvF76+vgKA8PDwEIWFhUKIml8ZKykp0ambkJAgAIh33nlHrx8XFxfh7+9f5TibNWumc3XpQT4+PkKpVErfa3pl7GFpaWnCyspKzJ07t8q6PXv2FAqFQty7d09v27Rp0wQAsXfv3irHV7HtwSt9FSpifurUKSGEEP/61790ruQZ+nTr1k1qX3F+tFqtyTEwNta///3vVe57+fLlUv3U1FTRu3dvoVKp9Oq5urrq9F3XV8bS09P16iuVyirHn5WVJYQov5psb28vAAgnJycxaNAg8dFHH0n/ToiIzEmjvDJmSP/FQkSuMb593AohWk0T4oG7VYQQQsz6RIjmTwlxr8RwOyKiaqrWM2MP02g00Gg0WLBgAfz8/JCVlYXk5GSMGjUKCoXCaLu//vrL6LZmzQwvFWtpaWmwXAhRvUE/IsHBwejQoQO2bduGtWsreZdJPTAWK0A/XpMmTUJYWJjBuoYWBnn4+bSaEEJAoVBg8+bNRsfau3dvAEBWVhaGDRuG5s2b45lnnoFGo4GdnR0UCgVeffVVFBUV1Xo8lc1HtVptcPyenp54//33jbareE5t+PDhyM7OxpdffonU1FQcPXoUqampWLlyJY4ePQo3N7daj5+IiOrYuKDy5e13/giM61NedrMASEgHwv7OZe2JqM7UKhmrYGFhge7duyMrKwuXLl0CALRu3RqA4dUEs7Oz62K3JmvVqhWuXLmCkpISnVvOSkpKkJOTg1atWtXLfouLi3Hnzp0q63l6euLYsWPIzMxEcHCwzraK1QorViasLMmtDo1GA6A8cZs0aVKd9Gmq9u3b49ixY+jUqRMCAwMrrbtx40YUFxcjLi5O7x12zz77rF7yXll8bG1t9VaJBFDtl3O3adMGWq0WEyZMqDTxreDk5IQ5c+Zgzpw5AIAlS5bg3XffxQcffIBVq1ZVa99ERPQIjAsCArsA0z4GTucALmrgk33A/TLgrafkHh0RNSLVemZs27ZtBpdHv337No4cOQIA0sp2LVq0gIODAzIzM1FWVibVPXHihFT3URk8eDBu376N9957T6f83Xffxe3bt6WV72rC2DNrCQkJuHz5Mry9vavso+LlzW+++aZOrI4cOYKMjAz4+vrC09MTwP+enzJ1yXxjBg4cCA8PD3z99dcGn/UrKSnBtWvXarUPY2bOnAkAmD9/PkpLS/W2//bbb9L/VyQ7D1/Re/vtt6HVavXaWltbo6CgQK8cKF8N9PTp0zrbr1+/jm+//bZa4w8PD4dWq8XLL79scPuD48/JydHbHhQUBAC4dYtLIxMRmSVLS2DPYmDi48Ca74GXNwMu9sCBt4CubeUeHRE1ItW6Mvbaa69hzpw5CA4Ohp+fH2xtbXH58mUkJibi999/R0hICPr27SvVnzJlCj755BP06tULI0aMwNWrV7F9+3Z4enri/PnzdX4wxqxYsUJabOTnn3/GY489huPHj+P777+Hm5sbVqxYUeO+n376ady8eRNBQUFo164dioqKcPz4cRw4cADW1tYmvfB58uTJ+Oyzz3Dw4EH06tULoaGh0tL2zZo107nNsW3btmjdujWSkpLw6quvok2bNlCr1ZgxY0a1xm1hYYFNmzYhLCwMAQEBCAsLg0ajwd27d3H
hwgUcOHAAr776KhYuXFjdkFQpNDQUUVFR+Oyzz9CxY0cMGzZMem3CiRMncPToUenWwQkTJuCjjz7C7NmzcfjwYTg7OyM9PR0ZGRlo1aqVzkOYANCtWzfs2bMHM2bMgEajgYWFBaZOnQpHR0dERUVhwYIF6NWrF8aPH4+8vDxs374drVq1MpjYGRMdHY20tDSsWrUKhw8fRr9+/eDg4IBLly7hyJEjUKlU0sIivr6+8PHxQc+ePaVjjI+Ph5WVFaZPn153QSUiItMdeqfqOk52wBfPlX+IiOpJtZKx6OhofP311/j5559x8OBBFBYWwsbGBh06dMBzzz2H1157Taf+qlWroNVqsXv3bixbtgweHh5YtWoVMjMzH2ky5urqiszMTMybNw9paWnYs2cPHBwcMHr0aKxZswYtWrSocd8TJ07E1q1bkZiYiIKCAuklzKNHj8Y777wDHx8fk/pJTk7Gyy+/jB07duCDDz6ASqVC9+7dsWLFCp0EFwA2bNiAF198EatXr0ZJSQlcXFyqnYwB5VfHMjMz8frrr+PQoUP45ptvYG1tjVatWmHUqFEmvyOtJtavX4/AwEB8/PHH2LJlC4qLi2Fvb4/27dvrrNjYo0cPfPXVV1i0aBE+//xzWFhYwM/PD0lJSZg9ezauX7+u0++///1vTJkyBfHx8SgsLIQQAoMHD4ajoyPmz5+PnJwcxMXFYfny5WjZsiVeeOEFWFhY4PXXXzd57CqVChkZGVi6dCkSEhKkF1U7OztDo9Fg2rRpUt2pU6ciNTUVW7ZsQVFREezt7aHRaLB06VKjL08nIiIioqZBIcx1BQwiIiJ6pP773//CqUjA7R/rgbNX5R5O1bq2Lb910M1Z7pEQEdVItZ4ZIyIiIiIiorrBZIyIiIiIiEgGTMaIiIiIiIhkwGSMiIiIiIhIBkzGiIiIiIiIZMBkjIiIiIiISAZMxoiIiIiIiGRQrZc+ExERURPQvqXcIzBNQxknEZERTMaIiIgIAHD//n3k3iuCy6dRUDZrJvdwTGOrknsEREQ1xmSMiIiIAAB//fUXho0bhdTUVGg0GrmHQ0TU6PGZMSIiIpL88ccfKC4ulnsYRERNApMxIiIiIiIiGTAZIyIiIiIikgGTMSIiIiIiIhkwGSMiIiIiIpIBkzEiIiIiIiIZMBkjIiIiIiKSAZMxIiIiIiIiGTAZIyIiIiIikgGTMSIiIiIiIhkwGSMiIiIiIpIBkzEiIiIiIiIZMBkjIiIiIiKSAZMxIiIiIiIiGTAZIyIiIiIikgGTMSIiIiIiIhkwGSMiIiIiIpIBkzEiIiIiIiIZMBkjIiIiIiKSAZMxIiIiIiIiGVjJPQAiqpwQArdv35Z7GETUBNy5c0f6b0FBgcyjISJqeNRqNRQKhcn1FUIIUY/jIaJaunHjBlq2bCn3MIiIiIioClqtFvb29ibX55UxIjOnVCoBAFeuXKnWP26qvYKCAnh4eDD2jxjjLh/GXh6Mu3wYe3k05rir1epq1WcyRmTmKi5129vbN7ofWA0FYy8Pxl0+jL08GHf5MPbyYNy5gAcREREREZEsmIwRERERERHJgMkYkZlTqVRYunQpVCqV3ENpchh7eTDu8mHs5cG4y4exlwfj/j9cTZGIiIiIiEgGvDJGREREREQkAyZjREREREREMmAyRkREREREJAMmY0SPWGJiIrp37w5ra2t06dIFGzZsMKmdVqvFjBkz4OzsDLVajXHjxuH333/XqfPTTz9h2rRp8PHxgYWFBUaMGGGwr3bt2kGhUOh97t27V+vjM1fmEHchBN5//314enrCxsYGQUFB+PHHH2t9bOauPmMPAOnp6QgKCoKNjQ28vLywfPlyPPw4dGOe82fOnMGQIUNga2uL1q1b45VXXkFJSUmV7Uydj9euXUN4eDjUajWcnZ0xc+ZMFBQU6NWr6XluyMwh9lOnTjU4t/ft21dnx2lu6jPuN27cwLx58xAQEACVSgU7Ozuj/TW1OW8OcW+U810Q0SNz+PBhYWlpKZ555hlx4MABsXjxYqFQKERCQkKVbUNDQ4W7u7uIj48Xu3btEn5+fqJ79+6itLRUqrN69WrRsWNHMXnyZOHl5SWGDx9usC8vLy8xbtw4kZGRofMpKyurs2M1J+YS9+joaKFUKsXKlSvF/v37xZgxY4RarRYXLlyos2M1N/Ud+3Pnzgk7OzsxZswYsX//frFy5UqhVCrFBx98oNNXY53zt27dEm3atBHBwcFi3759IiYmRjg4OIjnnnuuyramzMeSkhLh5+cn/Pz8xHfffSe++uor4e7urjfHa3OeGypziX1kZKTo0KGD3tzOz8+v82M2B/Ud9+PHj4uWLVuKESNGiD59+ghbW1uDfTW1OW8ucW+M853JGNEjFBISIvr06aNTNmnSJOHj41Npu/T0dAFAJCUlSWVnzpwRCoVCxMfHS2X379+X/r9///6VJmOm/ABtLMwh7kVFRcLe3l4sXLhQKisuLhZeXl7i2WefrfYxNRT1HfuoqCjh5eUliouLpbKFCxcKR0dHce/ePamssc75ZcuWCVtbW5GbmyuVrV+/XlhaWoqrV68abWfqfNy2bZtQKBTizJkzUllSUpIAIDIzM6Wymp7nhsxcYh8ZGSk0Gk1dHZbZq++4P/jzfOnSpUaTgqY2580l7o1xvvM2RaJHpLi4GAcPHsT48eN1yp966ilkZWUhOzvbaNu9e/fC0dERQ4YMkcq6du2KHj16YM+ePVKZhQX/ST/MXOKenp6OgoICTJgwQSpTKpUYO3asTl+NyaOI/d69ezF69GgolUqd/vPz85GRkVF3B2Om9u7di8GDB8PZ2VkqmzBhAsrKypCcnGy0nanzce/evejWrRu6du0qlQ0ZMgTOzs5Svdqc54bMHGLfFNV33E35ed4U57w5xL2xarpHTvSIXbhwAaWlpfD29tYp9/HxAVB+L7YxZ86cQdeuXaFQKPTaVtauMlu3bpXuyx42bBhOnjxZo37MnbnEvaK+oXFcvnwZRUVF1eqvIajv2N+9exdXrlzR69/b2xsKhUKv/8Y458+cOaN3/I6OjmjTpk2V8QWqno+G+lcoFPD29pb6qM15bsjMIfYVzp8/DwcHByiVSvTs2RPffvttTQ/L7NV33E3RFOe8OcS9QmOb71ZyD4CoqcjLywNQ/sPrQU5OTgCAW7duVdr24XYVbStrZ8zIkSMREBAAT09PXLx4Ee+99x769u2L48ePo0OHDtXuz5yZS9zz8vKgUqlgbW2t15cQAnl5ebCxsalWn+auvmOfn59vsH+lUonmzZvr9N9Y53xN56ip89GU/mtznhsyc4g9ADz22GPo1asXNBoN8vPzsW7dOowZMwYJCQkYN25cjY/PXNV33E0dA9C05rw5xB1onPOdyRhRLWi1WoOruz3M3H7ZW7NmjfT//fr1Q0hICLy9vfHhhx/ik08+kXFkpmmocW8MGmrsG/qcJzJm3rx5Ot9HjhyJPn364I033miwv5wSGdMY5zuTMaJaSEhIwKxZs6qsl5WVJf3FTKvV6myr+Avbg/dhP8
zJyQlXrlzRK8/Ly6u0nanatGmDvn374tixY7Xu61FoiHF3cnJCcXEx7t27p/MXwry8PCgUCmmc5s6cYl/xV9qH+y8pKUFhYWGl/Te0OW+Mk5OT3vEDVc9RU+djZf17eHhIdYCaneeGzBxib4iFhQXCw8PxyiuvoKioqNFdca/vuJs6BqBpzXlziLshjWG+85kxolqYOXMmRPmqpJV+vL290bFjRzRr1kzv3mpj91M/yNvbG2fPntV7d5Khe7ibgoYY94r6Z8+e1eur4t0rDYE5xd7W1hYeHh56/Ve0awr/Ngw9P1Rx9bKq+AJVz0dD/QshcPbsWamP2pznhswcYt8U1XfcTdEU57w5xL2xYjJG9IioVCoMHDgQO3bs0CmPj4+Hj48P2rVrZ7Tt0KFDkZeXh9TUVKns119/xfHjxzFs2LBaj+3atWv44Ycf0KtXr1r3ZW7MJe59+vSBvb09EhISpLLS0lLs3LmzTs6hOXoUsR86dCh27dqF0tJSnf4dHR3Rp08fo/03ljk/dOhQ7N+/X3p+Dii/emlhYYGQkBCj7Uydj0OHDsWJEydw7tw5qSw1NRW5ublSvdqc54bMHGJvSFlZGRISEqDRaBrlL7r1HXdTNMU5bw5xN6RRzPf6XDefiHRVvCTy2WefFQcPHhRvvPGGUCgUYvv27Tr1LC0txfTp03XKQkNDhYeHh9i+fbv47rvvhL+/v94LcP/880+RkJAgEhIShK+vr+jZs6f0/e7du0KI8nfXTJ48WWzZskUcOHBAfPHFF6Jjx47CyclJXLx4sf6DIANziLsQ5S++VKlUYvXq1SI1NVWEh4c3mZc+11fsz507J2xtbUV4eLhITU0Vq1ev1nvpc2Oe8xUvYu3fv79ISkoSsbGxwtHRUe+dak888YTo2LGjTpkp87HixcP+/v4iMTFRxMfHCw8PD6Mvfa7qPDcm5hD77Oxs0b9/f/Hpp5+K/fv3i4SEBPHEE08IhUIhdu7cWb8BkEl9x10IIf38Hj9+vLC2tpa+Z2dnS3Wa2pw3h7g31vnOZIzoEdu1a5fw9/cXSqVSdOrUScTExOjVASAiIyN1yvLz88X06dOFo6OjsLOzE2PHjtV70eLBgwcFAIOf3377TQghREZGhhgwYIBwcXERVlZWwsXFRUyYMEHnxaKNkdxxF0KIsrIysWzZMuHu7i5UKpUICAgQ6enp9XG4ZqU+Yy+EEEeOHBEBAQFCpVIJd3d3ER0dLcrKyqTtjX3Onz59WgwaNEjY2NiIli1bigULFui8BFuI8peRe3l56ZSZOh9zcnLE2LFjhZ2dnXB0dBTTp08XWq1Wr54p57mxkTv2ubm5YuTIkcLd3V0olUphZ2cnBgwYIPbt21cvx2su6jvuxn6eb9iwQadeU5vzcse9sc53hRAP3ZBPRERERERE9Y7PjBEREREREcmAyRgREREREZEMmIwRERERERHJgMkYERERERGRDJiMERERERERyYDJGBERERERkQyYjBEREREREcmAyRgREREREZEMmIwRERFRozV16lQoFAq5h0FEZBCTMSIiogbm4sWLiIqKgre3N5o3bw4nJyf4+PggMjISBw8e1Knbrl07+Pn5Ge2rIlm5efOmwe1ZWVlQKBRQKBQ4fPiw0X4q6lR8rK2t0blzZ7z00ku4detWzQ6UiKiRs5J7AERERGS6n376Cf3790ezZs0QEREBjUaDoqIinDt3DsnJyVCr1Rg4cGCd7S8mJgZqtRo2NjaIjY1Fv379jNbt0aMH5s+fDwC4desW9uzZg1WrViElJQXHjh2DUqmss3ERETUGTMaIiIgakLfeeguFhYX45Zdf0L17d73t169fr7N9lZaWIi4uDuPHj4eDgwM+++wzrFmzBmq12mD9tm3b4h//+If0/Z///CfCwsKwe/du7Nq1C+PHj6+zsRERNQa8TZGIiKgBOXfuHFq0aGEwEQOA1q1b19m+EhMT8eeffyIyMhJTp07F3bt3ER8fX60+QkNDAQDnz583WmfdunVQKBT47rvv9LaVlZXB3d0dPXr0kMqSk5MxceJEdOjQATY2NnB0dERISAjS0tJMGtOAAQPQrl07vfLs7GwoFAq8+eabOuVCCKxbtw49e/ZE8+bNYWdnh4EDB+rdEkpEVF1MxoiIiBqQjh07Ijc3Fzt37jS5zf3793Hz5k2Dn+LiYqPtYmJi0L59e/Tr1w/dunXDY489htjY2GqN99y5cwAAFxcXo3WeeuopqFQqbN68WW9bamoqrl69isjISKls48aNuHXrFiIiIrB27Vq8+OKLyMrKwqBBgyp9rq2mnn76acydOxedOnXCihUr8NZbb0Gr1WLIkCEGE0giIlPxNkUiIqIGZPHixUhJSUF4eDg6d+6Mvn37olevXhgwYAB8fHwMtjlz5gxcXV2rtZ9r164hKSkJixcvllYjjIyMxAsvvICsrCyD+yotLZUWAsnLy8Pu3buxbt06ODg4YNSoUUb35eTkhLCwMCQmJiIvLw9OTk7Sts2bN8PKygpTpkyRyj7//HPY2trq9DF79mxoNBpER0dX+lxbdX3zzTfYunUr1q9fj6ioKKl83rx5CAwMxLx58xAWFsYVG4moRnhljIiIqAEJCgrCsWPHEBkZCa1Wiw0bNmDOnDnw9fVFcHAwLl68qNemXbt2SElJMfgJCQkxuJ+NGzeirKwMERERUtmUKVPQrFkzo1fHkpOT4erqCldXV3Tp0gUvvfQSfH19kZycjJYtW1Z6XJGRkSguLta5DfLOnTv45ptv8OSTT+q0fzARu3PnDnJzc2FpaYmAgABkZmZWup/q2rJlC9RqNUaPHq1zRTE/Px9hYWHIzs6Wrv4REVUXr4wRERE1MP7+/ti4cSMA4NKlS0hLS8MXX3yBw4cPY9SoUXorF9ra2mLw4MEG+9qyZYtemRACsbGx6NatG8rKynSe93r88ccRFxeH6OhoWFnp/hoREBCAd999FwCgUqng5eUFT09Pk46pIuHavHkzZs+eDQD4+uuvcffuXZ2EEAAuXLiARYsWISkpCfn5+Trb6voKVVZWFm7fvo1WrVoZrfPHH3+gS5cudbpfImoamIwRERE1YF5eXoiIiMDTTz+Nfv364ciRIzh69Cj69u1b4z7T0tJw4cIFAEDnzp0N1tm9ezdGjx6tU+bi4mI06auKlZUVJk+ejNWrV+P8+fPo1KkTNm/eDCcnJ4wcOVKqd+fOHQQHB+Pu3bt44YUX4O/vD7VaDQsLC0RHR+PAgQNV7stYwvbXX3/plQkh4Orqim3bthntr7L3uBERVYbJGBERUSOgUCgQEBCAI0eO4OrVq7XqKzY2VlpQw8JC/4mGZ555BjExMXrJWG1FRkZi9erV2Lx5M2bNmoVDhw4hKioKKpVKqpOamopr164hNjYW06ZN02m/ePFik/bj7OyMY8eO6ZUbusWzc+fO+PXXXxEYGAg7O7tqHhERUeWYjBERETUgKSkpGDhwoN4tgkVFRUhOTgYA+Pr61rh/rVaLHTt2ICQkBBMmTDBY5/vvv0dcXBx+//13tGnTpsb7e
liPHj3QrVs3bNmyBdbW1igrK9NZRREALC0tAZRfsXpQcnKyyc+LdenSBTt37sTRo0fRu3dvAOVL6K9atUqvbkREBBITE7Fw4UKsXbtWb/sff/xR6S2MRESVYTJGRETUgLz44ovIzc3FyJEj4e/vj+bNm+PKlSvYtm0bfv31V0RERMDf37/G/X/55ZcoKipCeHi40Trh4eHYuHEjNm3ahNdee63G+zIkMjIS8+fPx/Lly9GlSxcEBgbqbO/bty9at26N+fPnIzs7G+7u7vjll18QFxcHf39/nDx5ssp9REVF4aOPPsKYMWMwb948KJVK7Nixw+BtiuPGjcO0adPw8ccf4+eff8aIESPg4uKCnJwcZGRk4Pz58wavqBERmYKrKRIRETUgK1euxNixY/Hjjz/izTffRFRUFNasWQM3NzfExMRgw4YNteo/JiYGVlZWOs9pPWzIkCFQq9W13pchU6ZMgZWVFQoKCvQW7gAAR0dHJCUlISAgAGvXrsX8+fNx+vRp7NmzB3/7299M2kf79u3x7bffwtXVFUuWLMGKFSvw+OOPY9OmTQbrx8bGSrdsRkdH4/nnn8emTZtgZ2eH6OjoWh0vETVtCvHwdX4iIiIiIiKqd7wyRkREREREJAMmY0RERERERDJgMkZERERERCQDJmNEREREREQyYDJGREREREQkAyZjREREREREMmAyRkREREREJAMmY0RERERERDJgMkZERERERCQDJmNEREREREQyYDJGREREREQkAyZjREREREREMvj/kZ9J0MzrD30AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot the SHAP values\n", + "shap.plots.bar(explanation[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 2: Shapley value analysis to explain importance of particular atom/node and bond/edge" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO2deVzU1f7/X7OwiAgaKIJSgAqIKRrmhlcTx6VE0wzKEk3L0ZZr1vfaqLcyc2m0foZtNqa5hKlganrJBUwJJRdAUGRRcEEEVARZZBtmzu+PQx8nBATmM/OZmc7z0R90ZuZzXjPCa87nnPciIoSAwWAwGG1FLLQABoPBMG+YjTIYDIZeMBtlMBgMvWA2ymAwGHrBbJTBYDD0gtkog8Fg6AWzUQaDwdALZqMMBoOhF8xGGQyGeVBUVHTixIm5c+cOHjx4+fLllZWVQiuqR8SymBgMhqlRV1eXm5t75cqVixcvpqen0x8KCgp0n+Pv75+SkiKUQl2YjTIYDIEpKSnJysrKyMjIysrKysrKzMzMyclRq9UNnubo6Ojt7S2VSmtqapKTkwGsX79+3rx5Qkj+G8xGGQyGUcnPz+cWmPSHK1euPPw0V1fXPn36eHl5+fn50R88PT1FIhF99KOPPlqxYoWtrW1cXNygQYOM+w4awmyUwWAYitLS0uzsbGqU1DQzMjIe3tO0sbHp0aOHrmn6+vq2b9+++Yu/88473377raura2Jiopubm8HexKNhNspgMPjh4WXm1atXH3aYTp06cQtM+oOHh4dY3OrjbrVaPXbs2OPHjw8dOvTYsWM2NjY8vY9Ww2yUwWC0HbVaPXfu3J07d9bW1mo0mgaP2tnZeXt7+/j4+Pr6+vr6+vj4eHt7P3KZ2XLu3r379NNPX716dd68eevXr+frsq2F2SiDwWg777///pdffkl/5muZ2QwpKSnvvfdeVFSUs7MzHTl37tzw4cMrKytVKpVcLudxrpbDbJTBYLSdbt265efnBwYGRkVFubq6Gnq64cOHnzx5cty4cdHR0RKJhA5GRESEhYVZWVkdPXr0X//6l6E1PAwLv2cwGG0kLy/v1q1bVlZWW7ZsMYKHAtixY0eXLl0OHz783//+lxucPn36ggUL1Gp1aGhoXl6eEWQ0gNloiyCECPLPw2CYMps2bdJoNC+++GLPnj2NM6O7u/uePXusra3XrFmzc+dObvyLL74YN25cYWFhSEhITU2NccRwMBt9NLGxsR06dHB3d7ezs1uwYMHdu3eFVsRgCI9Wq928eTOA119/3ZjzBgYGfvHFF4SQ2bNnJyUl0UGJRPLzzz97eXmdOnVKgB1SwmiW6Ohoa2tr3U/Mzs4uLCwsLi5Oq9UKrY7BEIzDhw8D8PT01Gg0dKSkpGTatGmHDx82wuxvvPEGgCeeeOL27dvcYEpKCg0D+Pbbb42ggYPZaJNotVqlUkm3sXv06JGZmbls2TKZTMblUfTq1Wvp0qXXr18XWimDIQChoaEAli9fzo188803AMaMGWOE2aurqwcPHgxg9OjRarWaG9+9e7dIJLKysjp+/LgRZFCYjTZOeXn5iy++CEAkEikUCu77lhBy48YNpVLp4eFBzVQsFstkssjIyNraWgEFMxjGpKioyMbGRiwW6y4jBgwYAGDnzp3G0ZCfn0+Tl95//33d8YULFwJwcXHJzc01jhJmo42QnZ3dt29fAB06dNi7d2+jz9FoNDExMSEhIdwtf9euXefPn3/hwgUjq2UwjA+NFX3uuee4kdTUVACPPfZYVVWV0WQkJCTQ5KUff/yRG9RoNM8++yyAAQMG3L9/3wgyeLbR4rrinOqcck05NzLq0qivbn/F7ywG5fjx4507dwbg7e2dnp7+yOcXFxerVKp+/fpxm6cBAQEqlaqiosIIahkMQfD39wfwyy+/cCNvv/02gHfffdfISmjykq2t7ZkzZ7jB4uJiGjwwffp0I2jgzUb3luztl94PSUASJMmS8ZfHZ1dnE0JczrssvrmYr1kMjUqlkkqlACZMmFBSUqL70LVr15p/bWJiolwut7e3p2bq4OAgl8vj4+MNqZfBEIBTp04BcHZ2rq6upiNVVVWdOnUCkJKSYnw99Gj+8ccfv3XrFjeYnp7u4OAAIDw83NAC+LHRbXe3iZJEskuy2LLYnOqc6HvRARkBk7InEfOx0aqqqpkzZza6GUoIUalU1tbWu3bteuR1ysrKtm7dKpPJuMWpn5+fUqm8c+eOwbQzGEZlzpw5AP7zn/9wIxEREQCefvppQfTU1taOGDECQGBgYE1NDTe+Z88ekUgklUp///13gwrgwUYrNZVOqU6DMwfXah+csZTVlVVqKomZ2OiNGzcGDhwIwN7eXvc+hRBSXV3NhcV9/PHHLb9mRkaGQqHo0qULfa2NjU1ISMj+/fvr6ur4ls9gGI+Kigq6yrt48SI3OGrUKADff/+9UKoKCwu7d+8O4N///rfu+JIlS+iObU5OjuFm58FGD5YeRBK2393e6KOmb6NxcXEuLi4AevbsmZaWpvvQzZs3hwwZQndetm7d2oaL19TU7N+/PyQkhO4VAOjevbtCoaAFxBgMs+PHH3+k6z5u5MqVKyKRqF27dg32wYzMn3/+SY+bNm7cyA1qNJoJEyYA8Pf3N9xxBQ82uvbWWiQh+X5yo49yNlqnNcVVmEqlsrKyAvDss88WFxfrPnTy5EmaJuzu7n727Fk9J8rLy1MqlV5eXg3CpHTvQRgM0ycwMLDByThd8c2cOVM4UfVs3bqVLnpOnz7NDZaWlvbu3RvA1KlTDZQyw4ONLi9YjiTcqL3R6KPURu+o73RJ7SK/Lk+6n6T/jLxQXV09a9as5jdDAYwYMUJ331pPaJhUWFhYu3btqJ926tRJLpenpqbyNQWDYTiysrJEIpG9vX1ZWRkdqaurc3d3B/DHH38Iq41CAwZcXV1v3rzJDWZmZjo6OgL4/PPPDTEpDzYafiscSTh7v/H1GrXRzUWb6SE+kjAwY+D6O+vv1d3Tf+o2k5eXR/u32NvbR0VF6T6kVqsVCgX1OLlcbqCg+pKSEpVK1b9//wZhUuXl5Y9+MYMhEDSyfc6cOdzIgQMHaHSgieRG19bWjhw5EsDQoUO5QAJCyL59+8RisVgs/u2333iflAcbPVp2FEnYUrSl0Ue5m/qLVRcVeQrnVGdqprbnbEOuhMSUxWiJsT
[base64-encoded PNG image data omitted: RDKit drawing of the test molecule, the display output of cell 21 (mol = Chem.MolFromSmiles(test_mol)); the encoded data continues into the next line]
e6TcDsW5xPHBzz5oDNUfl+1GEvp6QDiD2jfZpDbQwXmH1Iq9WhvsgZrEYMAGdzeABCE6vJAAACVnpUWHRNT0wgcmRraXQgMjAyNC4wOS40AAB4nH1VW6obMQz9zyq8gTGSLMn2501yKaXcBNq0eyj0s/unRx6S8QXTmVh47GM9j5xTiuf79dvvv+n1yPV0Son+8+u9p1+FiE4fKSbp/P7l6y1dHm/n58rl/vP2+JFEk1ScwfsZ+/a4fzxXOF0SZyNlNkzIRONQpvEcJyXdkuRqnatjWwp14wWuQJ/kVkS6po2zeBW3BVCHYffCndImWaiS0QJosAyDrtUpNLqr+spFh0asFtPiaaPMTZhWPlYAoai49F6wX5y0yQLYYBquWRHqLVRa7YVWyB4qJWOzVo0ZObv3BZIpoJQ7adEBhXVZ5pKjOFvJsGkoyhbBIZ2rJLEEVDNbbyZIrKohYytkVGizbNakMZC9NRRrhYwSbZ6r1NI8yKENaldIS5c/aWuZxdrgEXhCSx6x70q1N6QoYtJSuizDr7unoLwXGTmrDqauoFEpzVJ7rW0nCei0TFRUyjKIIabDPJmXVfgShfJsDTyhwacKsq7Cl6hTRfAARu5FhNdAGSq5ucJj0KWz8NJ2FElzJXgnALJz60uNOoDY1xrhwkV1XwDfb9dPfb/fBOf77XrcBPHK0fC4PlI5+pox9OhexrCjRxnDj05kjHr0G2O0o6sYox+twzHmBol95qkNNATLxHYNwWVitYZgndjLQ9jEUh0rPpExPsG0iXMasbaJWRqC+8QgDSEzUzSE8MQIDSEylV5DSJlqrCFEp2LquLVfC5FX5OMVggyH65QcqfuZw7+o9FzX+H7+O2B++gcuuTJBB7elowAAAS96VFh0U01JTEVTIHJka2l0IDIwMjQuMDkuNAAAeJwlUDmSwzAM+8qWyYzC4U1xXLrPJ9zu5AV5/IJeN7JACARwfuR6nG89z/fjsuvC5fd5XfY8T31+Lr3mU/n5PoSCXWQJcajbOpQqWvZiUuMOGWSbaq+XkGZprUMo0wSIknJxrAPs9MrhZLrXIGzhFmAxyVZhSGFsqd1Qt2TfCkgpTGVYUW08kBF+wgZjy5LeAJ0kegecukdi5ysoYusG0nsrvL+S4M960vj24WwSBQkIUvE/x3tzjribtcqtxN15L+xKNHL7gpmqCckpiUgzZTf3wWBfscBJq2v/B2eQgpAUk5HnSJg5kmIj9l1EoTg8K9jKcaWqaB8U2em6sLRFdXSL8R6AJJrGHSd6ZYKCZ67n9w9ramN6GPzYewAAAABJRU5ErkJggg==", + "text/plain": [ + "" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mol = Chem.MolFromSmiles(test_mol)\n", + "mol" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of atoms: 24, Number of bonds: 27\n" + ] + } + ], + "source": [ + "n_atoms = mol.GetNumAtoms()\n", + "n_bonds = mol.GetNumBonds()\n", + "print(f\"Number of atoms: {n_atoms}, Number of bonds: {n_bonds}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# initialize the featurizer\n", + "atom_featurizer = CustomMultiHotAtomFeaturizer.v2() # chemprop v2 default atom featurizer settings\n", + "bond_featurizer = CustomMultiHotBondFeaturizer()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "# A helper function to get predictions from a molecule with ability to keep or remove specific atom/node or bond/edge\n", + "def get_predictions(keep_atoms: Optional[List[bool]], keep_bonds: Optional[List[bool]], mol: str) -> float:\n", + " featurizer = CustomSimpleMoleculeMolGraphFeaturizer(\n", + " atom_featurizer=atom_featurizer,\n", + " bond_featurizer=bond_featurizer,\n", + " keep_atoms=keep_atoms,\n", + " keep_bonds=keep_bonds\n", + " )\n", + " test_data = [data.MoleculeDatapoint.from_smi(mol)]\n", + " test_dset = data.MoleculeDataset(test_data, featurizer=featurizer)\n", + " test_loader = data.build_dataloader(test_dset, shuffle=False, batch_size=1)\n", + "\n", + " with torch.inference_mode():\n", + " trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_progress_bar=False,\n", + " accelerator=\"cpu\",\n", + " devices=1\n", + " )\n", + " test_preds = trainer.predict(mpnn, test_loader)\n", + " return test_preds[0][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop_delete/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 
'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction with all atoms and bonds kept: tensor([2.2535])\n", + "Prediction with all atoms and bonds removed: tensor([2.1880])\n" + ] + } + ], + "source": [ + "# example prediction with different keep_atoms and keep_bonds\n", + "\n", + "# keep all atoms and bonds\n", + "keep_atoms_0 = [True] * n_atoms\n", + "keep_bonds_0 = [True] * n_bonds\n", + "\n", + "# remove all atoms and bonds\n", + "keep_atoms_1 = [False] * n_atoms\n", + "keep_bonds_1 = [False] * n_bonds\n", + "\n", + "pred_0 = get_predictions(keep_atoms_0, keep_bonds_0, test_mol)\n", + "pred_1 = get_predictions(keep_atoms_1, keep_bonds_1, test_mol)\n", + "\n", + "print(f\"Prediction with all atoms and bonds kept: {pred_0}\") # expected 2.2535\n", + "print(f\"Prediction with all atoms and bonds removed: {pred_1}\") # expected 2.1880" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "# An example wrapper class for use as the model input in SHAP explainer\n", + "class MoleculeModelWrapper:\n", + " def __init__(self, mol: str, n_atoms: int, n_bonds: int):\n", + " self.mol = mol\n", + " self.n_atoms = n_atoms\n", + " self.n_bonds = n_bonds\n", + " def __call__(self, X):\n", + " preds = []\n", + " for keep_features in X:\n", + " try:\n", + " # unpacking X, indices corresponds to atom.GetIdx() and bond.GetIdx() from rdkit mol, adapt as needed\n", + " keep_atoms = keep_features[:self.n_atoms]\n", + " keep_bonds = keep_features[self.n_atoms:self.n_atoms + self.n_bonds]\n", + " except Exception as e:\n", + " print(f\"Invalid input: {keep_features}\")\n", + " raise e\n", + " pred = get_predictions(keep_atoms, keep_bonds, self.mol)\n", + " preds.append([pred.item()])\n", + " return np.array(preds)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "# An example masker function for use with SHAP explainer\n", + "# The masker function takes in a binary mask and the input data X, and returns the masked input data. 
This simulates the effect of masking out certain features.\n", + "def binary_masker(binary_mask, x):\n", + " masked_x = deepcopy(x)\n", + " masked_x[binary_mask == 0] = 0\n", + " return np.array([masked_x])" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the model wrapper with the test molecule, number of atoms and bonds\n", + "model_wrapper = MoleculeModelWrapper(test_mol, n_atoms, n_bonds)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[2.25354147]])" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test the model wrapper with a random node/edge choice\n", + "keep_features = [1] * (n_atoms + n_bonds)\n", + "feature_choice = np.array([keep_features])\n", + "model_wrapper(feature_choice)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the SHAP explainer with the model wrapper and masker\n", + "explainer = shap.PermutationExplainer(model_wrapper, masker=binary_masker)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "PermutationExplainer explainer: 2it [00:25, 25.79s/it] \n" + ] + } + ], + "source": [ + "# Compute SHAP values, using 200 evaluations of different node/edge choices (notice that nodes and edges are masked out randomly by the binary masker, so the results may vary between runs)\n", + "explanation = explainer(feature_choice, max_evals=200)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ".values =\n", + "array([[-6.33835793e-04, 2.00152397e-04, 8.77737999e-04,\n", + " -1.20162964e-04, -1.70385838e-03, 1.32679939e-04,\n", + " 5.65052032e-04, -1.49548054e-03, 1.45280361e-03,\n", + " 7.84397125e-05, 7.94768333e-04, 1.32918358e-03,\n", + " 1.59931183e-03, 7.92026520e-04, -1.05524063e-03,\n", + " 1.27375126e-03, 1.26934052e-03, -5.46216965e-04,\n", + " 2.07734108e-03, 9.27805901e-04, 1.94215775e-03,\n", + " 1.70767307e-03, 9.18865204e-04, 2.30920315e-03,\n", + " 1.02865696e-03, 2.66933441e-03, 3.65734100e-04,\n", + " 1.01172924e-03, 1.39999390e-03, 1.10065937e-03,\n", + " 1.54471397e-03, 1.68943405e-03, 1.58667564e-03,\n", + " 8.28027725e-04, 2.80642509e-03, 2.18117237e-03,\n", + " 2.17568874e-03, 9.46164131e-04, 1.96087360e-03,\n", + " 2.82001495e-03, 2.58827209e-03, 2.84719467e-03,\n", + " 2.18105316e-03, 2.61569023e-03, 1.97517872e-03,\n", + " 2.07221508e-03, 2.10452080e-03, 1.83522701e-03,\n", + " 1.78325176e-03, 2.54166126e-03, 2.21943855e-03]])\n", + "\n", + ".base_values =\n", + "array([[2.18796897]])\n", + "\n", + ".data =\n", + "array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1]])" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Print the SHAP values\n", + "explanation" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
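Reading the SHAP output above: the wrapper assigns the first n_atoms positions of each feature vector to atoms (indexed by atom.GetIdx()) and the next n_bonds positions to bonds (indexed by bond.GetIdx()), so the values in explanation.values can be split back into per-atom and per-bond contributions. The snippet below is a minimal sketch of that unpacking, not a cell from the original notebook (whose final plotting cell is truncated after this point). It assumes the explanation, mol, n_atoms and n_bonds objects defined in the cells above; the names shap_values, atom_shap and bond_shap are illustrative.

# Sketch (not from the notebook): split the (1, n_atoms + n_bonds) SHAP array
# back into atom and bond contributions, following the index layout used by
# MoleculeModelWrapper (atoms first, then bonds).
shap_values = explanation.values[0]
atom_shap = shap_values[:n_atoms]
bond_shap = shap_values[n_atoms:n_atoms + n_bonds]

for atom in mol.GetAtoms():
    idx = atom.GetIdx()
    print(f"atom {idx:>2} ({atom.GetSymbol()}): {atom_shap[idx]:+.5f}")

for bond in mol.GetBonds():
    idx = bond.GetIdx()
    i, j = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
    print(f"bond {idx:>2} ({i}-{j}): {bond_shap[idx]:+.5f}")

Because the binary masker zeroes out randomly chosen positions during the permutation passes, these attributions will vary slightly between runs, as the comment on the explainer call above already notes.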
"iVBORw0KGgoAAAANSUhEUgAAA2sAAAL5CAYAAAAubLxsAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAzLhJREFUeJzs3XtU1VX+//HnB+J+9SQ38UapmRhDmok0zKjfQsFxugnZTyvNIh3RRNFGTS11IqcxHcMp1L4w4jiaWGbG2MUrgY5ljpdwmjTL4auWJBcTOaKc3x/kqeMBLwhypNdjrbP07L0/e7/PWbNcveazP/sYFovFgoiIiIiIiDgUp6YuQEREREREROwprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNpJmxWCyUl5ej37sXERERub4prIk0MydPnsTPz4+TJ082dSkiIiIichUU1kRERERERByQwpqIiIiIiIgDUlgTERERERFxQAprIiIiIiIiDkhhTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQeksCYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA1JYExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIiIiIiIgDUlgTERERERFxQAprIiIiIiIiDkhhTURERERExAEprImIiIiIiDigG5q6ABFpJEdPwPdnm7qKH3m5gZ9XU1chIiIict1QWBNprpIXw3/Lm7qKGmGB8PpohTURERGRK6CwJtJcfVUMB443dRUiIiIiUk96Zk1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIXP9KT0HSqxAwDLwehj7T4dODTV2ViIiIyFVRWJMrsnnzZgzDqPO1ffv2Rl1//vz5ZGVlNeoajeHo0aO0aNECwzD405/+ZNf/wQcfMHLkSHr06IG7uzuGYbB58+ZrX+j1qLoaBsyG5XmQHAd/fBS+LYPe0+GLI01dnYiIiEi96XfW5IoUFxcD0KNHD7p27WrX7+vr26jrv/TSS4SGhjJs2LBGXaeh/e53v6OyshKAI0fsA8SiRYtYs2YNbdq0ITAwkP/+97/W7/pnr/c0aB8IWWNq78/ZBgWfw6pUGBRd05YYDZ2SYcZKWJ5y7WoVERERaUAKa1Ivt99+O6mpqXbtbdq0adR1z5w5w+nTpxtl7qqqKs6dO4e7u3uDzrt27VrWrl3Lgw8+yKpVq2odk5iYSN++fQkMDOTtt98mOzu7QWto1nK2QZA/PBD1Y1uAX01gW7YVzFXg5tJk5YmIiIjUl7ZBSr14eXnRsWNHu9dPg87KlSv55S9/iY+PD56envTs2ZOcnBy7uVauXMlvf/tb2rZti5ubGy1btuS+++5jz549NuMMw6C4uJh9+/bZbL386quvrP213XHLysqy21b43HPPYRgGn332GePHj6d169a4u7uTn58PgNls5oUXXiA8PBx3d3f8/f0ZOHAgu3btuqLv6eTJk4wcOZJf/epXxMbG1jkuPj6epKQkHnzwQVq2bHlFa/zs7ToE3W4Cpwv+ObuzI1SY4T/aCikiIiLXJ91Zk3qprKy026bn5uaGj48PAM8++yx/+MMf6NatG7/5zW8A2LdvHwkJCaSnpzN69GjrdX/6059wcXGhZ8+eeHp68t1337Fx40Y+/PBDdu3aRceOHQEYPnw4q1atwtvbm7i4OOv1Pw2Ix44ds6v16NGjADb1nt+KOHjwYCwWC71796aqqgoXFxeqqqro168fBQUFxMTEcPvtt2M2m/noo4+466672Lp1K3fcccdlfU+TJk3i9OnTPPPMM3z//fd1jvPy8rqs+aQWR0vgV13s20Na1Px55ATc1u7a1iQiIiLSABTWpF5effVVXn31VZu2hIQE3njjDT799FP+8Ic/0L9/f8aOHUvr1q0xDINDhw4xbdo0nnnmGR599FFrsJs4cSJ+fn4EBATg6elJeXk5GzZsYPr06cyZM4clS5YAMHnyZN555x1MJhOTJ0+2rnvjjTfW+3NUV1czc+ZMOnbsiGEYtGnThldeeYUtW7bw9NNPc//99xMQEMCZM2fYuXMnkyZN4umnn7begbuY7du3s2jRIkaNGkXfvn1Zu3Ztvev82ag6C2UV9m3mKigut203edfcTTt9Btxq+afM3bXmz9NnGqdWERERkUambZBSLzExMYwbN87mdc899wCwZMkSDMPg8ccfp0ePHoSEhBAcHEyvXr3o27cvp06dYuvWrda57r//fvr168ftt99Oq1atuOmmm0hISCAoKIi8vDzruPN32JycnGy2Xrq41P95pPvuu4/777+fiIgIbrvtNvz9/VmyZAnBwcEMHz6c8PBwAgMDad26NXFxcdx2221s3779ks/NVVVVMXz4cG699VZSUlJwdXWtd43NyYXbSAsKCmzeFy5+s+b4/Z++Cj6HFR/Ztx8upry8nHNuN4D5rP2clTUhrfDQAZs1tm/fzrlz535cs7CQkpIS6/uioiIOHz5sfV9eXs6+ffsuWveF77WG1tAaWkNraA2toTUuZ41LMSwWi+WKrpCftZycHBISEhg1ahRTpkyx6fP09MRkMhEdHc22bdsuOs/8+fN5+umngZr/gJ82bRqbN2/m1KlTNuMCAwP55ptvrO8DAgIIDg5m7969dnMahkG/fv1Yv369TXtaWhpTpkxh1apVDBo0CICkpCQWL17M+++/bw2Z57m5uXHmzMXvxnz55ZeEhYXV2T9r1ixmzZrFq6++yogRI4Afv7uUlBRefvnlOq8dP3488+bNs6n3SpSXl+Pn50dZhyR8Dxy/4usbxS2hsPF5aGW6+LiS72HnBb+PNiELglvAxHtt2395a83ds46joWMI5D5r2//6h/DEX2DPPG2DFBERkeuStkFKvbi7u9O6deta+86cOYNhGIwZMwanCw99+EH37t0BOHz4MDExMbi7u9O/f38CAwNxdXXFMAzeeOONS4amy/HT/0fkQoGBgXZtFouF0NBQEhIS6rzu/BbO2hw9epQ//OEPREdH061bNw4cqLmzc/55urKyMg4cOEBISIieVbtQC2+4+xf2bSEt7NvPi2wPeftrfm/tp/97++cX4OkGnVo1WrkiIiIijUlhTRpcaGgoO3fu5N5776VTp061jjGZau6wrFixglOnTjF27FgSExPx9/fnhhtq/meZlZVl/ft5hmHUua6XlxcnT560a//vf/9b5zXOzs52bYGBgZw6dYqUlJQ6w6a/v3+dc3711VeYzWa2bNlCt27d7Pr/93//l//93//l73//O4MHD65zHrlMg3rVHN//5vYff2
etuBxWFcDAO3Rsv4iIiFy3FNakwQ0cOJC1a9fy8ssv8/bbb9sFom+++QZPT08ASktLAYiMjCQyMtI6ZvHixZSWltodY+/u7l5rIAMIDg5m3759VFRUWOcvKSlh9erVV1R/3759yc7OZuXKlUycONGu/5tvvrELkT8VFBREUlKSXfuRI0dYt24dUVFRRERE0LZt2yuqS+owqBdEdYLh6VBYBC194C/r4Vw1PK8wLCIiItcvhTVpcL/97W95++23WbduHZGRkSQkJNCqVSuOHj3Kzp07yc3NtW5vjImJYe7cuYwdO5aDBw/SokUL8vPzeeeddwgICODCRyo7duzIpk2bmDZtGrfeeitOTk4MHDgQLy8vBg4cyPz58+nTpw+PPvoopaWlLFq0CH9/f7777rvLrn/s2LHWkx83bdpE37598fX15fDhw2zYsAFXV1e2bNlS5/WtW7fmmWeesWv/4IMPWLduHb/4xS+YNGkSQUFB1r49e/ZYT4vcsWMHAKtXr+bf//43AGPGjMHPz++yP8PPirNzzfNqE/8KC96tOf2xRwfIGlPzrJyIiIjIdUphTRpcYGAgzz33HO3btycvL4+5c+dy+vRpTCYTbdq0ITEx0Tr2rrvuYty4caxZs4bZs2fj7OxMly5dmDhxIn/72984ftz2gIyxY8dSUlLCggULOHnyJBaLhf/85z907NiR3/3udxw+fJi8vDxSUlIIDQ2lX79+uLu788orr1x2/d26dePFF1/kb3/7G59++imbNm3CMAxatmxJWFgY//M//3PR611dXbnpppvs2s//xICnp6dd/9atW5k2bZpN24oVK6x/T0xM/PmGtc2zLj2mhTcsGV3zEhEREWkmFNbkikRHR5ORkUHPnj0vOq579+4EBAQQFxfH8ePHqaqqwsPDA5PJRLt2P57M5+vry7hx44iOjubEiRMYhkFQUBA9e/YkIiLCbstjbGwsHh4eHD16FLPZDNRsf4Sau27Tp0/ns88+o6KiAl9fXyIiIjAMg65duxIdHW2dJykpiTvuuMOmlvOcnJwYMGAAYWFhfPHFF9YjWj09PQkMDKzzObyr+e7uueceMjIy6rw2JCSkXmuKiIiIyPVLR/eLNDPX9dH9IiIiImKlH8UWERERERFxQAprIiIiIiIiDkhhTURERERExAEprImIiIiIiDgghTUREREREREHpKP7RZqr9i3B2bWpq6gRFtjUFYiIiIhcdxTWRJqr9CfBx7epq/iRl1tTVyAiIiJyXVFYE2muQkzg60BhTURERESuiJ5ZExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIiIiIiIgDUlgTERERERFxQAprIiIiIiIiDkhhTURERERExAHd0NQFiEgjOXoCvj/b1FX8yMsN/LyaugoRERGR64bCmkhzlbwY/lve1FXUCAuE10crrImIiIhcAYU1kebqq2I4cLypqxARERGRetIzayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiLXt9JTkPQqBAwDr4ehz3T49GBTVyUiIiJy1RTWROT6VV0NA2bD8jxIjoM/PgrflkHv6fDFkaauTkREROSqKKzJFcnJycEwjDpfubm5jbr+pEmTSEtLa9Q1GkJubi4xMTEEBwfj7u6Ou7s7bdu25amnnuL4cfvj9Kurq5kxYwZhYWG4urri6elJ9+7dG/37dHi9p8GwV+ruz9kGBZ9DVjLMeAhGx8HmmeDsBDNWXrs6RURERBqBfmdN6qVPnz7Ex8fbtUdERDTqupmZmQQHBzN58uRGXedq7du3j8rKSgYOHEhoaCjV1dV8/PHHvP766/zjH//gs88+w8fHxzr+gQce4O233+a2225jwoQJVFRUsGLFCu69917+/ve/M2jQoCb8NA4sZxsE+cMDUT+2BfhBYjQs2wrmKnBzabLyRERERK6GwprUS2RkJKmpqU1dRoMym81UVVXh7e191XNNmjSJSZMm2bWnpKQwf/58MjMzGTt2LACbNm3i7bffpnv37uzYsQMnp5ob3lOmTOGWW25hzJgx3H///Tg7O191Xc3OrkPQ7SZwumCTwJ0dYdEH8J8jcFu7pqlNRERE5CppG6Q0mgULFtClSxfc3d1xc3OjU6dOLFy4sNZxUVFRtGzZEhcXF3x8fIiOjmbr1q024wzDoLi4mH379tlsvdy3b5+1v3///nbzp6WlYRgGOTk51rakpCQMw6CgoIDBgwdjMpnw8PBg/fr1AFRUVJCcnEybNm1wcXHB09OTnj17snHjxqv6TsLCwgA4ceKEte38VsehQ4dagxpAUFAQv/rVrzh27BjvvPPOVa3bbB0tgZAW9u3n246csO8TERERuU7ozprUS0VFBUVFRTZtHh4e3HjjjQA8/vjjZGZm0r17d8aOHYuzszPvvvsuycnJHD16lNmzZ1uvy8jIwM/Pj8TEREJCQjhw4ACrV68mNjaWgoICunXrBsBLL73EzJkz8fHxISUlxXp969at6/05hgwZgqurKyNGjMAwDNq1a4fZbCYqKor9+/cTGxvL8OHDKS0tZeXKlcTFxbFu3Truueeey5q/vLyc8vJyTp48yZYtW5g9ezY33HAD9913n3WM2WwGwMvLy+56Dw8PALZs2WJzTbNUdRbKKuzbzFVQXG7bbvKuuZt2+gy41fLPmLtrzZ+nzzROrSIiIiLXgMKa1EtGRgYZGRk2bb1792bTpk1s2LCBzMxMHnvsMbKysqz9aWlpREdHM3fuXFJSUqzBLj8/H39/f5u5Ro0aRUxMDLNmzeKtt94CIDU1lTlz5mAymRpsC6a3tzc7d+7E1dXV2jZx4kT27t1LdnY2Q4cOtbZPnz6dzp07M378ePbu3XtZ848cOZK///3v1vdt2rQhKyuLyMhIa9svfvELAD744AOefPJJa/v559wAu2DcLOX/u+bY/QsVfA4rPrJtO/QatA8ED1cwn7W/pvKHkObhat8nIiIicp3QNkipl4EDB7J8+XKb1/m7ZUuWLMEwDEaPHk1RUZHNa8CAAVRWVlq3GwLWoFZdXU1xcTFFRUW0bt2aVq1asWfPnkb9HGPGjLEJagBvvvkmrVq1onfv3ja1V1ZW0rNnTwoLCzl58uRlzZ+amsry5ctJT09n8ODBuLi48M0339iMeeSRR2jdujVvvvkm48aNY9euXWzdupX4+HgOHz4M1NzJbA527dpl876goODHN79oT+GfEzm3fhp8MAM+mEFlp2Cq+oRb3x9fPppv/jYKgv0BOBfkx8n/HLaf82hJzZtWJts1gO3bt3Pu3Dnr+8LCQkpKSqzvi4qKrN871NwdPb/Vtta6a3mvNbSG1tAaWkNraA2tcTlrXIphsVgsV3SF/Kzl5OSQkJBASkoKL7/8cq1jevTowSeffHLReebMmWM9gGPjxo1MnjyZ3bt3W7cEnhcQEMC3335r8z44OLjWO1uGYdCvXz+bIAg1d/SmTJnCqlWrrKcqJ
iUlsXjxYgoKCujVq5fNeDc3N86cufj2uf3799O5c+eLjqnNsmXLeOSRR3j55ZdttnLu37+fhx56yOZztWvXjkGDBjF37lweeughVqxYcVlrlJeX4+fnR1mHJHwP2P9MQJO4JRQ2Pg+tTFd2Xe9pNXfQssbU3p/wEuTthyNLbA8ZSXoV/rYVTizVaZAiIiJy3dI2SGlwFosFwzBYunRpnScY3nnnnUBNSImPj8fT05OnnnqK8PBwvL29MQyDZ555htOnT191PWfP1rJN7gc/PT7/p/W3bduWF198sc7r6vuc3NChQ0lOTmbJkiU2Ye3WW29lz5497N+/n3//+98EBQURHR3NtGnTrP1Si0G9ao7vf3M7DIquaSsuh1UFMPAOBTURERG5rimsSYMLCwtj586ddOjQgaioqIuOzcrKwmw2k52dTUJCgk3fqFGjcHGx/Y9twzDqnMvLy4vS0lK79oMHD15+8UBISAhlZWUkJiY2ynH5Z86coby8vNa+W2+91SaYvf/++xiGod9Zq8ugXhDVCYanQ2ERtPSBv6yHc9Xw/OCmrk5ERETkquiZNWlwTzzxBAATJkygqqrKrv/QoUPWv58PQxfuxp05cyZlZWV217q7u9cZdEJDQyksLLTpP3bsGGvWrLmi+h988EHKysqYOHFirf0/rb8uX375Za3tc+bM4fTp05f14+GLFy9mx44dxMbGEh4efsnxP0vOzpD7LDx0Fyx4FyYuhZa+NVsubwlt6upERERErorurEmD69evH0lJSSxatIibb76Z+Ph4QkNDOXLkCLt372bHjh3WrYmJiYnMnTuXkSNHkpeXh8lUcyDEtm3bCAoKsnmoEyAiIoLc3FxGjBhBeHg4Tk5ODBs2DH9/f5KSkkhNTaVHjx4kJCRQUlLCG2+8QVBQUK3Bry5paWls2bKFefPmkZeXR0xMDH5+fnz99dfk5+fj5uZ2yYNP7r77bvz8/OjevTvt2rWjtLSU7du3s23bNkwmE3PnzrUZP2DAAKqrq4mMjMTDw4P8/Hw+/PBDOnToQHZ29mXX3uxsnnXpMS28YcnompeIiIhIM6KwJo0iIyODqKgo0tPTWbZsGWazGV9fX8LCwpg6dap1XGRkJCtWrGDq1KksXrwYJycnunbtynvvvcfIkSM5duyYzbwLFy5kyJAhrFy5koqKCiwWC3fffTf+/v5MmDCBoqIisrOzmTNnDoGBgYwbNw4nJyemTJly2bW7ubmxbds2ZsyYwapVq6w/5G0ymQgPD2f48OGXnGPo0KGsXbuW1atXc/LkSZydnQkODuaRRx7hhRdeIDTU9q5Pjx49yM7OZsuWLZw9e5bg4GBGjRpFWlparc/ViYiIiEjzp9MgRZqZZnUapIiIiMjPmJ5ZExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oD0O2sizVX7luDs2tRV1AgLbOoKRERERK47CmsizVX6k+Dj29RV/MjLrakrEBEREbmuKKyJNFchJvB1oLAmIiIiIldEz6yJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIiIiIiIgDUlgTERERERFxQAprIiIiIiIiDkhhTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigG5o6gJEpJEcPQHfn23qKn7k5QZ+Xk1dhYiIiMh1Q2FNpLlKXgz/LW/qKmqEBcLroxXWRERERK6AwppIc/VVMRw43tRViIiIiEg96Zk1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIiIiIiIgDUlgTketb6SlIehUChoHXw9BnOnx6sKmrEhEREblqCmsicv2qroYBs2F5HiTHwR8fhW/LoPd0+OJIU1cnIiIiclUU1uSK5OTkYBhGna/c3NxGXX/SpEmkpaU16hoNITc3l5iYGIKDg3F3d8fd3Z22bdvy1FNPcfz4pY/Tf+SRRzAMA3d392tQrQPrPQ2GvVJ3f842KPgcspJhxkMwOg42zwRnJ5ix8trVKSIiItII9DtrUi99+vQhPj7erj0iIqJR183MzCQ4OJjJkyc36jpXa9++fVRWVjJw4EBCQ0Oprq7m448/5vXXX+cf//gHn332GT4+PrVeu2nTJpYvX46bm9s1rvo6lLMNgvzhgagf2wL8IDEalm0FcxW4uTRZeSIiIiJXQ2FN6iUyMpLU1NSmLqNBmc1mqqqq8Pb2vuq5Jk2axKRJk+zaU1JSmD9/PpmZmYwdO9auv6qqiieffJIePXpw6tQpvvjii6uupVnbdQi63QROF2wSuLMjLPoA/nMEbmvXNLWJiIiIXCVtg5RGs2DBArp06YK7uztubm506tSJhQsX1jouKiqKli1b4uLigo+PD9HR0WzdutVmnGEYFBcXs2/fPputl/v27bP29+/f327+tLQ0DMMgJyfH2paUlIRhGBQUFDB48GBMJhMeHh6sX78egIqKCpKTk2nTpg0uLi54enrSs2dPNm7ceFXfSVhYGAAnTpyotX/KlCn897//5fXXX7+qdX42jpZASAv79vNtR2r/nkVERESuB7qzJvVSUVFBUVGRTZuHhwc33ngjAI8//jiZmZl0796dsWPH4uzszLvvvktycjJHjx5l9uzZ1usyMjLw8/MjMTGRkJAQDhw4wOrVq4mNjaWgoIBu3boB8NJLLzFz5kx8fHxISUmxXt+6det6f44hQ4bg6urKiBEjMAyDdu3aYTabiYqKYv/+/cTGxjJ8+HBKS0tZuXIlcXFxrFu3jnvuueey5i8vL6e8vJyTJ0+yZcsWZs+ezQ033MB9991nN/azzz7jlVdeYdSoUYSHh9f7M123qs5CWYV9m7kKistt203eNXfTTp8Bt1r+GXN3rfnz9JnGqVVERETkGlBYk3rJyMggIyPDpq13795s2rSJDRs2kJmZyWOPPUZWVpa1Py0tjejoaObOnUtKSoo12OXn5+Pv728z16hRo4iJiWHWrFm89dZbAKSmpjJnzhxMJlODbcH09vZm586duLq6WtsmTpzI3r17yc7OZujQodb26dOn07lzZ8aPH8/evXsva/6RI0fy97//3fq+TZs2ZGVlERkZaTd22LBhBAUF8cc//rH+H+h6lv/vmmP3L1TwOaz4yLbt0GvQPhA8XMF81v6ayh9CmoerfZ+IiIjIdULbIKVeBg4cyPLly21e5++WLVmyBMMwGD16NEVFRTavAQMGUFlZad1uCFiDWnV1NcXFxRQVFdG6dWtatWrFnj17GvVzjBkzxiaoAbz55pu0atWK3r1729ReWVlJz549KSws5OTJk5c1f2pqKsuXLyc9PZ3Bgwfj4uLCN998Yzdu3rx57Ny5k/T0dLt6mpNdu3bZvC8oKPjxzS/aU/jnRM6tnwYfzIAPZlDZKZiqPuHW98eXj+abv42CYH8AzgX5cfI/h+3nPFpS86aVyXYNYPv27Zw7d876vrCwkJKSEuv7oqIiDh/+cc7y8nLrVtta667lvdbQGlpD
a2gNraE1tMblrHEphsVisVzRFfKzlpOTQ0JCAikpKbz88su1junRoweffPLJReeZM2eO9QCOjRs3MnnyZHbv3o3ZbLYZFxAQwLfffmvzPjg4uNY7W4Zh0K9fP5sgCDV39KZMmcKqVasYNGgQUPPM2uLFiykoKKBXr142493c3Dhz5uLb5/bv30/nzp0vOqY2y5Yt45FHHuHll1+2buU8cuQIt956KzExMaxbt8469rbbbuOLL76gsrLyitYoLy/Hz8+Psg5J+B649M8EXBO3hMLG56GV6cqu6z2t5g5a1pja+xNegrz9cGSJ7SEjSa/C37bCiaU6DVJERESuW9oGKQ3OYrFgGAZLly7F2dm51jF33nknUBN64uPj8fT05KmnniI8PBxvb28Mw+CZZ57h9OnTV13P2bO1bJP7QW3H51ssFtq2bcuLL75Y53X1fU5u6NChJCcns2TJEmtYGzduHGazmeTkZJs7T2azGYvFwq5du/Dw8KhXOGz2BvWqOb7/ze0wKLqmrbgcVhXAwDsU1EREROS6prAmDS4sLIydO3fSoUMHoqKiLjo2KysLs9lMdnY2CQkJNn2jRo3CxcX2P7YNw6hzLi8vL0pLS+3aDx48ePnFAyEhIZSVlZGYmFhn2LwaZ86cobz8xwMzioqKMJvNxMXF1Tq+W7dutGnTxuY2u/xgUC+I6gTD06GwCFr6wF/Ww7lqeH5wU1cnIiIiclUU1qTBPfHEE+Tk5DBhwgQ2b95sF7gOHTpkPcL+fBi6cDfuzJkzKSsro2XLljbt7u7uNkHnp0JDQyksLKS8vBxfX18Ajh07xpo1a66o/gcffJB58+YxceLEWrd6/rT+unz55ZfcdNNNdu1z5szh9OnTNj8ePnXqVL766iu7sS+++CLffPMN8+bNo0WLWo6nF3B2htxnYeJfYcG7Nac/9uhQs23yltCmrk5ERETkqiisSYPr168fSUlJLFq0iJtvvpn4+HhCQ0M5cuQIu3fvZseOHdatiYmJicydO5eRI0eSl5eHyVRzIMS2bdsICgqyeagTICIigtzcXEaMGEF4eDhOTk4MGzYMf39/kpKSSE1NpUePHiQkJFBSUsIbb7xBUFAQZWVll11/WloaW7ZsYd68eeTl5RETE4Ofnx9ff/01+fn5uLm5XfLgk7vvvhs/Pz+6d+9Ou3btKC0tZfv27Wzbtg2TycTcuXOtYwcMGFDrHK+99hrHjx9n9OjRl117s7N51qXHtPCGJaNrXiIiIiLNiMKaNIqMjAyioqJIT09n2bJlmM1mfH19CQsLY+rUqdZxkZGRrFixgqlTp7J48WKcnJzo2rUr7733HiNHjuTYsWM28y5cuJAhQ4awcuVKKioqsFgs3H333fj7+zNhwgSKiorIzs5mzpw5BAYGMm7cOJycnJgyZcpl1+7m5sa2bduYMWMGq1atsv6Qt8lkIjw8nOHDh19yjqFDh7J27VpWr17NyZMncXZ2Jjg4mEceeYQXXniB0FDd9RERERGRi9NpkCLNTLM6DVJERETkZ0y/syYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA1JYExERERERcUAKayIiIiIiIg5IYU1ERERERMQB6XfWRJqr9i3B2bWpq6gRFtjUFYiIiIhcdxTWRJqr9CfBx7epq/iRl1tTVyAiIiJyXVFYE2muQkzg60BhTURERESuiJ5ZExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIiIiIiIgDUlgTERERERFxQAprIiIiIiIiDkhhTURERERExAHd0NQFiEgjOXoCvj/b1FWAlxv4eTV1FSIiIiLXHYU1keYqeTH8t7xpawgLhNdHK6yJiIiI1IPCmkhz9VUxHDje1FWIiIiISD3pmTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBOR61PpKUh6FQKGgdfD0Gc6fHqwqasSERERaTAKayJy/amuhgGzYXkeJMfBHx+Fb8ug93T44khTVyciIiLSIBTW5Irk5ORgGEadr9zc3EZdf9KkSaSlpTXqGg1hw4YN/L//9/+46aab8PLywsvLiw4dOjBt2jTMZrPd+P79+9f5nS5cuLAJPkET6z0Nhr1Sd3/ONij4HLKSYcZDMDoONs8EZyeYsfLa1SkiIiLSiPQ7a1Ivffr0IT4+3q49IiKiUdfNzMwkODiYyZMnN+o6V2v27Nl88skn/OpXv2Lo0KGcPXuW9957j9mzZ5Obm8vHH3+Mk5P9/1fy0ksv2bX17dv3WpR8fcnZBkH+8EDUj20BfpAYDcu2grkK3FyarDwRERGRhqCwJvUSGRlJampqU5fRoMxmM1VVVXh7e1/1XCkpKfTt29dmrhdeeIG7776bDRs2kJWVxeOPP253XXP7ThvNrkPQ7Sa4MPDe2REWfQD/OQK3tWua2kREREQaiLZBSqNZsGABXbp0wd3dHTc3Nzp16lTrlr4FCxYQFRVFy5YtcXFxwcfHh+joaLZu3WozzjAMiouL2bdvn802wX379ln7+/fvbzd/WloahmGQk5NjbUtKSsIwDAoKChg8eDAmkwkPDw/Wr18PQEVFBcnJybRp0wYXFxc8PT3p2bMnGzduvKzP/tvf/rbW0Pfwww8D8K9//avW66qrqykuLubcuXOXtc7P1tESCGlh336+7ciJa1uPiIiISCPQnTWpl4qKCoqKimzaPDw8uPHGGwF4/PHHyczMpHv37owdOxZnZ2feffddkpOTOXr0KLNnz7Zel5GRgZ+fH4mJiYSEhHDgwAFWr15NbGwsBQUFdOvWDajZIjhz5kx8fHxISUmxXt+6det6f44hQ4bg6urKiBEjMAyDdu3aYTabiYqKYv/+/cTGxjJ8+HBKS0tZuXIlcXFxrFu3jnvuuade63399dcABAcH19rv5eVFZWUlN9xwA127duWFF14gLi6u3p/vulB1Fsoq7NvMVVBcbttu8q65m3b6DLjV8s+Xu2vNn6fPNE6tIiIiIteQwprUS0ZGBhkZGTZtvXv3ZtOmTWzYsIHMzEwee+wxsrKyrP1paWlER0czd+5cUlJSrMEuPz8ff39/m7lGjRpFTEwMs2bN4q233gJqtgjOmTMHk8nUYNsFvb292blzJ66urta2iRMnsnfvXrKzsxk6dKi1ffr06XTu3Jnx48ezd+/eK16rpKSE1157DQ8PD4YNG2bTFxgYyEMPPUSPHj3w8fFh586dLFu2jIEDB5KdnW29I9cs5f+75tj9CxV8Dis+sm079Bq0DwQPVzCftb+m8oeQ5uFq3yciIiJyndE2SKmXgQMHsnz5cpvX+btlS5YswTAMRo8eTVFRkc1rwIABVFZWWrcbAtagdn4LYFFREa1bt6ZVq1bs2bOnUT/HmDFjbIIawJtvvkmrVq3o3bu3Te2VlZX07NmTwsJCTp48eUXrVFVVMWDAAI4fP84f/vAHWrVqZdO/dOlSVqxYwYQJE0hKSiIjI4O8vDycnZ0
ZP378VX9OR1FQUGD//hft4YMZ8MEMCv+cyLn10yCiHcRG8vWSYZx8c4K1v+jsKQ4fPlyz3fFoCeXl5dZtsEDN9kiAVqY619y+fbvNNtPCwkJKSkqs74uKimrW+IHdGnV9Dq2hNbSG1tAaWkNraI0rXONSDIvFYrmiK+RnLScnh4SEBFJSUnj55ZdrHdOjRw8++eSTi84zZ84cJk2aBMDGjRuZPHkyu3fvtjvWPiAggG+//dbmfXBwcK13tgzDoF+/fjZBEGru6E2ZMoVVq1YxaNAgoOaZtcWLF1NQUECvXr1sxru5uXHmzMW30e3fv5/OnTtfdMx5586dIz4+nvfff5/Ro0eTnp5+WdcBxMXFsX79ej7++GPuuOOOy7qmvLwcPz8/yjok4Xvg+GWv1ShuCYWNz9uEp8vSe1rNHbSsMbX3J7wEefvhyBLbQ0aSXoW/bYUTS3UapIiIiFz3tA1SGpzFYsEwDJYuXYqzs3OtY+68806gJvTEx8fj6enJU089RXh4ON7e3hiGwTPPPMPp06evup6zZ2vZLvcDHx+fWutv27YtL774Yp3XXe5zcufOneM3v/kN77//Pk8++eQVBTWAtm3bAnD06NEruq7ZG9Sr5vj+N7fDoOiatuJyWFUAA+9QUBMREZFmQWFNGlxYWBg7d+6kQ4cOREVFXXRsVlYWZrOZ7OxsEhISbPpGjRqFi4vtf3QbhlHnXF5eXpSWltq1Hzx48PKLB0JCQigrKyMxMbHOsHk5zge19evX8/jjj7No0aIrnuN87W3atKl3Hc3SoF4Q1QmGp0NhEbT0gb+sh3PV8Pzgpq5OREREpEHomTVpcE888QQAEyZMoKqqyq7/0KFD1r+fD0MX7sadOXMmZWVldte6u7tTXl5u1w4QGhpKYWGhTf+xY8dYs2bNFdX/4IMPUlZWxsSJE2vt/2n9damurua3v/0t69evZ9iwYbz++ut1ji0tLeX777+3a9+4cSNbtmwhNDSUyMjIy67/Z8HZGXKfhYfuggXvwsSl0NK3ZsvlLaFNXZ2IiIhIg9CdNWlw/fr1IykpiUWLFnHzzTcTHx9PaGgoR44cYffu3ezYscO6NTExMZG5c+cycuRI8vLyMJlMFBQUsG3bNoKCgux+bywiIoLc3FxGjBhBeHg4Tk5ODBs2DH9/f5KSkkhNTaVHjx4kJCRQUlLCG2+8QVBQUK3Bry5paWls2bKFefPmkZeXR0xMDH5+fnz99dfk5+fj5uZ2yYNPhg4dSm5uLu3btyc8PJw//elPNv233norAwYMAGDXrl3cf//99O7dmw4dOuDt7c3u3btZt24dTk5Otf42XbO3edalx7TwhiWja14iIiIizZDCmjSKjIwMoqKiSE9PZ9myZZjNZnx9fQkLC2Pq1KnWcZGRkaxYsYKpU6eyePFinJyc6Nq1K++99x4jR47k2LFjNvMuXLiQIUOGsHLlSioqKrBYLNx99934+/szYcIEioqKyM7OZs6cOQQGBjJu3DicnJyYMmXKZdfu5ubGtm3bmDFjBqtWrbKGJZPJRHh4OMOHD7/kHOcPQPnqq69qvUPXr18/a1hr164dPXr04OOPP+a9997jzJkztGjRgr59+zJr1izr830iIiIi8vOi0yBFmplmcRqkiIiIiOiZNREREREREUeksCYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA1JYExERERERcUAKayIiIiIiIg5Iv7Mm0ly1bwnOrk1bQ1hg064vIiIich1TWBNprtKfBB/fpq4CvNyaugIRERGR65LCmkhzFWICXwcIayIiIiJSL3pmTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQd0Q1MXICKN5OgJ+P5s09bg5QZ+Xk1bg4iIiMh1SmFNpLlKXgz/LW+69cMC4fXRCmsiIiIi9aSwJtJcfVUMB443dRUiIiIiUk96Zk1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kTk+lR6CpJehYBh4PUw9JkOnx5s6qpEREREGozCmohcf6qrYcBsWJ4HyXHwx0fh2zLoPR2+ONLU1YmIiIg0CIU1uSI5OTkYhlHnKzc3t1HXnzRpEmlpaY26RmM4ePAgXl5eGIbB+PHj7fofffRROnfujK+vLy4uLtx444307NmTnJycJqjWAfSeBsNeqbs/ZxsUfA5ZyTDjIRgdB5tngrMTzFh57eoUERERaUT6nTWplz59+hAfH2/XHhER0ajrZmZmEhwczOTJkxt1nYY2bNgwzp07V2f/rl27uOWWWxgwYAAmk4mjR4+yZs0aEhISmDNnDpMmTbqG1V4HcrZBkD88EPVjW4AfJEbDsq1grgI3lyYrT0RERKQhKKxJvURGRpKamtrUZTQos9lMVVUV3t7eDTrv4sWLyc/P5+mnn2b+/Pm1jtm7d69d26xZs2jfvj0LFixQWLvQrkPQ7SZwumBzwJ0dYdEH8J8jcFu7pqlNREREpIFoG6Q0mgULFtClSxfc3d1xc3OjU6dOLFy4sNZxUVFRtGzZEhcXF3x8fIiOjmbr1q024wzDoLi4mH379tlsvdy3b5+1v3///nbzp6WlYRiGzZbCpKQkDMOgoKCAwYMHYzKZ8PDwYP369QBUVFSQnJxMmzZtcHFxwdPTk549e7Jx48Yr+g6+++47fv/733P//fdz1113XdG1LVq0wMfHh++///6KrvtZOFoCIS3s28+3HTlxbesRERERaQS6syb1UlFRQVFRkU2bh4cHN954IwCPP/44mZmZdO/enbFjx+Ls7My7775LcnIyR48eZfbs2dbrMjIy8PPzIzExkZCQEA4cOMDq1auJjY2loKCAbt26AfDSSy8xc+ZMfHx8SElJsV7funXren+OIUOG4OrqyogRIzAMg3bt2mE2m4mKimL//v3ExsYyfPhwSktLWblyJXFxcaxbt4577rnnsuZPSkqiurqa1157jS1btlxyfFFREefOneOrr75iwYIF/N///V+t202blaqzUFZh32auguJy23aTd83dtNNnwK2Wf77cXWv+PH2mcWoVERERuYYU1qReMjIyyMjIsGnr3bs3mzZtYsOGDWRmZvLYY4+RlZVl7U9LSyM6Opq5c+eSkpJiDXb5+fn4+/vbzDVq1ChiYmKYNWsWb731FgCpqanMmTMHk8nUYFswvb292blzJ66urta2iRMnsnfvXrKzsxk6dKi1ffr06XTu3Jnx48fXum3xQrm5ubz11lvMnz+fgICAS44vKSmhTZs21veurq4MHDiQpUuXXuGnus7k/7vm2P0LFXwOKz6ybTv0GrQPBA9XMJ+1v6byh5Dm4WrfJyIiInKd0TZIqZeBAweyfPlym9f5u2VLlizBMAxGjx5NUVGRzWvAgAFUVlZatxsC1qBWXV1NcXExRUVFtG7dmlatWrFnz55G/RxjxoyxCW
oAb775Jq1ataJ37942tVdWVtKzZ08KCws5efLkRec1m82MHDmS22+/nbFjx15WLd7e3ixfvpylS5cyY8YMOnToQEVFBeXl5Ze+2EFVVVXZvC8oKLB//4v28MEM+GAGhX9O5Nz6aRDRDmIj+XrJME6+OcHaX3T2FIcPH67Z7ni0hPLycus2WKBmeyRAK1Oda27fvt3msJfCwkJKSkqs74uKimrW+IHdGnV9Dq2hNbSG1tAaWkNraI0rXONSDIvFYrmiK+RnLScnh4SEBFJSUnj55ZdrHdOjRw8++eSTi87z0xMON27cyOTJk9m9ezdms9lmXEBAAN9++63N++Dg4FrvbBmGQb9+/WyCINTc0ZsyZQqrVq1i0KBBQM32xMWLF1NQUECvXr1sxru5uXHmzMW30e3fv5/OnTvX2f+73/2OJUuWsGPHDiIjI4HL++5+qqqqisjISE6ePMkXX3yBm5vbJa+Bmn8o/Pz8KOuQhO+B45d1TaO4JRQ2Pm8TnC5b72k1d9CyxtTen/AS5O2HI0tsDxlJehX+thVOLNVpkCIiInLd0zZIaXAWiwXDMFi6dCnOzs61jrnzzjuBmtATHx+Pp6cnTz31FOHh4Xh7e2MYBs888wynT5++6nrOnq1lu9wPfHx8aq2/bdu2vPjii3Ved7Hn5A4ePMiSJUuIi4vDYrGwa9cuAL7++mug5tCRXbt2ERYWZrf986dcXFwYNGgQM2fOZM2aNTz00EN1jv3ZGdSr5vj+N7fDoOiatuJyWFUAA+9QUBMREZFmQWFNGlxYWBg7d+6kQ4cOREVFXXRsVlYWZrOZ7OxsEhISbPpGjRqFi4vtf3QbhlHnXF5eXpSWltq1Hzx48PKLB0JCQigrKyMxMbHOsHkxhw8fpqqqirVr17J27Vq7/qVLl7J06VLS09MZPXr0ReeqqKg5eOP48Sa8Q+aIBvWCqE4wPB0Ki6ClD/xlPZyrhucHN3V1IiIiIg1Cz6xJg3viiScAmDBhgt0zSwCHDh2y/v18GLpwN+7MmTMpKyuzu9bd3b3OZ7hCQ0MpLCy06T927Bhr1qy5ovoffPBBysrKmDhxYq39P62/Nl27diU9Pd3ulZSUBEBsbCzp6enExsZaa6ztDuL5EygNw6BPnz5X9BmaPWdnyH0WHroLFrwLE5dCS9+abZe3hDZ1dSIiIiINQnfWpMH169ePpKQkFi1axM0330x8fDyhoaEcOXKE3bt3s2PHDuvWxMTERObOncvIkSPJy8vDZDJRUFDAtm3bCAoKsnmoEyAiIoLc3FxGjBhBeHg4Tk5ODBs2DH9/f5KSkkhNTaVHjx4kJCRQUlLCG2+8QVBQUK3Bry5paWls2bKFefPmkZeXR0xMDH5+fnz99dfk5+fj5uZ20YNPAgICar1jlpOTw6JFiwgPD7fpX7t2LRMnTqRv377cfPPN+Pr68uWXX/LOO+9w4sQJnnzyScLDwy+7/mZh86xLj2nhDUtG17xEREREmiGFNWkUGRkZREVFkZ6ezrJlyzCbzfj6+hIWFsbUqVOt4yIjI1mxYgVTp05l8eLFODk50bVrV9577z1GjhzJsWPHbOZduHAhQ4YMYeXKlVRUVGCxWLj77rvx9/dnwoQJFBUVkZ2dzZw5cwgMDGTcuHE4OTkxZcqUy67dzc2Nbdu2MWPGDFatWmX9IW+TyUR4eDjDhw9vmC/pB3feeSe//OUv+fjjj3nvvfcwm834+Phwyy23MHLkyAZfT0RERESuDzoNUqSZaRanQYqIiIiInlkTERERERFxRAprIiIiIiIiDkhhTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigPQ7ayLNVfuW4OzadOuHBTbd2iIiIiLNgMKaSHOV/iT4+DZtDV5uTbu+iIiIyHVMYU2kuQoxgW8ThzURERERqTc9syYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA1JYExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIiIiIiIgDuqGpCxCRRnL0BHx/tmlr8HIDP6+mrUFERETkOqWwJtJcJS+G/5Y33fphgfD6aIU1ERERkXpSWBNprr4qhgPHm7oKEREREaknPbMmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsicv0pPQVJr0LAMPB6GPpMh08PNnVVIiIiIg1KYU1Eri/V1TBgNizPg+Q4+OOj8G0Z9J4OXxxp6upEREREGozCmlyRnJwcDMOo85Wbm9uo60+aNIm0tLRGXaMxHDx4EC8vLwzDYPz48bWO+cc//kFkZCTu7u54eHjQvXt3Nm3adI0rdQC9p8GwV+ruz9kGBZ9DVjLMeAhGx8HmmeDsBDNWXrs6RURERBqZfmdN6qVPnz7Ex8fbtUdERDTqupmZmQQHBzN58uRGXaehDRs2jHPnztXZn5uby7333ovJZGLMmDEALF26lLi4OD788EN++ctfXqtSHV/ONgjyhweifmwL8IPEaFi2FcxV4ObSZOWJiIiINBSFNamXyMhIUlNTm7qMBmU2m6mqqsLb27tB5128eDH5+fk8/fTTzJ8/v9YxTz/9NDfccAMfffQRHTt2BCApKYmIiAjGjh3Lp59+2qA1Xdd2HYJuN4HTBRsD7uwIiz6A/xyB29o1TW0iIiIiDUjbIKXRLFiwgC5duuDu7o6bmxudOnVi4cKFtY6LioqiZcuWuLi44OPjQ3R0NFu3brUZZxgGxcXF7Nu3z2br5b59+6z9/fv3t5s/LS0NwzDIycmxtiUlJWEYBgUFBQwePBiTyYSHhwfr168HoKKiguTkZNq0aYOLiwuenp707NmTjRs3XtF38N133/H73/+e+++/n7vuuqvWMbt27eLAgQP07dvXGtQAOnbsSN++ffnXv/7Fl19+eUXrNmtHSyCkhX37+bYjJ65tPSIiIiKNRHfWpF4qKiooKiqyafPw8ODGG28E4PHHHyczM5Pu3bszduxYnJ2deffdd0lOTubo0aPMnj3bel1GRgZ+fn4kJiYSEhLCgQMHWL16NbGxsRQUFNCtWzcAXnrpJWbOnImPjw8pKSnW61u3bl3vzzFkyBBcXV0ZMWIEhmHQrl07zGYzUVFR7N+/n9jYWIYPH05paSkrV64kLi6OdevWcc8991zW/ElJSVRXV/Paa6+xZcuWWsds3rwZgKioKLu+nj17kpuby5YtW7jpppvq/TkdVtVZKKuwbzNXQXG5bbvJu+Zu2ukz4FbLP13urjV/nj7TOLWKiIiIXGMKa1IvGRkZZGRk2LT17t2bTZs2sWHDBjIzM3nsscfIysqy9qelpREdHc3cuXNJSUmxBrv8/Hz8/f1t5ho1ahQxMTHMmjWLt956C4DU1FTmzJmDyWRqsC2Y3t7e7Ny5E1dXV2vbxIkT2bt3L9nZ2QwdOtTaPn36dDp37sz48ePZu3fvJefOzc3lrbfeYv78+QQEBNQ57nzobdu2rV3f+bbDh
w9f9me6ruT/u+bY/QsVfA4rPrJtO/QatA8ED1cwn7W/pvKHkObhat8nIiIich3SNkipl4EDB7J8+XKb1/m7ZUuWLMEwDEaPHk1RUZHNa8CAAVRWVlq3GwLWoFZdXU1xcTFFRUW0bt2aVq1asWfPnkb9HGPGjLEJagBvvvkmrVq1onfv3ja1V1ZW0rNnTwoLCzl58uRF5zWbzYwcOZLbb7+dsWPHXnRsRUXNnSUPDw+7Pk9PTwBOnTp1JR/LYVRVVdm8LygosHn/z8pv4YMZ1lfhnxOxRLSD2Ej4YAZfLxnGyTcn1PQH+1NUVERVgHfNVkigvLzcug32fNu/vrUNtheuuX37dpvDXgoLCykpKbG+LyoqsgnHNmvUMafW0BpaQ2toDa2hNbRGfda4JIvIFVi1apUFsKSkpNQ55o477rAAF33NmTPHOn7Dhg2WO++80+Lm5mY3LiAgwGbuli1bWrp27VrruoClX79+du0vvPCCBbCsWrXK2vbkk09aAEtBQYHdeFdX10vWv3///ot+T6NGjbK4uLhYdu3aZW2r67sbP368BbBkZWXZzZOZmWkBLM8999xF1/upsrIyC2Ap65BksXB/071uSbZY/u+7y67b6tfPWiyPLai7f9AfLZag4RbLuXO27U/+xWLxHGyxVJ658jVFREREHJC2QUqDs1gsGIbB0qVLcXZ2rnXMnXfeCcD+/fuJj4/H09OTp556ivDwcLy9vTEMg2eeeYbTp09fdT1nz9ayZe4HPj4+tdbftm1bXnzxxTqvu9hzcgcPHmTJkiXExcVhsVjYtWsXAF9//TVQc+jIrl27CAsLw9/f3zpXbVsdz7fVtkXyZ2tQr5rj+9/cDoOia9qKy2FVAQy8Q8f2i4iISLOhsCYNLiwsjJ07d9KhQ4daD834qaysLMxmM9nZ2SQkJNj0jRo1ChcX2//wNgyjzrm8vLwoLS21az948ODlFw+EhIRQVlZGYmJinWHzYg4fPkxVVRVr165l7dq1dv1Lly5l6dKlpKenM3r0aHr37g3U3Gq/0D//+U8Mw+DXv/71FdfRbA3qBVGdYHg6FBZBSx/4y3o4Vw3PD27q6kREREQajJ5Zkwb3xBNPADBhwgS7Z5YADh06ZP37+TBksVhsxsycOZOysjK7a93d3SkvL7drBwgNDaWwsNCm/9ixY6xZs+aK6n/wwQcpKytj4sSJtfb/tP7adO3alfT0dLtXUlISALGxsaSnpxMbGwvA7bffzs0338zGjRs5cOCAdZ4DBw6wceNGIiIimudJkPXl7Ay5z8JDd8GCd2HiUmjpCxufh1tCm7o6ERERkQajO2vS4Pr160dSUhKLFi3i5ptvJj4+ntDQUI4cOcLu3bvZsWOHdWtiYmIic+fOZeTIkeTl5WEymSgoKGDbtm0EBQXZPNQJEBERQW5uLiNGjCA8PBwnJyeGDRuGv78/SUlJpKam0qNHDxISEigpKeGNN94gKCio1uBXl7S0NLZs2cK8efPIy8sjJiYGPz8/vv76a/Lz83Fzc7vowScBAQGMHj3arj0nJ4dFixYRHh5u1z9v3jweeOAB7rrrLh577DEA/vrXv2KxWPjzn/982bU3C5tnXXpMC29YMrrmJSIiItJMKaxJo8jIyCAqKor09HSWLVuG2WzG19eXsLAwpk6dah0XGRnJihUrmDp1KosXL8bJyYmuXbvy3nvvMXLkSI4dO2Yz78KFCxkyZAgrV66koqICi8XC3Xffjb+/PxMmTKCoqIjs7GzmzJlDYGAg48aNw8nJiSlTplx27W5ubmzbto0ZM2awatUq6w95m0wmwsPDGT58eMN8ST8xcOBA1qxZw5QpU1iwYAEA4eHhLF++XFsgRURERH6mDMuF+89E5LpWXl6On58fZR2S8D1wvOkKuSW0ZmtiK1PT1SAiIiJyHdMzayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQfmdNpLlq3xKcXZtu/bDApltbREREpBlQWBNprtKfBB/fpq3By61p1xcRERG5jimsiTRXISbwbeKwJiIiIiL1pmfWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQeksCYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA7qhqQsQkUZy9AR8f7bp1vdyAz+vpltfRERE5DqnsCbSXCUvhv+WN83aYYHw+miFNREREZGroLAm0lx9VQwHjjd1FSIiIiJST3pmTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmohcf0pPQdKrEDAMvB6GPtPh04NNXZWIiIhIg1JYE5HrS3U1DJgNy/MgOQ7++Ch8Wwa9p8MXR5q6OhEREZEGo7AmVyQnJwfDMOp85ebmNur6kyZNIi0trVHXaAwHDx7Ey8sLwzAYP368XX91dTUzZswgLCwMV1dXPD096d69e6N/nw6p9zQY9krd/TnboOBzyEqGGQ/B6DjYPBOcnWDGymtXp4iIiEgj049iS7306dOH+Ph4u/aIiIhGXTczM5Pg4GAmT57cqOs0tGHDhnHu3Lk6+x944AHefvttbrvtNiZMmEBFRQUrVqzg3nvv5e9//zuDBg26htU6uJxtEOQPD0T92BbgB4nRsGwrmKvAzaXJyhMRERFpKAprUi+RkZGkpqY2dRkNymw2U1VVhbe3d4POu3jxYvLz83n66aeZP3++Xf+mTZt4++236d69Ozt27MDJqeaG95QpU7jlllsYM2YM999/P87Ozg1a13Vr1yHodhM4XbAx4M6OsOgD+M8RuK1d09QmIiIi0oC0DVIazYIFC+jSpQvu7u64ubnRqVMnFi5cWOu4qKgoWrZsiYuLCz4+PkRHR7N161abcYZhUFxczL59+2y2Xu7bt8/a379/f7v509LSMAyDnJwca1tSUhKGYVBQUMDgwYMxmUx4eHiwfv16ACoqKkhOTqZNmza4uLjg6elJz5492bhx4xV9B9999x2///3vuf/++7nrrrtqHXN+q+PQoUOtQQ0gKCiIX/3qVxw7dox33nnnitZt1o6WQEgL+/bzbUdOXNt6RERERBqJ7qxJvVRUVFBUVGTT5uHhwY033gjA448/TmZmJt27d2fs2LE4Ozvz7rvvkpyczNGjR5k9e7b1uoyMDPz8/EhMTCQkJIQDBw6wevVqYmNjKSgooFu3bgC89NJLzJw5Ex8fH1JSUqzXt27dut6fY8iQIbi6ujJixAgMw6Bdu3aYzWaioqLYv38/sbGxDB8+nNLSUlauXElcXBzr1q3jnnvuuaz5k5KSqK6u5rXXXmPLli21jjGbzQB4eXnZ9Xl4eACwZcsW7rvvvvp9SEdWdRbKKuzbzFVQXG7bbvKuuZt2+gy41fJPl7trzZ+nzzROrSIiIiLXmO6sSb1kZGTQpk0bm9f556o2bNhAZmYmjz32GJ988gl//OMfSUtLY8+e
PfTq1Yu5c+fy3XffWefKz8+noKCAv/zlL0ybNo2//vWvfPjhh5w7d45Zs2ZZx6WmpuLm5obJZCI1NdX68vf3r/fn8Pb2Zu/evbz00kv88Y9/pEePHjz77LPs3buXzMxM3n33XWbOnMmCBQv47LPP8PHxqfWAkNrk5uby1ltv8fzzzxMQEFDnuF/84hcAfPDBBzbt1dXVfPzxxwB2wfh6UF1dzX/+8x+btoKCApv3+zJyao7f/+mr4HNY8ZF9++FiioqKqHa/AcxnASgvL7feWaWyJqQVHjpw0TW3b99u8/xgYWEhJSUl1vdFRUUcPnzY+t5mjTrm1BpaQ2toDa2hNbSG1qjPGpdiWCwWyxVdIT9rOTk5JCQkMHDgQB5++GGbvrZt23LXXXfx8MMPs3LlSv75z38SEhJiM+avf/0rzz77LMuWLWPIkCE2fdXV1Zw4cYLKykoA7rrrLm644QYOHvzx97MCAgIIDg5m7969drUZhkG/fv2sWxnPS0tLY8qUKaxatcoaKJOSkli8eDEZGRkkJSXZjL/55puprKzkn//8p90aTz31FOvXr6e0tBQfH586vyez2UzHjh0JCAhg586dNt9dSkoKL7/8snXsmTNnuPnmmzl69CjJyck89thjnDx5khdeeMEaWuPj43n33XfrXO+nysvL8fPzo6xDEr4Hjl/WNQ3ullDY+Dy0Ml18XMn3sPOC30ebkAXBLWDivbbtv7y15u5Zx9HQMQRyn7Xtf/1DeOIvsGeenlkTERGRZkHbIKVeOnToYBfWzjtw4AAWi4U777yzzuv/7//+z/r3jRs3MnnyZHbv3m3dEnjexe5INYTbbrvNrq2oqIgzZ87Qpk2bOq/7v//7Pzp37lxnf0pKCseOHWPt2rWXrMHV1ZX333+fhx56iD//+c/8+c9/BqBdu3aMGzeOuXPnXjQYXtdaeMPdv7BvC2lh335eZHvI21/ze2s/PWTkn1+Apxt0atVo5YqIiIhcSwpr0uAsFguGYbB06dI6TzA8H+T2799PfHw8np6ePPXUU4SHh+Pt7Y1hGDzzzDOcPn36qus5e/ZsnX21hSCLxULbtm158cUX67zuYs/JHTx4kCVLlhAXF4fFYmHXrl0AfP3110DNoSO7du0iLCzMuoXz1ltvZc+ePezfv59///vfBAUFER0dzbRp06z98oNBvWqO739zOwyKrmkrLodVBTDwDh3bLyIiIs2Gwpo0uLCwMHbu3EmHDh2Iioq66NisrCzMZjPZ2dkkJCTY9I0aNQoXF9v/8DYMo865vLy8KC0ttWv/6TbKyxESEkJZWRmJiYn1Oi7/8OHDVFVVsXbt2lrvrC1dupSlS5eSnp7O6NGjbfpuvfVWm2D2/vvvYxiGfmftpwb1gqhOMDwdCougpQ/8ZT2cq4bnBzd1dSIiIiINRgeMSIN74oknAJgwYQJVVVV2/YcOHbL+/XwYuvDRyZkzZ1JWVmZ3rbu7O+Xl5XbtAKGhoRQWFtr0Hzt2jDVr1lxR/Q8++CBlZWVMnDix1v6f1l+brl27kp6ebvc6/2xcbGws6enpxMbGXnSexYsXs2PHDmJjYwkPD7+iz9CsOTvXPK/20F2w4F2YuBRa+tY8I3dLaFNXJyIiItJgdGdNGly/fv1ISkpi0aJF3HzzzcTHxxMaGsqRI0fYvXs3O3bssG5NTExMZO7cuYwcOZK8vDxMJhMFBQVs27aNoKAgmxN4ACIiIsjNzWXEiBGEh4fj5OTEsGHD8Pf3JykpidTUVHr06EFCQgIlJSW88cYbBAUF1Rr86pKWlsaWLVuYN28eeXl5xMTE4Ofnx9dff01+fj5ubm7s2bOnzusDAgLs7phBzQEjixYtIjw83K5/wIABVFdXExkZiYeHB/n5+Xz44Yd06NCB7Ozsy669Wdg869JjWnjDktE1LxEREZFmSmFNGkVGRgZRUVGkp6ezbNkyzGYzvr6+hIWFMXXqVOu4yMhIVqxYwdSpU1m8eDFOTk507dqV9957j5EjR3Ls2DGbeRcuXMiQIUNYuXIlFRUVWCwW7r77bvz9/ZkwYQJFRUVkZ2czZ84cAgMDGTduHE5OTkyZMuWya3dzc2Pbtm3MmDGDVatWWX/I22QyER4ezvDhwxvmS/qJHj16kJ2dzZYtWzh79izBwcGMGjWKtLS05nu4iIiIiIhclI7uF2lmrquj+0VERESkTnpmTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJCO7hdprtq3BGfXplk7LLBp1hURERFpRhTWRJqr9CfBx7fp1vdya7q1RURERJoBhTWR5irEBL5NGNZERERE5KromTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQeksCYiIiIiIuKAFNZEREREREQc0A1NXYCINJKjJ+D7s9d2TS838PO6tmuKiIiINFMKayLNVfJi+G/5tVsvLBBeH62wJiIiItJAFNZEmquviuHA8aauQkRERETqSc+siYiIiIiIOCCFNREREREREQeksCYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA1JYE5HrQ+kpSHoVAoaB18PQZzp8erCpqxIRERFpNAprckU2b96MYRh1vrZv396o68+fP5+srKxGXaMhfPrpp6SmptKtWzdatGhBixYt6NGjB3/5y1+oqqqyG//73/+e6OhoAgMDcXNzo02bNvzmN79h8+bN1754R1RdDQNmw/I8SI6DPz4K35ZB7+nwxZGmrk5ERESkUeh31uSKFBcXA9CjRw+6du1q1+/r69uo67/00kuEhoYybNiwRl3nas2aNYtNmzZxxx130LVrV6qrq/nss88YPXo0a9as4b333sMwDOv4999/n8DAQP7nf/4HNzc3Kioq2L59O3369GHp0qU88sgjTfhproHe06B9IGSNqb0/ZxsUfA6rUmFQdE1bYjR0SoYZK2F5yrWrVUREROQaUViTern99ttJTU21a2/Tpk2jrnvmzBlOnz7dKHNXVVVx7tw53N3dr3quX//618THx9OpUycCAgKorq7m8OHDPP/883zwwQesW7eOgQMHWsc///zz+Pv74+fnh6urK6dPn2bv3r0kJyfz/PPPN/+wdik52yDIHx6I+rEtwK8msC3bCuYqcHNpsvJEREREGoO2QUq9eHl50bFjR7vXT4POypUr+eUvf4mPjw+enp707NmTnJwcu7lWrlzJb3/7W9q2bYubmxstW7bkvvvuY8+ePTbjDMOguLiYffv22Wy9/Oqrr6z9td1xy8rKwjAMmy2Fzz33HIZh8NlnnzF+/Hhat26Nu7s7+fn5AJjNZl544QXCw8Nxd3fH39+fgQMHsmvXrsv6fgYPHszw4cP59a9/TZcuXejatSvx8fHWgPbRRx/ZjB84cCAxMTFERETQuXNnbr/9dh5++GF8fHz47rvvLmvNZm3XIeh2Ezhd8E/WnR2hwgz
/0VZIERERaX50Z03qpbKy0rol8jw3Nzd8fHwAePbZZ/nDH/5At27d+M1vfgPAvn37SEhIID09ndGjR1uv+9Of/oSLiws9e/bE09OT7777jo0bN/Lhhx+ya9cuOnbsCMDw4cNZtWoV3t7exMXFWa//aUA8duyYXa1Hjx4FsKn3yJGa/7gfPHgwFouF3r17U1VVhYuLC1VVVfTr14+CggJiYmK4/fbbMZvNfPTRR9x1111s3bqVO+6446LfT3Bw8EX7PT09a20vLi6murqao0ePsnjxYo4cOcKvfvWri871s3C0BH7Vxb49pEXNn0dOwG3trm1NIiIiIo1MYU3q5dVXX+XVV1+1aUtISOCNN97g008/5Q9/+AP9+/dn7NixtG7dGsMwOHToENOmTeOZZ57h0UcftQa7iRMn4ufnR0BAAJ6enpSXl7NhwwamT5/OnDlzWLJkCQCTJ0/mnXfewWQyMXnyZOu6N954Y70/R3V1NTNnzqRjx44YhkGbNm145ZVX2LJlC08//TT3338/AQEBnDlzhp07dzJp0iSefvpp6x24K/H999+TkZGBh4cH/fv3r7U/ICDA+t7NzY2YmBibz9osVJ2Fsgr7NnMVFJfbtpu8a+6mnT4DbrX8c+XuWvPn6TONU6uIiIhIE9I2SKmXmJgYxo0bZ/O65557AFiyZAmGYfD444/To0cPQkJCCA4OplevXvTt25dTp06xdetW61z3338//fr14/bbb6dVq1bcdNNNJCQkEBQURF5ennXc+TtsTk5ONlsvXVzq/6zSfffdx/33309ERAS33XYb/v7+LFmyhODgYIYPH054eDiBgYG0bt2auLg4brvtNrZv337Fz82dO3eO//f//h9FRUU8+uij/OIXv7Abc8MNNzBu3DjGjBnDkCFDrNtCIyIi6v35rrWz585Z/15eXs6+ffts+gsKCiD/3zXH7//0VfA5rPjIrv2LDdsoKSkBD1cwn6WoqIjDhw9b56s4UVrzFw9X2zUuXPMntm/fzrmf1FlYWFizxg8uXKPOz6E1tIbW0BpaQ2toDa1xlWtcimGxWCxXdIX8rOXk5JCQkMCoUaOYMmWKTZ+npycmk4no6Gi2bdt20Xnmz5/P008/DcCuXbuYNm0amzdv5tSpUzbjAgMD+eabb6zvAwICCA4OZu/evXZzGoZBv379WL9+vU17WloaU6ZMYdWqVQwaNAiApKQkFi9ezPvvv28Nmee5ublx5szF79R8+eWXhIWFXXTMedXV1QwbNozs7Gzuvfde/vSnP9GhQ4daxx05cgSLxcLJkyf5/PPPmT59OhaLhV27dl12KC0vL8fPz4+yDkn4Hjh+Wdc0iFtCYePz0Mp08XEl38POC34fbUIWBLeAiffatv/y1pq7Zx1HQ8cQyH3Wtv/1D+GJv8CeedoGKSIiIs2OtkFKvbi7u9O6deta+86cOYNhGIwZMwanCw+E+EH37t0BOHz4MDExMbi7u9O/f38CAwNxdXXFMAzeeOONS4amy/HT/0fkQoGBgXZtFouF0NBQEhIS6rzu/BbOS6murmbEiBFkZ2fzm9/8hhkzZtQa1KDmjuFPv9POnTvz/vvv89prr7F582a7UHndauENd//Cvi2khX37eZHtIW9/ze+t/fR/U//8AjzdoFOrRitXREREpKkorEmDCw0NZefOndx777106tSp1jEmU83dlxUrVnDq1CnGjh1LYmIi/v7+3HBDzf8ss7KyrH8/76e/TXYhLy8vTp48adf+3//+t85rnJ2d7doCAwM5deoUKSkpdYZNf3//Ouc873xQy8rKIj4+nhkzZnD77bdf8rrznJycrL9b9+WXX172dc3SoF41x/e/uf3H31krLodVBTDwDh3bLyIiIs2SnlmTBnf+ePqXX36ZkJAQWrdubfNycXGxnoZYWloKQGRkJJGRkbRv357WrVvzj3/8w9r3U+7u7rUGMqg5gXHfvn1UVPx4eEVJSQmrV6++ovr79u1LaWkpK1eutKv9fP0XhsgLWSwWnnjiCbKysoiLi+O5556r8wTJkpKSWu8gnjp1itWrV2MYBl261HIS4s/JoF4Q1QmGp8PMN+Av/6j5Ie1z1fD84KauTkRERKRR6M6aNLjf/va3vP3226xbt47IyEgSEhJo1aoVR48eZefOneTm5lrDSUxMDHPnzmXs2LEcPHiQFi1akJ+fzzvvvENAQAAXPlLZsWNHNm3axLRp07j11ltxcnJi4MCBeHl5MXDgQObPn0+fPn149NFHKS0tZdGiRfj7+1/Rb5WNHTvWevLjpk2b6Nu3L76+vhw+fJgNGzbg6urKli1bLjpHamoqmZmZtG7dmp49e/L555/z+eefW/tvvvlmevXqBcCHH35IcnIyDz74IB06dMDHx4dDhw6RnZ1NUVERv/nNb4iMjLzs+pslZ+ea59Um/hUWvFtz+mOPDpA1puZZOREREZFmSGFNGlxgYCDPPfcc7du3Jy8vj7lz53L69GlMJhNt2rQhMTHROvauu+5i3LhxrFmzhtmzZ+Ps7EyXLl2YOHEif/vb3zh+3PaAjLFjx1JSUsKCBQs4efIkFouF//znP3Ts2JHf/e53HD58mLy8PFJSUggNDaVfv364u7vzyiuvXHb93bp148UXX+Rvf/sbn376KZs2bcIwDFq2bElYWBj/8z//c8k5Nm3aBNScEvTcc8/Z9ScmJlrDWmBgILfeeiu5ubl89913VFZW4uvrS7t27bjvvvt47LHHLvsZuevW5lmXHtPCG5aMrnmJiIiI/AworMkViY6OJiMjg549e150XPfu3QkICCAuLo7jx49TVVWFh4cHJpOJdu1+PLXP19eXcePGER0dzYkTJzAMg6CgIHr27ElERITdlsfY2Fg8PDw4evQoZrMZ+PEHqDt27Mj06dP57LPPqKiowNfXl4iICAzDoGvXrkRHR1vnSUpK4o477rCp5TwnJycGDBhAWFgYX3zxhfWIVk9PTwIDA+t8Du+nnn/+eeuPcdfmp3PcfvvtPP/883zzzTecOnWKs2fP4u7uTkBAAF26dKFt27aXXE9EREREmh8d3S/SzDj80f0iIiIicll0wIiIiIiIiIgDUlgTERERERFxQAprIiIiIiIiDkhhTURERERExAEprImIiIiIiDggHd0v0ly1bwnOrtduvbDAa7eWiIiIyM+AwppIc5X+JPj4Xts1vdyu7XoiIiIizZjCmkhzFWIC32sc1kRERESkweiZNREREREREQeksCYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA1JYExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERBzQDU1dgIg0kqMn4Puz1249Lzfw87p264mIiIg0cwprIs1V8mL4b/m1WSssEF4frbAmIiIi0oAU1kSaq6+K4cDxpq5CREREROpJz6yJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIOLbSU5D0KgQMA6+Hoc90+P
RgU1clIiIi0ugU1kTEcVVXw4DZsDwPkuPgj4/Ct2XQezp8caSpqxMRERFpVAprckVycnIwDKPOV25ubqOuP2nSJNLS0hp1jYawb9++Or+jtm3b1nrNP/7xDyIjI3F3d8fDw4Pu3buzadOma1z5NdZ7Ggx7pe7+nG1Q8DlkJcOMh2B0HGyeCc5OMGPltatTREREpAnod9akXvr06UN8fLxde0RERKOum5mZSXBwMJMnT27UdRrKXXfdxX333WfTZjKZ7Mbl5uZy7733YjKZGDNmDABLly4lLi6ODz/8kF/+8pfXolzHk7MNgvzhgagf2wL8IDEalm0FcxW4uTRZeSIiIiKNSWFN6iUyMpLU1NSmLqNBmc1mqqqq8Pb2brA5u3Tpclnf09NPP80NN9zARx99RMeOHQFISkoiIiKCsWPH8umnnzZYTdeVXYeg203gdMEmgDs7wqIP4D9H4LZ2TVObiIiISCPTNkhpNAsWLKBLly64u7vj5uZGp06dWLhwYa3joqKiaNmyJS4uLvj4+BAdHc3WrVttxhmGQXFxsd0Ww3379ln7+/fvbzd/WloahmGQk5NjbUtKSsIwDAoKChg8eDAmkwkPDw/Wr18PQEVFBcnJybRp0wYXFxc8PT3p2bMnGzduvOLv4fvvv6e8vLzO/l27dnHgwAH69u1rDWoAHTt2pG/fvvzrX//iyy+/vOJ1m4WjJRDSwr79fNuRE9e2HhEREZFrSHfWpF4qKiooKiqyafPw8ODGG28E4PHHHyczM5Pu3bszduxYnJ2deffdd0lOTubo0aPMnj3bel1GRgZ+fn4kJiYSEhLCgQMHWL16NbGxsRQUFNCtWzcAXnrpJWbOnImPjw8pKSnW61u3bl3vzzFkyBBcXV0ZMWIEhmHQrl07zGYzUVFR7N+/n9jYWIYPH05paSkrV64kLi6OdevWcc8991zW/EuXLmXJkiVYLBZMJhP3338/CxYswNPT0zpm8+bNAERFRdld37NnT3Jzc9myZQs33XRTvT+nQ6g6C2UV9m3mKii+IMyavGvupp0+A261/DPl7lrz5+kzjVOriIiIiANQWJN6ycjIICMjw6atd+/ebNq0iQ0bNpCZmcljjz1GVlaWtT8tLY3o6Gjmzp1LSkqKNdjl5+fj7+9vM9eoUaOIiYlh1qxZvPXWWwCkpqYyZ84cTCZTg23B9Pb2ZufOnbi6ulrbJk6cyN69e8nOzmbo0KHW9unTp9O5c2fGjx/P3r17Lzqvs7MzERERxMfHc9NNN/HNN9/w1ltv8frrr7Nz50527NiBi0vNs1bnQ29tB4+cbzt8+PBVf9Yml//vmmP3L1TwOaz4yLbt0GvQPhA8XMF81v6ayh9CmoerfZ+IiIhIM6FtkFIvAwcOZPny5Tav83fLlixZgmEYjB49mqKiIpvXgAEDqKystG43BKxBrbq6muLiYoqKimjdujWtWrViz549jfo5xowZYxPUAN58801atWpF7969bWqvrKykZ8+eFBYWcvLkyYvOe+utt7J7927S0tJ48sknefbZZ9m5cye/+c1v+Ne//sWCBQusYysqau42eXh42M1z/g7cqVOnrvajNrpz585x5MiPx+mXl5dbt6gC8Iv2fDY/AT6YYX2d6hAAsZHW94V/TuTc+mkQ7A+A+UYvqg5/a52iqKioJrgeLQHge19X2zWAgoKCi77fvn07586ds74vLCykpKTEfo26PofW0BpaQ2toDa2hNbRGA61xKYbFYrFc0RXys5aTk0NCQgIpKSm8/PLLtY7p0aMHn3zyyUXnmTNnDpMmTQJg48aNTJ48md27d2M2m23GBQQE8O2339q8Dw4OrvXOlmEY9OvXzyYIQs0dvSlTprBq1SoGDRoE1DyztnjxYgoKCujVq5fNeDc3N86cufj2uv3799O5c+eLjqnNZ599RteuXenTp4/1+bcJEybw8ssvk5WVxWOPPWYzPisri+HDh/Pcc88xY8aMy1qjvLwcPz8/yjok4Xvg+BXXWC+3hMLG56GV/UmXF9V7Ws0dtKwxtfcnvAR5++HIEttDRpJehb9thRNLdRqkiIiINFvaBikNzmKxYBgGS5cuxdnZudYxd955J1ATeuLj4/H09OSpp54iPDwcb29vDMPgmWee4fTp01ddz9mztWyj+4GPj0+t9bdt25YXX3yxzuvq+5xc586dcXJyorS01G6u2rY6nm+r67fZmr1BvWqO739zOwyKrmkrLodVBTDwDgU1ERERadYU1qTBhYWFsXPnTjp06FDroRk/lZWVhdlsJjs7m4SEBJu+UaNGWZ/rOs8wjDrn8vLysglB5x08ePDyiwdCQkIoKysjMTGxzrBZX3v27KG6utr6vB7UPOsHNbfaL/TPf/4TwzD49a9/3aB1XDcG9YKoTjA8HQqLoKUP/GU9nKuG5wc3dXUiIiIijUrPrEmDe+KJJ4Ca7X1VVVV2/YcOHbL+/XwYunA37syZMykrK7O71t3dvc5j8ENDQyksLLTpP3bsGGvWrLmi+h988EHKysqYOHFirf0/rb8u//d//2fXdu7cOcaPHw/UPPN33u23387NN9/Mxo0bOXDggLX9wIEDbNy4kYiIiOv/JMj6cnaG3GfhobtgwbswcSm09K3ZcnlLaFNXJyIiItKodGdNGly/fv1ISkpi0aJF3HzzzcTHxxMaGsqRI0fYvXs3O3bssG5NTExMZO7cuYwcOZK8vDxMJhMFBQVs27aNoKAgm4c6ASIiIsjNzWXEiBGEh4fj5OTEsGHD8Pf3JykpidTUVHr06EFCQgIlJSW88cYbBAUF1Rr86pKWlsaWLVuYN28eeXl5xMTE4Ofnx9dff01+fj5ubm6XPPjkoYce4vvvv+eOO+6gbdu2HD9+nH/84x8cPHiQXr16MXr0aJvx8+bN44EHHuCuu+6yPrf217/+FYvFwp///OfLrv26s3nWpce08IYlo2teIiIiIj8jCmvSKDIyMoiKiiI9PZ1ly5ZhNpvx9fUlLCyMqVOnWsdFRkayYsUKpk6dyuLFi3FycqJr16689957jBw5kmPHjtnMu3DhQoYMGcLKlSupqKjAYrFw99134+/vz4QJEygqKiI7O5s5c+YQGBjIuHHjcHJyYsqUKZddu5ubG9u2bWPGjBmsWrXK+kPeJpOJ8PBwhg8ffsk5+vfvz8qVK8nJyeH777/nhhtuoF27djz77LM899xzdtsrBw4cyJo1a5gyZYr1pMjw8HCWL1/+890CKSIiIvIzp9MgRZqZ6+o0SBERERGpk55ZExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oD0O2sizVX7luDsem3WCgu8NuuIiIiI/IworIk0V+lPgo/vtVvPy+3arSUiIiLyM6CwJtJchZjA9xqGNRERERFpUHpmTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNRERERETEA
SmsiYiIiIiIOCCFNREREREREQd0Q1MXICKN5OgJ+P7stVvPyw38vK7deiIiIiLNnMKaSHOVvBj+W35t1goLhNdHK6yJiIiINCCFNZHm6qtiOHC8qasQERERkXrSM2siIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQeksCYijq30FCS9CgHDwOth6DMdPj3Y1FWJiIiINDqFNRFxXNXVMGA2LM+D5Dj446PwbRn0ng5fHGnq6kREREQalcKaXJGcnBwMw6jzlZub26jrT5o0ibS0tEZdoyHk5uYSExNDcHAw7u7uuLu707ZtW5566imOH7c/Tr9///51fqcLFy5sgk9wjfSeBsNeqbs/ZxsUfA5ZyTDjIRgdB5tngrMTzFh57eoUERERaQL6nTWplz59+hAfH2/XHhER0ajrZmZmEhwczOTJkxt1nau1b98+KisrGThwIKGhoVRXV/Pxxx/z+uuv849//IPPPvsMHx8fu+teeuklu7a+fftei5IdU842CPKHB6J+bAvwg8RoWLYVzFXg5tJk5YmIiIg0JoU1qZfIyEhSU1ObuowGZTabqaqqwtvb+6rnmjRpEpMmTbJrT0lJYf78+WRmZjJ27Fi7/ub2nV61XYeg203gdMEmgDs7wqIP4D9H4LZ2TVObiIiISCPTNkhpNAsWLKBLly64u7vj5uZGp06dat3St2DBAqKiomjZsiUuLi74+PgQHR3N1q1bbcYZhkFxcTH79u2z2Sa4b98+a3///v3t5k9LS8MwDHJycqxtSUlJGIZBQUEBgwcPxmQy4eHhwfr16wGoqKggOTmZNm3a4OLigqenJz179mTjxo1X9Z2EhYUBcOLEiVr7q6urKS4u5ty5c1e1TrNxtARCWti3n287Uvv3KCIiItIc6M6a1EtFRQVFRUU2bR4eHtx4440APP7442RmZtK9e3fGjh2Ls7Mz7777LsnJyRw9epTZs2dbr8vIyMDPz4/ExERCQkI4cOAAq1evJjY2loKCArp16wbUbBGcOXMmPj4+pKSkWK9v3bp1vT/HkCFDcHV1ZcSIERiGQbt27TCbzURFRbF//35iY2MZPnw4paWlrFy5kri4ONatW8c999xzWfOXl5dTXl7OyZMn2bJlC7Nnz+aGG27gvvvuq3W8l5cXlZWV3HDDDXTt2pUXXniBuLi4en8+h1J1Fsoq7NvMVVBcbttu8q65m3b6DLjV8s+Uu2vNn6fPNE6tIiIiIg5AYU3qJSMjg4yMDJu23r17s2nTJjZs2EBmZiaPPfYYWVlZ1v60tDSio6OZO3cuKSkp1mCXn5+Pv7+/zVyjRo0iJiaGWbNm8dZbbwE1WwTnzJmDyWRqsO2C3t7e7Ny5E1dXV2vbxIkT2bt3L9nZ2QwdOtTaPn36dDp37sz48ePZu3fvZc0/cuRI/v73v1vft2nThqysLCIjI23GBQYG8tBDD9GjRw98fHzYuXMny5YtY+DAgWRnZ/Pwww9f3Qd1BPn/rjl2/0IFn8OKj2zbDr0G7QPBwxXMZ+2vqfwhpHm42veJiIiINBPaBin1MnDgQJYvX27zOn+3bMmSJRiGwejRoykqKrJ5DRgwgMrKSut2Q8Aa1M5vASwqKqJ169a0atWKPXv2NOrnGDNmjE1QA3jzzTdp1aoVvXv3tqm9srKSnj17UlhYyMmTJy9r/tTUVJYvX056ejqDBw/GxcWFb775xm7c0qVLWbFiBRMmTCApKYmMjAzy8vJwdnZm/PjxDfJZG9u5c+c4cuTH4/TLy8utW1QB+EV7PpufAB/MsL5OdQiA2Ejr+8I/J3Ju/TQI9gfAfKMXVYe/tU5RVFTE4cOHa7ZHAt/7utquARQUFFz0/fbt2222mRYWFlJSUmK/Rl2fQ2toDa2hNbSG1tAaWqOB1rgUw2KxWK7oCvlZy8nJISEhgZSUFF5++eVax/To0YNPPvnkovPMmTPHegDHxo0bmTx5Mrt378ZsNtuMCwgI4Ntvv7V5HxwcXOudLcMw6Nevn00QhJo7elOmTGHVqlUMGjQIqHlmbfHixRQUFNCrVy+b8W5ubpw5c/Htdfv376dz584XHVObZcuW8cgjj/Dyyy/bbOWsS1xcHOvXr+fjjz/mjjvuuKw1ysvL8fPzo6xDEr4H7H8moFHcEgobn4dWpiu7rve0mjtoWWNq7094CfL2w5EltoeMJL0Kf9sKJ5bqNEgRERFptrQNUhqcxWLBMAyWLl2Ks7NzrWPuvPNOoCb0xMfH4+npyVNPPUV4eDje3t4YhsEzzzzD6dOnr7qes2dr2Ub3g9qOz7dYLLRt25YXX3yxzuvq+5zc0KFDSU5OZsmSJZcV1tq2bQvA0aNH67XedW9Qr5rj+9/cDoOia9qKy2FVAQy8Q0FNREREmjWFNWlwYWFh7Ny5kw4dOhAVFXXRsVlZWZjNZrKzs0lISLDpGzVqFC4utv8xbhhGnXN5eXlRWlpq137w4MHLLx4ICQmhrKyMxMTEOsPm1Thz5gzl5eWXHsiPtbdp06bB67guDOoFUZ1geDoUFkFLH/jLejhXDc8PburqRERERBqVnlmTBvfEE08AMGHCBKqqquz6Dx06ZP37+TB04W7cmTNnUlZWZnetu7t7nUEnNDSUwsJCm/5jx46xZs2aK6r/wQcfpKysjIkTJ9ba/9P66/Lll1/W2j5nzhxOnz5t8+PhpaWlfP/993ZjN27cyJYtWwgNDbU7kORnw9kZcp+Fh+6CBe/CxKXQ0rdmy+UtoU1dnYiIiEij0p01aXD9+vUjKSmJRYsWcfPNNxMfH09oaChHjhxh9+7d7Nixw7o1MTExkblz5zJy5Ejy8vIwmUwUFBSwbds2goKC7H5vLCIigtzcXEaMGEF4eDhOTk4MGzYMf39/kpKSSE1NpUePHiQkJFBSUsIbb7xBUFBQrcGvLmlpaWzZsoV58+aRl5dHTEwMfn5+fP311+Tn5+Pm5nbJg0/uvvtu/Pz86N69O+3ataO0tJTt27ezbds2TCYTc+fOtY7dtWsX999/P71796ZDhw54e3uze/du1q1bh5OTU62/TddsbJ516TEtvGHJ6JqXiIiIyM+Iwpo0ioyMDKKiokhPT2fZsmWYzWZ8fX0JCwtj6tSp1nGRkZGsWLGCqVOnsnjxYpycnOjatSvvvfceI0eO5NixYzbzLly4kCFDhrBy5UoqKiqwWCzcfffd+Pv7M2HCBIqKisjOzmbOnDkEBgYybtw4nJycmDJlymXX7ubmxrZt25gxYwarVq2yhiWTyUR4eDjDhw+/5BxDhw5l7dq1rF69mpMnT+Ls7ExwcDCPPPIIL7zwAqGhP94VateuHT169ODjjz/mvffe48yZM7Ro0YK+ffsya9Ys6/N9IiIiIvLzotMgRZqZ6+o0SBERERGpk55ZExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oD0O2sizVX7luDsem3WCgu8NuuIiIiI/IworIk0V+lPgo/vtVvPy+3arSUiIiLyM6CwJtJc
hZjA9xqGNRERERFpUHpmTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQd0Q1MXICKN5OgJ+P7stVvPyw38vK7deiIiIiLNnMKaSHOVvBj+W35t1goLhNdHK6yJiIiINCCFNZHm6qtiOHC8qasQERERkXrSM2siIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQeksCYijq30FCS9CgHDwOth6DMdPj3Y1FWJiIiINDqFNRFxXNXVMGA2LM+D5Dj446PwbRn0ng5fHGnq6kREREQalcKaXJGcnBwMw6jzlZub26jrT5o0ibS0tEZdozEcPHgQLy8vDMNg/PjxNn3ff/89M2fOpFevXgQEBODq6kpgYCB9+/Zl+/btTVTxNdJ7Ggx7pe7+nG1Q8DlkJcOMh2B0HGyeCc5OMGPltatTREREpAnod9akXvr06UN8fLxde0RERKOum5mZSXBwMJMnT27UdRrasGHDOHfuXK19hYWFzJgxgy5dujBo0CBCQ0M5ePAgq1atIiYmhuXLl5OQkHCNK3YQOdsgyB8eiPqxLcAPEqNh2VYwV4GbS5OVJyIiItKYFNakXiIjI0lNTW3qMhqU2WymqqoKb2/vBp138eLF5Ofn8/TTTzN//ny7/tDQUDZu3EifPn1s2pOSkvjVr37F5MmTf75hbdch6HYTOF2wCeDOjrDoA/jPEbitXdPUJiIiItLItA1SGs2CBQvo0qUL7u7uuLm50alTJxYuXFjruKioKFq2bImLiws+Pj5ER0ezdetWm3GGYVBcXMy+fftstl7u27fP2t+/f3+7+dPS0jAMg5ycHGtbUlIShmFQUFDA4MGDMZlMeHh4sH79egAqKipITk6mTZs2uLi44OnpSc+ePdm4ceMVfQffffcdv//977n//vu56667ah0TGhpqF9QAevXqRdu2bfn666+vaM1m5WgJhLSwbz/fduTEta1HRERE5BrSnTWpl4qKCoqKimzaPDw8uPHGGwF4/PHHyczMpHv37owdOxZnZ2feffddkpOTOXr0KLNnz7Zel5GRgZ+fH4mJiYSEhHDgwAFWr15NbGwsBQUFdOvWDYCXXnqJmTNn4uPjQ0pKivX61q1b1/tzDBkyBFdXV0aMGIFhGLRr1w6z2UxUVBT79+8nNjaW4cOHU1paysqVK4mLi2PdunXcc889lzV/UlIS1dXVvPbaa2zZsuWKajt37hwnTpzAz8+vPh/N8VSdhbIK+zZzFRSX27abvGvupp0+A261/DPl7lrz5+kzjVOriIiIiANQWJN6ycjIICMjw6atd+/ebNq0iQ0bNpCZmcljjz1GVlaWtT8tLY3o6Gjmzp1LSkqKNdjl5+fj7+9vM9eoUaOIiYlh1qxZvPXWWwCkpqYyZ84cTCZTg23B9Pb2ZufOnbi6ulrbJk6cyN69e8nOzmbo0KHW9unTp9O5c2fGjx/P3r17Lzl3bm4ub731FvPnzycgIOCKa3vuuecoLS1lxIgRV3ytQ8r/d82x+xcq+BxWfGTbdug1aB8IHq5gPmt/TeUPIc3D1b5PREREpJnQNkipl4EDB7J8+XKb1/m7ZUuWLMEwDEaPHk1RUZHNa8CAAVRWVlq3GwLWoFZdXU1xcTFFRUW0bt2aVq1asWfPnkb9HGPGjLEJagBvvvkmrVq1onfv3ja1V1ZW0rNnTwoLCzl58uRF5zWbzYwcOZLbb7+dsWPHXnFd77zzDi+++CLt27ev9Tk3R3Tu3DmOHPnxOP3y8nLrFlUAftGez+YnwAczrK9THQIgNtL6vvDPiZxbPw2C/QEw3+hF1eFvrVMUFRVx+PDhmu2RwPe+rrZrAAUFBRd9v337dpvDXgoLCykpKbFfo67PoTW0htbQGlpDa2gNrdFAa1yKYbFYLFd0hfys5eTkkJCQQEpKCi+//HKtY3r06MEnn3xy0XnmzJnDpEmTANi4cSOTJ09m9+7dmM1mm3EBAQF8++23Nu+Dg4NrvbNlGAb9+vWzCYJQc0dvypQprFq1ikGDBgE12xMXL15MQUEBvXr1shnv5ubGmTMX3163f/9+OnfuXGf/7373O5YsWcKOHTuIjIwELu+7A/jwww+599578fLyIj8/n44dO160lguVl5fj5+dHWYckfA8cv6Jr6+2WUNj4PLQyXdl1vafV3EHLGlN7f8JLkLcfjiyxPWQk6VX421Y4sVSnQYqIiEizpW2Q0uAsFguGYbB06VKcnZ1rHXPnnXcCNaEnPj4eT09PnnrqKcLDw/H29sYwDJ555hlOnz591fWcPVvLNrof+Pj41Fp/27ZtefHFF+u87mLPyR08eJAlS5YQFxeHxWJh165dANaDQr777jt27dpFWFiY3fbPDRs2cN999+Hp6cmmTZuuOKg1O4N61Rzf/+Z2GBRd01ZcDqsKYOAdCmoiIiLSrCmsSYMLCwtj586ddOjQgaioqIuOzcrKwmw2k52dbXc8/ahRo3Bxsf2PccMw6pzLy8uL0tJSu/aDBw9efvFASEgIZWVlJCYm1hk2L+bw4cNUVVWxdu1a1q5da9e/dOlSli5dSnp6OqNHj7a2b9iwgXvvvRd3d3c2bNhAeHj4Fa/d7AzqBVGdYHg6FBZBSx/4y3o4Vw3PD27q6kREREQalZ5Zkwb3xBNPADBhwgSqqqrs+g8dOmT9+/kwdOFu3JkzZ1JWVmZ3rbu7O+Xl5XbtUHMEfmFhoU3/sWPHWLNmzRXV/+CDD1JWVsbEiRNr7f9p/bXp2rUr6enpdq+kpCQAYmNjSU9PJzY21nrNxo0bbYJaY/+4+HXD2Rlyn4WH7oIF78LEpdDSt2bL5S2hTV2diIiISKPSnTVpcP369SMpKYlFixZx8803Ex8fT2hoKEeOHGH37t3s2LHDujUxMTGRuXPnMnLkSPLy8jCZTBQUFLBt2zaCgoJsHuoEiIiIIDc3lxEjRhAeHo6TkxPDhg3D39+fpKQkUlNT6dGjBwkJCZSUlPDGG28QFBRUa/CrS1paGlu2bGHevHnk5eURExODn58fX3/9Nfn5+bi5uV304JOAgACbO2bn5eTksGjRIsLDw236P/vsM+69914qKioYMmQIH3zwAR988IHNtU888YTdlslmYfOsS49p4Q1LRte8RERERH5GFNakUWRkZBAVFUV6+v9v787Da7oWN45/TyLzHBnNMWtC1RhTUUQpvTVEFa2pRRptKdqiP1QN1XmgimpSVGlUdaCosQhVbmkVNQ9BSkQSRCJk//5Ic64jgyCRQ9/P85znOmuvvdfa5+zLebvWXnsq8+bNIz09HXd3d4KCghg9erS5Xu3atVmwYAGjR49m1qxZ2NjYEBISwooVKxg0aBDx8fEWx502bRo9e/Zk4cKFpKamYhgGrVu3xtPTk2HDhhEXF8fcuXOZMmUKfn5+DBkyBBsbG0aNGlXgvjs4OLB582bGjh1LTEyM+UH
e3t7eBAcH07dv38L5kP6xZ88eLly4AMDMmTNzrfPwww/fm2FNRERERPKk1SBF7jF31WqQIiIiIpIn3bMmIiIiIiJihRTWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWSGFNRERERETECuk5ayL3qgo+YGt/Z9oK8rsz7YiIiIj8iyisidyrpj4Dbu53rj0XhzvXloiIiMi/gMKayL0q0Bvc72BYExEREZFCpXvWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWSGFNRERERETECimsiYiIiIiIWCGFNRERERERESuksCYiIiIiImKFFNZERERERESskMKaiIiIiIiIFVJYExERERERsUIlirsDIlJETiXChSt3pi0XB/BwuTNtiYiIiPxLKKyJ3KsGz4LjKUXfTpAfzI5UWBMREREpZAprIveqIwlw4Exx90JEREREbpHuWRMREREREbFCCmsiIiIiIiJWSGFNRERERETECimsiYiIiIiIWCGFNRGxbkkXYcB08O0DLk9AyzHw34PF3SsRERGRIqewJiLWKzMTHpkA8zfA4Hbw5lNwOhlajIH9J4u7dyIiIiJFSmFNbsqiRYswmUx5vpYtW1ak7b/00ktMnjy5SNsoDMuWLaNZs2YEBATg6OiIo6Mj5cqVY+DAgZw5k/dy+m+++SY1atTAycnJvE9ERMQd7Pkd1uL/oM9HeW9ftBli/4LowTD2cYhsB+vGg60NjF145/opIiIiUgz0nDW5JS1btqR9+/Y5ymvVqlWk7UZFRREQEMDIkSOLtJ3btWvXLtLS0ujYsSOlS5cmMzOTX3/9ldmzZ/Pjjz/y559/4ubmZrFPu3btWLFiBc2aNSM8PBwbGxsOHTrEsWPHiuksrMCizeDvCZ1D/1fm6wHdGsO8nyE9Axzsiq17IiIiIkVJYU1uSe3atRk+fHhxd6NQpaenk5GRgaur620f66WXXuKll17KUT506FDef/99oqKieP75583lEyZMYPny5UyZMiXX/f61fjsMdSqCzXWTABpUgZk/wb6TULN88fRNREREpIhpGqQUmQ8//JD77rsPR0dHHBwcqFq1KtOmTcu1XmhoKD4+PtjZ2eHm5kbjxo35+eefLeqZTCYSEhLYtWuXxdTLXbt2mbc//PDDOY4/efJkTCYTixYtMpcNGDAAk8lEbGws3bt3x9vbGycnJ5YvXw5AamoqgwcPpmzZstjZ2eHs7EzDhg1Zs2bNbX0mQUFBACQmJprLMjMzmTp1KhUrVjQHtbNnz5KZmXlbbd0TTp2DQK+c5dllJxNzbhMRERG5R2hkTW5JamoqcXFxFmVOTk6ULFkSgH79+hEVFUXdunV5/vnnsbW1ZenSpQwePJhTp04xYcIE834zZszAw8ODbt26ERgYyIEDB/j6668JCwsjNjaWOnXqAPDWW28xfvx43NzcGDp0qHn/MmXK3PJ59OzZE3t7e/r374/JZKJ8+fKkp6cTGhrKnj17CAsLo2/fviQlJbFw4ULatWvHDz/8QJs2bQp0/JSUFFJSUjh//jzr169nwoQJlChRgscee8xcZ9u2bfz999906dKFiIgI5s6dy8WLF3FycqJNmzZER0fj5ZVLYLnbZFyB5NScZekZkJBiWe7tmjWadukyOOTy15Sjfdb/XrpcNH0VERERsQIKa3JLZsyYwYwZMyzKWrRowdq1a1m9ejVRUVH07t2b6Oho8/bJkyfTuHFj3nnnHYYOHWoOdps2bcLT09PiWBERETRr1ozXX3+db775BoDhw4czZcoUvL29C20KpqurK9u3b8fe3t5cNmLECP744w/mzp1Lr169zOVjxoyhevXqvPjii/zxxx8FOv6gQYP48ssvze/Lli1LdHQ0tWvXNpft2LEDgJUrV3LlyhUGDRpE5cqV+f777/nuu+9o3rw5O3bswOb6qYB3m017s5bdv17sX7Bgo2XZ4U+ggh842UP6lZz7pP0T0pzsc24TERERuUfc5b/+pLh07NiR+fPnW7yyR8s+/fRTTCYTkZGRxMXFWbweeeQR0tLSzNMNAXNQy8zMJCEhgbi4OMqUKUOpUqX4/fffi/Q8nnvuOYugBrB48WJKlSpFixYtLPqelpZGw4YN2b17N+fPny/Q8YcPH878+fOZOnUq3bt3x87Ojr///tuiTkpK1qjS+fPnmT17Nu+++y7PPvssP/74I23btjUHR2sXHx9vsRhKSkqKeYoqAPdX4M/3w+GnsebXxcq+EFbb/H73B924uvz/IMATgPSSLmQcO20+RFxcXFYbp84BcMHd3rINIDY2Nt/3W7Zs4erVq+b3u3fv5ty5cznbyOs81IbaUBtqQ22oDbWhNgqpjRsxGYZh3NQe8q+2aNEiwsPDGTp0KO+++26uderXr8+2bdvyPc61C2msWbOGkSNHsnPnTtLT0y3q+fr6cvr0aYv3AQEBuY5smUwm2rZtaxEEIWtEb9SoUcTExNC1a1cg6561WbNmERsbS6NGjSzqOzg4cPly/tPr9uzZQ/Xq1fOtk5t58+bx5JNP8u6775qncn788cdERkbi5eVlcS8bQExMDN26daNXr14FDmwpKSl4eHiQXHkA7gfyfkxAoalWGta8BqW8b37fFv+XNYIW/Vzu28Pfgg174OSnlouMDJgOX/wMiXO0GqSIiIjcszQNUgqdYRiYTCbmzJmDra1trnUaNGgAZIWe9u3b4+zszMCBAwkODsbV1RWTycTLL7/MpUuXbrs/V67kMo3uH9cvn5/d/3LlyvHGG2/kud+t3ifXq1cvBg8ezKeffmoOa9mLjnh75ww75ctnrXSYlJR0S+3d9bo2ylq+f/EW6No4qywhBWJioWM9BTURERG5pymsSaELCgpi+/btVK5cmdDQ0HzrRkdHk56ezty5cwkPD7fYFhERgZ2d5Y9xk8mU57FcXFxyDTUHDx4seOeBwMBAkpOT6datW55h83ZcvnzZPPURoEmTJtjZ2ZGQkJCjbnbffX19C70fd4WujSC0KvSdCrvjwMcNPl4OVzPhte7F3TsRERGRIqV71qTQPf300wAMGzaMjIyMHNsPHz5s/nN2GLp+Nu748eNJTk7Osa+jo6NF0LlW6dKl2b17t8X2+Ph4lixZclP979KlC8nJyYwYMSLX7df2Py+HDh3KtXzKlClcunTJ4uHh7u7uPPjggyQnJ/PJJ59Y1M9+1EGnTp0K2v17i60tLHsVHm8CHy6FEXPAxz1r2mW10sXdOxEREZEipZE1KXRt27ZlwIABzJw5k0qVKtG+fXtKly7NyZMn2blzJ1u3bjVPTezWrRvvvPMOgwYNYsOGDXh7exMbG8vmzZvx9/e3uKkToFatWixbtoz+/fsTHByMjY0Nffr0wdPTkwEDBjB8+HDq169PeHg4586d46uvvsLf3z/X4JeXyZMns379et577z02bNhAs2bN8PDw4OjRo2zatAkHB4cbLnzSunVrPDw8qFu3LuXLlycpKYktW7awefNmvL29eeeddyzqf/TRRzRq1Ijnn3+ejR
s3EhQUxMqVK9m6dSthYWF07NixwP2/q6x7/cZ1vFzh08isl4iIiMi/iMKaFIkZM2YQGhrK1KlTmTdvHunp6bi7uxMUFMTo0aPN9WrXrs2CBQsYPXo0s2bNwsbGhpCQEFasWMGgQYOIj4+3OO60adPo2bMnCxcuJDU1FcMwaN26NZ6engwbNoy4uDjmzp3LlClT8PPzY8iQIdjY2DBq1KgC993BwYHNmzczduxYYmJizKNb3t7eBAcH07dv3xseo1evXnz33Xd8/fXXnD9/HltbWwICAnjyySeZNGkSpUtbjgrVqFGDjRs3MmTIEL777jtSU1Px9/fnxRdf5M033yxw30VERETk3qHVIEXuMXfVapAiIiIikifdsyYiIiIiImKFFNZERERERESskMKaiIiIiIiIFVJYExERERERsUIKayIiIiIiIlZIYU1ERERERMQK6TlrIveqCj5ga1/07QT5FX0bIiIiIv9CCmsi96qpz4Cb+51py8XhzrQjIiIi8i+isCZyrwr0Bvc7FNZEREREpNDpnjURERERERErpLAmIiIiIiJihRTWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWSGFNRERERETECimsiYiIiIiIWCGFNRERERERESuksCYiIiIiImKFFNZERERERESsUIni7oCIFJFTiXDhyp1py8UBPFzuTFsiIiIi/xIKayL3qsGz4HhK0bcT5AezIxXWRERERAqZwprIvepIAhw4U9y9EBEREZFbpHvWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWSGFNRKxX0kUYMB18+4DLE9ByDPz3YHH3SkREROSOUFgTEeuUmQmPTID5G2BwO3jzKTidDC3GwP6Txd07ERERkSKnsCY3ZdGiRZhMpjxfy5YtK9L2X3rpJSZPnlykbRSFgwcP4uLigslk4sUXX8yz3ptvvkmNGjVwcnLC0dGRcuXKERERcQd7ege1+D/o81He2xdthti/IHowjH0cItvBuvFgawNjF965foqIiIgUEz1nTW5Jy5Ytad++fY7yWrVqFWm7UVFRBAQEMHLkyCJtp7D16dOHq1ev5lunXbt2rFixgmbNmhEeHo6NjQ2HDh3i2LFjd6iXVmbRZvD3hM6h/yvz9YBujWHez5CeAQ52xdY9ERERkaKmsCa3pHbt2gwfPry4u1Go0tPTycjIwNXVtVCPO2vWLDZt2sQLL7zA+++/n2udCRMmsHz5cqZMmcJLL71UqO3ftX47DHUqgs11EwAaVIGZP8G+k1CzfPH0TUREROQO0DRIKTIffvgh9913H46Ojjg4OFC1alWmTZuWa73Q0FB8fHyws7PDzc2Nxo0b8/PPP1vUM5lMJCQksGvXLoupl7t27TJvf/jhh3Mcf/LkyZhMJhYtWmQuGzBgACaTidjYWLp37463tzdOTk4sX74cgNTUVAYPHkzZsmWxs7PD2dmZhg0bsmbNmpv6DM6ePcsrr7xCp06daNKkSa51MjMzmTp1KhUrVjQHtbNnz5KZmXlTbd1zTp2DQK+c5dllJxPvbH9ERERE7jCNrMktSU1NJS4uzqLMycmJkiVLAtCvXz+ioqKoW7cuzz//PLa2tixdupTBgwdz6tQpJkyYYN5vxowZeHh40K1bNwIDAzlw4ABff/01YWFhxMbGUqdOHQDeeustxo8fj5ubG0OHDjXvX6ZMmVs+j549e2Jvb0///v0xmUyUL1+e9PR0QkND2bNnD2FhYfTt25ekpCQWLlxIu3bt+OGHH2jTpk2Bjj9gwAAyMzP55JNPWL9+fa51tm3bxt9//02XLl2IiIhg7ty5XLx4EScnJ9q0aUN0dDReXrmElrtJxhVITs1Zlp4BCSmW5d6uWaNply6DQy5/RTnaZ/3vpctF01cRERERK6GwJrdkxowZzJgxw6KsRYsWrF27ltWrVxMVFUXv3r2Jjo42b588eTKNGzfmnXfeYejQoeZgt2nTJjw9PS2OFRERQbNmzXj99df55ptvABg+fDhTpkzB29u70KZgurq6sn37duzt7c1lI0aM4I8//mDu3Ln06tXLXD5mzBiqV6/Oiy++yB9//HHDYy9btoxvvvmG999/H19f3zzr7dixA4CVK1dy5coVBg0aROXKlfn+++/57rvvaN68OTt27MDm+umAd5NNe7OW3b9e7F+wYKNl2eFPoIIfONlD+pWc+6T9E9Kc7HNuExEREbmH3MW//qQ4dezYkfnz51u8skfLPv30U0wmE5GRkcTFxVm8HnnkEdLS0szTDQFzUMvMzCQhIYG4uDjKlClDqVKl+P3334v0PJ577jmLoAawePFiSpUqRYsWLSz6npaWRsOGDdm9ezfnz5/P97jp6ekMGjSIBx54gOeffz7fuikpWSNL58+fZ/bs2bz77rs8++yz/Pjjj7Rt29YcHK1dfHy8xWIoKSkp5imq3F8BfhrLn++Hw09js161ynOuQYX/vf9pLLs/6MZVX7esfQK9OL//OOfOnTMfMy4ujrO7/nnOWilvyzb+ERsbm+/7LVu2WCz2snv37hxt5HkeakNtqA21oTbUhtpQG4XYxg0ZIjchJibGAIyhQ4fmWadevXoGkO9rypQp5vqrV682GjRoYDg4OOSo5+vra3FsHx8fIyQkJNd2AaNt27Y5yidNmmQARkxMjLnsmWeeMQAjNjY2R317e/sb9n/Pnj35fk4RERGGnZ2d8dtvv5nL8vrspk2bZgCGl5dXjuN89dVXBmD06tUr3/aulZycbABGcuUBhkGnon9VG2wYJ84WuH9mzV81jN4f5r2965uG4d/XMK5etSx/5mPDcO5uGGmXb75NERERkbuIpkFKoTMMA5PJxJw5c7C1tc21ToMGDQDYs2cP7du3x9nZmYEDBxIcHIyrqysmk4mXX36ZS5cu3XZ/rlzJZSrdP9zc3HLtf7ly5XjjjTfy3C+/++QOHjzIp59+Srt27TAMg99++w2Ao0ePAlmLh/z2228EBQXh6elJUFAQAN7e3jmOVb581mqHSUlJebZ3z+raKGv5/sVboGvjrLKEFIiJhY71tGy/iIiI3PMU1qTQBQUFsX37dipXrkxoaGi+daOjo0lPT2fu3LmEh4dbbIuIiMDOzvIHuclkyvNYLi4uuYaagwcPFrzzQGBgIMnJyXTr1i3PsJmfY8eOkZGRwXfffcd3332XY/ucOXOYM2cOU6dOJTIykiZNmmBnZ0dCQkKefc/vnrd7VtdGEFoV+k6F3XHg4wYfL4ermfBa9+LunYiIiEiR0z1rUuiefvppAIYNG0ZGRkaO7YcPHzb/OTsMGYZhUWf8+PEkJyfn2NfR0dF8j9f1Spcuze7duy22x8fHs2TJkpvqf5cuXUhOTmbEiBG5br+2/7kJCQlh6tSpOV4DBgwAICwsjKlTpxIWFgaAu7s7Dz74IMnJyXzyyScWx8p+1EGnTp1u6hzuCba2sOxVeLwJfLgURswBH3dY8xpUK13cvRMREREpchpZk0LXtm1bBgwYwMyZM6lUqRLt27end
OnSnDx5kp07d7J161bz1MRu3brxzjvvMGjQIDZs2IC3tzexsbFs3rwZf39/i5s6AWrVqsWyZcvo378/wcHB2NjY0KdPHzw9PRkwYADDhw+nfv36hIeHc+7cOb766iv8/f1zDX55mTx5MuvXr+e9995jw4YNNGvWDA8PD44ePcqmTZtwcHDId+ETX19fIiMjc5QvWrSImTNnEhwcnGP7Rx99RKNGjXj++efZuHEjQUFBrFy5kq1btxIWFkbHjh0L3P+7xrrXb1zHyxU+jcx6iYiIiPzLKKxJkZgxYwahoaFMnTqVefPmkZ6ejru7O0FBQYwePdpcr3bt2ixYsIDRo0cza9YsbGxsCAkJYcWKFQwaNIj4+HiL406bNo2ePXuycOFCUlNTMQyD1q1b4+npybBhw4iLi2Pu3LlMmTIFPz8/hgwZgo2NDaNGjSpw3x0cHNi8eTNjx44lJibGPLrl7e1NcHAwffv2LZwP6Ro1atRg48aNDBkyhO+++47U1FT8/f158cUXefPNNwu9PRERERGxfibj+vlnInJXS0lJwcPDg+TKA3A/cKboG6xWOmtqYqmcC6SIiIiIyK3TPWsiIiIiIiJWSGFNRERERETECimsiYiIiIiIWCGFNRERERERESuksCYiIiIiImKFFNZERERERESskJ6zJnKvquADtvZF306QX9G3ISIiIvIvpLAmcq+a+gy4ud+Ztlwc7kw7IiIiIv8iCmsi96pAb3C/Q2FNRERERAqd7lkTERERERGxQgprIiIiIiIiVkhhTURERERExAoprImIiIiIiFghhTURERERERErpLAmIiIiIiJihRTWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWqERxd0BEisipRLhwpejbcXEAD5eib0dERETkX0ZhTeReNXgWHE8p2jaC/GB2pMKaiIiISBFQWBO5Vx1JgANnirsXIiIiInKLdM+aiIiIiIiIFVJYExERERERsUIKayIiIiIiIlZIYU1ERERERMQKKayJiIiIiIhYIYU1EbFeSRdhwHTw7QMuT0DLMfDfg8XdKxEREZE7QmFNRKxTZiY8MgHmb4DB7eDNp+B0MrQYA/tPFnfvRERERIqcwprclEWLFmEymfJ8LVu2rEjbf+mll5g8eXKRtlEYzp07x8CBAwkNDcXb2xuTyUTNmjXzrJ+Zmcm7775L9erVcXNzw9HRkbJlyzJo0CASEhLuYM/voBb/B30+ynv7os0Q+xdED4axj0NkO1g3HmxtYOzCO9dPERERkWKih2LLLWnZsiXt27fPUV6rVq0ibTcqKoqAgABGjhxZpO3crhMnTjBz5kw8PDyoWrUq27dvz7d+//79iY6OplatWgwePBh7e3t+/vlnZsyYwdq1a9mzZw82Nv+y/7ayaDP4e0Ln0P+V+XpAt8Yw72dIzwAHu2LrnoiIiEhRU1iTW1K7dm2GDx9e3N0oVOnp6WRkZODq6nrbx6pYsSJ79+6lWrVqADg6OuZZ9/Lly8yfP5+KFSvy3//+F1tbW/O21q1bs3r1atavX0/Lli1vu193ld8OQ52KcH1IbVAFZv4E+05CzfLF0zcRERGRO+Bf9p/q5U768MMPue+++3B0dMTBwYGqVasybdq0XOuFhobi4+ODnZ0dbm5uNG7cmJ9//tminslkIiEhgV27dllMvdy1a5d5+8MPP5zj+JMnT8ZkMrFo0SJz2YABAzCZTMTGxtK9e3e8vb1xcnJi+fLlAKSmpjJ48GDKli2LnZ0dzs7ONGzYkDVr1hTo3J2dnc1B7UayQ2LJkiUtghpAQEAAAB4eHgU61j3l1DkI9MpZnl12MvHO9kdERETkDtPImtyS1NRU4uLiLMqcnJwoWbIkAP369SMqKoq6devy/PPPY2try9KlSxk8eDCnTp1iwoQJ5v1mzJiBh4cH3bp1IzAwkAMHDvD1118TFhZGbGwsderUAeCtt95i/PjxuLm5MXToUPP+ZcqUueXz6NmzJ/b29vTv3x+TyUT58uVJT08nNDSUPXv2EBYWRt++fUlKSmLhwoW0a9eOH374gTZt2txym9dzc3MjODiYbdu28cILL9CnTx/s7Oz4/vvvWbRoEa1btzZ/BnetjCuQnJqzLD0DElIsy71ds0bTLl0Gh1z+inK0z/rfS5eLpq8iIiIi1sIQuQkxMTEGkOurRYsWhmEYxqpVqwzA6N27d479GzVqZDg6OhoJCQnmsnPnzuWot3nzZqNEiRLGY489ZlHu4+NjhISE5No3wGjbtm2O8kmTJhmAERMTYy575plnDMAICQkx0tPTLeoPHz7cAIy5c+dalJ85c8YoWbJknu3nx8HBId/99u3bZ9SuXdvi8zSZTEa/fv2Mq1ev3lRbycnJBmAkVx5gGHQq2le1wYZx4qxhGIZx/Phx4+jRoxb9+OOPP7LerP2jwMe8cuBk1j4uTxiJnV83EhMTzcc8fvy48XfUj1l1l//Xso1/bNq0Kd/3mzdvNq5cuWJ+/+eff+ZoI8/zUBtqQ22oDbWhNtSG2ijENm7EZBiGUQwZUe5SixYtIjw8nI4dO/LEE09YbCtXrhxNmjThiSeeYOHChfzyyy8EBgZa1Pn888959dVXmTdvHj179rTYlpmZSWJiImlpaQA0adKEEiVKcPDg/56r5evrS0BAAH/88UeOvplMJtq2bWueypht8uTJjBo1ipiYGLp27QpkTYOcNWsWM2bMYMCAARb1K1WqRFpaGr/88kuONgYOHMjy5ctJSkrCzc3tRh+XmaOjI1WqVMm13wBxcXFERERw+fJl2rVrB8DixYvZsGEDzz77bK7TR/OSkpKCh4cHyZUH4H7gTIH3uyXVSsOa16CUd/71zl2A7dc9H21YNAR4wYj/WJY3rZE1elYlEqoEwrJXLbfPXgVPfwy/v6d71kREROSepmmQcksqV66cI6xlO3DgAIZh0KBBgzz3P3HihPnPa9asYeTIkezcuZP09HSLer6+voXT4Tzktpx+XFwcly9fpmzZsnnud+LECapXr14ofUhJSaF+/fpUr16dtWvXmsuHDBlCy5YtmT59On369KF+/fqF0l6x8HKF1vfnLAv0ylmerXYF2LAn63lr1y4y8st+cHaAqqWKrLsiIiIi1kBhTQqdYRiYTCbmzJmTY8GMbNlBbs+ePbRv3x5nZ2cGDhxIcHAwrq6umEwmXn75ZS5dunTb/bly5Uqe23IbHTMMg3LlyvHGG2/kud/t3Cd3vU8++YT4+Hj+7//+L8e28PBw1q1bx4oVK+7usHYrujbKWr5/8Rbo2jirLCEFYmKhYz0t2y8iIiL3PIU1KXRBQUFs376dypUrExoamm/d6Oho0tPTmTt3LuHh4RbbIiIisLOz/EFuMpnyPJaLiwtJSUk5yq+dRlkQgYGBJCcn061btzzDZmE6fvw4kHuozC7LL3Des7o2gtCq0Hcq7I4DHzf4eDlczYTXuhd370RERESKnJbul0L39NNPAzBs2DAyMjJybD98+LD5z9lh6PpbJ8ePH09ycnKOfR0dHUlJSclR
DlC6dGl2795tsT0+Pp4lS5bcVP+7dOlCcnIyI0aMyHX7tf0vDNlTMefNm5dj29y5cwFo1qxZobZ5V7C1zbpf7fEm8OFSGDEHfNyz7pGrVrq4eyciIiJS5DSyJoWubdu2DBgwgJkzZ1KpUiXat29P6dKlOXnyJDt37mTr1q3mkaJu3brxzjvvMGjQIDZs2IC3tzexsbFs3rwZf39/rl69anHsWrVqsWzZMvr3709wcDA2Njb06dMHT09PBgwYwPDhw6lfvz7h4eGcO3eOr776Cn9//1yDX14mT57M+vXree+999iwYQPNmjXDw8ODo0ePsmnTJhwcHPj9999veJxRo0aZR/quXr3K33//zbPPPgtA3bp16d+/PwB9+/bl7bff5tdffyU4OJhHHnkEgKVLl7J7924efPBBWrVqVeD+3zXWvX7jOl6u8Glk1ktERETkX0ZhTYrEjBkzCA0NZerUqcybN4/09HTc3d0JCgpi9OjR5nq1a9dmwYIFjB49mlmzZmFjY0NISAgrVqxg0KBBxMfHWxx32rRp9OzZk4ULF5KamophGLRu3RpPT0+GDRtGXFwcc+fOZcqUKfj5+TFkyBBsbGwYNWpUgfvu4ODA5s2bGTt2LDExMeaVGL29vQkODqZv374FOs6sWbNISEgwvz9z5gzTp08HsgJtdlizs7Nj8+bNDBs2jJ9++on33nsPyJqO+cILL/Dmm28WuO8iIiIicu/Q0v0i9xirXLpfRERERG6a7lkTERERERGxQgprIiIiIiIiVkhhTURERERExAoprImIiIiIiFghhTURERERERErpKX7Re5VFXzA1r5o2wjyK9rji4iIiPyLKayJ3KumPgNu7kXfjotD0bchIiIi8i+ksCZyrwr0Bvc7ENZEREREpEjonjURERERERErpLAmIiIiIiJihRTWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWSGFNRERERETECimsiYiIiIiIWCGFNRERERERESuksCYiIiIiImKFFNZERERERESsUIni7oCIFJFTiXDhSnH3QkREROTmuDiAh0tx98IqKKyJ3KsGz4LjKcXdCxEREZGCC/KD2ZEKa/9QWBO5Vx1JgANnirsXIiIiInKLdM+aiIiIiIiIFVJYExERERERsUIKayIiIiIiIlZIYU1ERERERMQKKayJiIiIiIhYIYW1XBw+fJjHHnsMX19fTCYTffr0Ke4uFYk+ffpgMpmKuxsWPv74Y6pXr46DgwMmk4kjR44Ud5dERERE5F6QdBEGTAffPuDyBLQcA/89WPD998TBw+PBtQd4PwVPfgBnkvPf54v1YOqctc8tuOmwdujQIQYMGED16tVxdnbGy8uLGjVq0Lt3b9auXXtLnbA2Tz75JKtXr6ZVq1b07duX+vXr51n3r7/+omfPntSoUQMPDw+cnZ2pXr06L774IqdOncpR32Qy5fuaOHFioZ7LkiVLGDduXK7b4uPjC7Wt27V27VoiIyNxc3OjR48e9O3bF0dHxyJrb8eOHYwbN06BUERERORel5kJj0yA+RtgcDt48yk4nQwtxsD+kzfePy4BHnwVDsTDpJ4w/FFYuh3avAaXM3Lf58IleGkuuNz679mbes7atm3baN68OTY2NjRt2pQGDRpw5coVzp49y+rVq7GxsaFly5a33BlrkJ6eTmxsLK1atWLkyJE4Ozvj7u6eZ/29e/eye/duqlevTr169TCZTJw+fZpPPvmEL7/8kp07d+Ln52eu37dv31yP88MPP3DmzBkqV65cqOczb948vv766zwDmzVZvHgxAC+//DI1a9bExsaGkiVLFll7GzZs4LXXXqNBgwZUqFChyNoRERERkSLW4v+ggh9EP5f79kWbIfYviBkOXRtnlXVrDFUHw9iFMH9o/sef9DVcTIPtb0E536yyBlWywlr0WhgQlnOfCYvAzQlahsCSrbd0WjcV1kaPHk1qaiqvvfYarVq1wsvLi6tXr5KcnExcXBxOTk631AlrcvLkSQzDoGzZstx///03rF+vXj1ef/11/Pz8cHFxwcbGhsTERObNm8cnn3zCrFmzGD16tLn+yJEjcxwjPj6e6OhoypcvT+vWrQv1fC5cuFCox7sd58+fx83NLc/t2SNcHTp0KNIRtWzZn01qamqRHP9G5ysiIiIid8iizeDvCZ1D/1fm65EV2Ob9DOkZ4GCX9/5fb4EO9f4X1ABa3w9VS8FXsTnD2v6T8N738M3LWdtv0U1Ng9y9ezcuLi5ERkbSpEkT7rvvPmrWrEnTpk3p3r07bdq0Mdc9cuQIJpMp1xGdcePG5bgfKfv+qbNnz9KnTx98fHxwc3PjscceM0/XmzlzJjVq1MDR0ZHq1avz7bffFrjvCQkJREZGUrZsWezt7SlbtiyRkZGcPXvWog8VK1YEICoqyjw1cd26dXket3Tp0nTo0IEGDRoQHBxMjRo1aNKkCWFhWV/YwYOW82CrVKmS47Vy5UoMw+Dhhx8u8EjS77//TqdOnShZsiSOjo7cd999vPnmm1y9etVcp0WLFqxYsQKwnH4ZHR1tcazk5GQiIiLw8/PD0dGRJk2a8Msvv+Ro0zAMpk+fTt26dXF2dsbV1ZWWLVvmmP567Xe/cOFC6tati5OTE5GRkbmeS3b9H374AQAnJydMJhMtWrQw1zl16hQRERGUK1cOe3t7SpUqxYABAzh9+rTFsU6ePMmwYcOoXbs2Xl5e5s9mypQpFp/NuHHjGDVqFADh4eHmzyb7/sTcrtFsFSpUsOhb9ufbp08fVq9eTdOmTXF1daVDhw7m7du2baNTp074+Pjg4OBAtWrVmDhxIleuXLE4zp9//kl4eDilS5fGwcGBgIAAWrZsydKlS3P97ERERESkAH47DHUqgs118adBFUhNh335TIU8cTZrymS9Sjm3NagCvx3KWT7ks6wRtfZ1b6vbNzWy5uvrS1xcHOvXr6dz5845tjs7O5v/nD1qcfJkzhPPLrt21Cc7kIWFheHs7Ez79u35+++/+f777zl16hRdu3Zl6tSpNGnShPvvv5+1a9fStWtX9u3bR1BQUL79Tk5OplGjRhw8eJAHH3yQBx98kJMnTzJ9+nRWr17Nr7/+ipubG/Xr1yc1NZWYmBhq167NAw88AICXl9cNP5u0tDQuXLhAWloau3fvZsyYMQA0bNgw3/0MwyAqKgoHBwf69et3w3bgf9NRTSYTLVu2xM3NjT///JOXX36ZHTt2MH/+fAAeeughTpw4wYEDByymX5YpU8bieGFhYZQoUYK2bduSkpLCqlWraN++PUeOHLEYGXryySf58ssvCQ0NpVOnTly5coVt27bRpk0bFi9ezKOPPgr873udP38+p0+fplWrVoSEhFC+fPlcz8fR0ZG+ffuyYcMGi75mT008duwYoaGhpKam0qxZM1q0aEFiYiJffPEFa9asYfv27Xh4eACwdetWvvzyS+rWrUvNmjXJyMhg7969vPLKKxw8eJCZM2cCUK1aNZo1a8aGDRto164dAQEBANStm/V/qNyu0WwXL16
0CPnZ1q9fz6JFi2jVqhVdunQxf3ZLly6lc+fO5uDl6OjIsWPHGDNmDL/99huLFi0C4OzZs7Rs2ZKMjAxatGiBh4cHaWlpxMXFsXLlSh555JFcPz8RERERuYFT5+DB+3KWB/7zO/9kItTM/bcqp85Z1r1+/8QLliNzS7fByp2w893b7vZNhbVevXrx+++/06VLF6pUqULTpk2pX78+LVq0oEaNGrfdGQA/Pz+GDh1K6dKlSUlJITMzk1WrVnHs2DEmTpxInTp1sLW15YEHHuCVV17ho48+4t138/8gJk+ezIEDB3jiiScYOHAgPj4+nD17lunTp7NgwQImT57MpEmT6Nq1K5UqVSImJoaQkBDzlMXSpUvfsN8zZsxgyJAhFufRr18/c4DJy+rVqzlx4gRNmjShdu3aN2wH4NlnnyUtLY1x48bRtm1bnJ2dOXLkCOPGjePLL7+kf//+tGrVin79+rF+/XoOHDhgMf3S39/f4nje3t688MILlClThvT0dEqXLs306dOZM2eOeTTsm2++4YsvvqBXr1707t2bgIAAMjMz2bt3Ly+99BKRkZF07NjRYnXJQ4cO8frrr9OqVSvc3Nywt7fP9XxKlizJyJEjOXr0qEVfs6fVRkREcOHCBcaPH0+jRo1wd3cnJSWFOnXqMHHiRN5++21ef/11AIKDg3n77bcJCgrC3d2dzMxMjh8/zvjx45k9ezavvfYagYGBhIWF8fvvv7NhwwY6duxonn6a3/2JN3LkyBGGDRtGly5d8PT0BLJCfN++fSlfvjzjx4+nWrVqODg4cPr0aT7++GNiYmJYt24dLVq0YO3atZw5c4bIyEj69OmDq6sraWlpJCQkmI8nIiIi8q+XcQWSU3OWpWdAQoplubdr1mjapcvgkEv0cfzn9+mly3m3l70tt2mSjnb/q+Ngl7XYyNAoGBQG95Ut2Pnk46bCWs+ePUlJSWHlypXs3r2bqKgooqKiAGjWrBnR0dHmaYS36qmnnjJPIQRo1aoVq1atomHDhvTq1cv8g79UqVKMGzeOHTt23PCYCxcuxM3NjZdfftniPjQPDw+WLl3KwoULmTRpEv7+/uZRJycnJ6pUqVLgfgcHBzNkyBDS09M5duwYu3btIjAwkMDAwHz3mzp1KgCPP/54nmHmWqdPn+bXX3/lgQce4NlnnzVPmwwODubEiRNs376duXPn0qpVK8qUKYOdXdYFlN+5PPXUUzz88MMWZdOnT2fbtm3m9zNmzMDR0ZGBAwdSvXp1c3lAQAChoaEsXLiQffv2Ua1aNfO2kJAQBg0adMORSTs7O6pUqZJrX5OTk1m+fDlNmzblsccew9XVFcga5e3RowczZ87k22+/NYe1oKAgKleujMlk4vLly1y4cIHAwEBatmzJL7/8wqZNm+jatSslS5Y0BzNfX9+b+q7zUqZMGYYMGWIxcrlkyRLOnDlD9+7deeihh7D5Z+jdz8+PXr16ERMTw9dff02LFi3MUyLj4+OpWrXqbQVHERERkbvZb7/9Zp7lBhAbG0vjxv8sDLJpb9ay+9eL/QsWbLQsO/wJVPDDcLIjKf4M1/4qjY2NpXGaQ9YbJ3vLNoAtW7ZQv359bJ2yfqOfOHQE53P3mX/bxsXF4X76LO7/7J+SkkLq2C8ISDgPr3X/XxvXt9n42pL83VRY8/f35/nnn6dFixYcP36cuLg49u3bx8aNG80jFL/99luBQkde2rZta/E+e+pcSEiIxXFLliyJi4sLiYmJNzzmiRMnCAoKombNmhblNWvWJCAggKNHj95yf7OFhoZSvXp1rly5QkJCAuvXr2fkyJG4uLjkuqgIQGJiIsuWLSMgIIDw8PACtbNnzx4AatSoYXF/m8lk4tFHHyUiIoK//vrrpvp+/WceHBwMwJkzZyzaTUtLo1mzZnke5+DBgxZhrUqVKgWaQpqfXbt2kZmZyc8//3zD6a7ZJk6cyJw5czhw4ACGYVhsO3bs2G31Jz9ly5bNMcU0+96/jz76iI8++ijX/bKvvwcffJDQ0FC+/vprvvvuO+rXr0/r1q15/PHHue++XIbtRURERO5R1wY1wDLg3F8BfhprucOwaAjwghH/sSwP8ATAFOiNV5rlpsaNG8PsVVlvSnnT+LppkKGh/yxG8s/0x9I2znDNb9syZcrAxatZo3cOdrinXcb90w3w7MOQcglSLtG4VGW4sBEMA46cpnHlm/tNd1NhDbKmzGUvrnD+/HlOnTrFnj17GDZsGLt372bz5s3m+6nycu1CD9e7frpX9miLi4tLjromkynfY12rRIkS5lGNbDY2NpQocdMfQa5cXV3Noz4VKlQgKCiIadOm8cEHH+QZ1qKjo8nIyKB169bme6ZuJHvlwtxWS8z+7K5ftOJGrv/Ms4997XGuXLmCm5sb/fv3z/M414ep7PvIbkdKStZQdsOGDWnUqFGuda5tZ+jQoUydOpVGjRrRpEkT3NzcsLW15dixYyxevJjLl/MZ4r7GrVy/2d//tbK/ry5dulC2bO5D4dnTX0uVKsWkSZNYt24du3fv5ujRo7z11ltMnDiR999/n8GDBxeo7yIiIiL3NC/XrJUYry8L9MpZnq12BdiwJ+t5a9dmgl/2g7ND1qqOeSldEnzdYVsuD9Deuh9q//Mb+NxFuJAGby7Jel0vaBD8pwEseSWfk7N0W0nFzc0NNzc3qlSpwmeffcbBgwfZu3cvzZs3N4+oZP/YvlZcXFyex7w+UBUGX19fTpw4wZUrVyzC2ZUrVzh58iQ+Pj6F3mbJkiUxDIOkpKQ868ycORNbW9sCLywCUK5cOSDrfrDr7d27F8MwLIJffqEjW0E+88DAQE6dOsWgQYNyDc6Q9Tnf7HFvJCgoCJPJhJOTE8OGDcu1zrXfaXR0NFWqVGH8+PGUK1cOR0dHbGxsmD59OoDFSFt+n032FMRz585ZlKelpZGUlJTrfYy5nW/291WtWjUiIiJybevaz7Nly5bcf//9HD9+nFOnTnHgwAEmTJhgvi+wIN+niIiIiFyna6Os5fsXb/nfc9YSUiAmFjrWs7wf7WDWwodUumYwpUsj+HwtHE+Asv9kh9W/Z60iObRj1ns/j6yl+q/34VLYvA++HJr7IiX5uKlf0z/99FOuozZpaWns2rULwDx64OPjg7u7O1u3brX4gXzo0CHWr19/U528XQ0bNiQ5Odm8EmC2GTNmkJyc/L8hzluQvYrl9dauXcvRo0fznLq3bds2/vrrL+6///6bar9ixYpUqlSJDRs2mD9zyAoh2fdttWrVylyevUhHQaaL5qdt27YYhsGHH35ImTJlcrzs7OxwcHC4rTZyU6FCBUJCQtiwYQNxcXE52i1dujS2trYW+9jZ2dG6dWuqVq1KuXLl8PLyMq+Qea3skJRboM5+OPmqVassyt95550cUyvz8/DDD+Pm5kZUVBTOzs45+l+yZElz2ExMTCQzMxNvb2/uv/9+Hn74YZ555hkCAgJIS0vj0qVLBW5XRERERK7RtRGEVoW+U2H8V/Dxj1kP0r6aab6/zKzV2KzXtUZ1yRqBazkGPloKk7
+G8LezVpDs+1BWHWcHeKxhzlc5X7C1yfpzw6o31e2bGlkbPHgwKSkpPProo9SsWRNnZ2eOHz/O/PnzOXToEKGhoebgYW9vT1hYGIsWLaJdu3Y89thj5uXyAwMDc31+VVEZPHgwmzZt4rnnnmPHjh088MAD/Pbbb8yePRt/f3+eey6PJ50XQPazvh566CHKly9PWloa27dvZ8GCBTg6OuY5mpK9sEjnzp1v6mHiTk5OREREMGrUKJo2bcrgwYMJCAjghx9+YMWKFTRo0MDi/reaNWvyzTffMHDgQB599FHs7Oxo2LBhge//yta/f3/WrVvHxx9/zI4dO+jQoQM+Pj7ExcWxefNm9u/fz+HDh2/qmAXh6OjIiBEjGDJkCA8++CBPPfUUDzzwAJmZmRw6dIhvv/2WJ554gokTJwJZDylft24djz/+OK1bt+bvv//ms88+y3XU64EHHsBkMvHBBx9gMplwcXEhKCiIhg0b0rZtW/z9/XnjjTdIS0sjKCiIjRs3sn79+lynO+alatWqDBw4kA8++IBq1arRr18/KleuTFJSEnv37mXx4sV89dVXtGnThtmzZzN16lQ6depE5cqVsbOzY/369ezcuZO6deve1r2gIiIiIv9qtraw7FUY8XnWSNely1C/MkQ/B9VuvPI7ZX1g/evwYjS8Mg/sS8AjdeGdPvk/TPs23VRYCw8PZ8uWLaxevZqvvvqK8+fP4+rqSunSpXnqqad46qmn8Pb2NtcfMWIEKSkpbN26lbVr1xIUFESvXr04efLkHQ1rTZs2ZcKECXzxxRcsWbKEqKgovLy8ePDBB+nRo8dNrchyvZo1a7JixQpmz55NUlISJpMJPz8/mjZtyiOPPEKPHj1y7HPp0iW+/vprvLy86Nmz5023+dRTT3HhwgW+/fZbPvroIy5duoS/vz+dO3dm0KBBFlP0evTowebNm1m3bh2LFy8mMzOTjz/+OM8QmZeKFSuaF+7YsmULEydOJCMjg5IlS1KhQgU6dux40+dRUJ06dSI1NZXFixezYsUK5s6di4ODA76+vlStWtVihc9Ro0ZhZ2fHunXrWLJkCf7+/jRp0oTq1avneEB77dq16d27N6tWrSIiIoKMjAwef/xxFixYQLly5Xj55ZeZPXs2H3zwAfb29tSpU4eRI0cydux1/6UlH3Z2djz//PO4ubmxfPlyoqKiOHfuHO7u7uZVKkuVypojXaFCBcqXL8+iRYtITEzExsYGf39/unbtSt++fQvt/koRERGRe866129cx8sVPo3MeuXnyIzcy4PLwYpcVqG8kejnsl63wGTcxJyuuLg4jhw5Qnx8PBcvXuTy5cvY29vj7e1NlSpVqFatmsU9NZmZmWzdupX9+/dz+fJlPD09qVevHmfOnOG///0vTzzxhPnBwevWrWPfvn0MGDDAos2TJ0/yww8/0Lx5c4uVBiHroctubm4FCgqXLl1i+/btHD16lNTUVJydnSlfvjz16tWzWKzj/PnzfPnll9SpU4d69erd8LgHDx5k//79nD17lrS0rCVmXF1dKVOmDPfff3+uozBXrlxh3rx52NnZ0aNHj1u6D+ns2bNs27aN+Ph4MjIycHd3p2rVqtSqVctiFMkwDH755RcOHjxIamoqhmGYP8u8PnPIup+uatWq5sVksu3bt4+9e/dy9uxZMjMzcXJywsfHh0qVKlGpUtZT3W/2M8yWX3/S0tLYsWMHR48e5cKFC9ja2uLi4kLp0qWpUaOG+R7J7Ad1Hzp0iEuXLuHi4kL16tXx9fVl6dKlOa6jffv2sWPHDvMz/a4956SkJGJjYzl16hS2traUKVOGRo0a8e233+a47vL6vLIlJiayY8cOTp48SVpaGg4ODri7u1O2bFnuu+8+HB0dSUhI4I8//uDvv/8mNTUVk8mEm5sbVatWJTg4OMd0z7ykpKTg4eFBcuUBuB84c+MdRERERKxFtdKw5jUo5X3juv8CNxXWRMT6KayJiIjIXUthzULhL70oIiIiIiIit01hTURERERExAoprImIiIiIiFghhTURERERERErpLAmIiIiIiJihfTgJpF7VQUfsNWDtEVEROQuEuRX3D2wKgprIveqqc+Am3tx90JERETk5rg4FHcPrIbCmsi9KtAb3BXWRERERO5WumdNRERERETECimsiYiIiIiIWCGFNRERERERESuksCYiIiIiImKFFNZERERERESskMKaiIiIiIiIFVJYExERERERsUIKayIiIiIiIlZIYU1ERERERMQKKayJiIiIiIhYIYU1ERERERERK6SwJiIiIiIiYoUU1kRERERERKyQwpqIiIiIiIgVUlgTERERERGxQgprIiIiIiIiVkhhTURERERExAoprImIiIiIiFghhTURERERERErpLAmIiIiIiJihRTWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWSGFNRERERETECimsiYiIiIiIWCGFNRERERERESuksCYiIiIiImKFFNZERERERESsUIni7oCIFC7DMABISUkp5p6IiIiISH7c3NwwmUx5bldYE7nHnD17FoCyZcsWc09EREREJD/Jycm4u7vnuV1hTeQe4+3tDcCxY8fw8PAo5t5IcUtJSaFs2bIcP348338M5N9B14NcS9eDXEvXQ/Fwc3PLd7vCmsg9xsYm61ZUDw8P/WUrZu7u7roexEzXg1xL14NcS9eDddECIyIiIiIiIlZIYU1ERERERMQKKayJ3GMcHBwYO3YsDg4Oxd0VsQK6HuRauh7kWroe5Fq6HqyTyche51tERERERESshkbWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiVmrv3r20adMGFxcXAgICeOmll7h8+fIN9zMMgzfeeINy5crh5OREo0aN2LJlS456J0+epEuXLri5ueHt7c3TTz9NSkpKUZyKFIKivB7OnDnDCy+8QMOGDXFwcMDV1bWoTkMKSVFeD6tWraJ79+5UqFABZ2dn7rvvPt566y0yMjKK6nTkNhXl9fDrr7/Spk0bAgICcHBwoFy5cvTv35+TJ08W1elIISjq3xDZMjMzqVu3LiaTiUWLFhXmKUg2Q0SsTmJiohEYGGg8+OCDxvLly43Zs2cbHh4eRmRk5A33nTx5smFvb2+8++67xqpVq4xOnToZbm5uxsGDB811Ll++bISEhBghISHGd999ZyxYsMAoU6aM8cgjjxTlacktKurr4bfffjP8/PyMDh06GI0bNzZcXFyK8nTkNhX19dC1a1ejffv2xueff26sXbvWmDx5suHk5GT06dOnKE9LblFRXw8rVqwwBg8ebCxYsMBYu3atMXv2bKN8+fJGzZo1jbS0tKI8NblFRX1NXOvjjz82/P39DcCIiYkp7FMRwzAU1kSs0KRJkwwXFxfj7Nmz5rIZM2YYtra2xokTJ/Lc79KlS4a7u7sxcuRIc1l6erpRvnx5IyIiwlw2f/58w2QyGXv37jWXrVixwgCMX375pZDPRm5XUV8PV69eNf957NixCmtWrqivhzNnz
uTYd+LEiYbJZMp1mxSvor4ecrNy5UoDMDZt2nT7JyCF7k5dE2fOnDG8vb2Nzz77TGGtCGkapIgV+vHHH2ndujXe3t7msm7dupGZmcnKlSvz3C82NpaUlBS6detmLrO3t6dz584sW7bM4vi1atWiWrVq5rI2bdrg7e1tUU+sQ1FfDzY2+qfgblLU14OPj0+OfR944AEMw+DUqVOFdBZSWIr6eshNyZIlAQo0rU7uvDt1TYwcOZKWLVvSsmXLwj0BsaB/oUWs0N69e6levbpFmaenJ4GBgezduzff/YAc+9aoUYNjx45x6dKlPI9vMpmoXr16vseX4lHU14PcXYrjeti4cSMODg4EBQXdRs+lKNyp6+Hq1atcvnyZvXv38tJLL1GnTh2aNm1aSGchhelOXBNbt25l/vz5vP3224XYc8mNwpqIFTp37hyenp45yr28vEhMTMx3PwcHBxwdHXPsZxgG586du63jS/Eo6utB7i53+nrYv38/H3zwAYMGDdLiM1boTl0PzZs3x8HBgRo1apCcnMyyZcsoUaJEoZyDFK6iviYyMzOJjIxk2LBhVKhQoTC7LrlQWBMREZFcpaSk0LlzZ4KCgpg4cWJxd0eK0ezZs9myZQvz5s0jPT2d1q1bawXhf6lPP/2U+Ph4XnnlleLuyr+CwpqIFfLy8iI5OTlH+blz5yzmoOe2X3p6OmlpaTn2M5lMeHl53dbxpXgU9fUgd5c7dT1cvnyZTp06ce7cOZYtW4aLi0vhnIAUqjt1PVSrVo2GDRvSs2dPfvrpJ/bv38/MmTML5ySkUBXlNXHhwgVGjRrFq6++yuXLl0lKSjKH9tTUVAX4IqCwJmKFcrt3LDk5mVOnTuWYS379fgB//fWXRfnevXvNz0zJ6/iGYfDXX3/le3wpHkV9Pcjd5U5cD5mZmfTs2ZPt27fz448/UrZs2UI8AylMxfH3g7+/P2XKlOHAgQO30XMpKkV5TSQkJHD27FkGDRqEl5cXXl5e3H///QD07t2bqlWrFvLZiMKaiBVq164dq1atIikpyVwWExODjY0NYWFhee7XuHFj3N3diYmJMZdlZGSwePFi2rdvb3H8nTt3sn//fnPZ6tWrOXv2rEU9sQ5FfT3I3eVOXA+RkZF8//33fPvtt9SsWbPQz0EKT3H8/XD8+HGOHj1KxYoVb7v/UviK8poICAhg7dq1Fq8vv/wSgHHjxrF48eKiOal/s2J8bICI5CH7gZbNmzc3VqxYYXz22WeGp6dnjgdaPvTQQ0alSpUsyiZPnmw4ODgY77//vrF69WqjS5cueT4Uu2bNmsb3339vLFy40Chbtqweim2livp6MAzDiImJMWJiYozw8HDD0dHR/P7IkSNFfn5yc4r6epg4caIBGCNGjDA2b95s8UpOTr4j5ygFV9TXw8CBA43Ro0cb33zzjbFmzRpj+vTpRuXKlY3SpUsbCQkJd+Qc5ebciX8zrnX48GE9Z60IKayJWKndu3cbrVq1MpycnAw/Pz9j+PDhRnp6ukWd5s2bG+XLl7coy8zMNCZNmmSUKVPGcHBwMBo2bGjExsbmOH5cXJzRuXNnw9XV1fD09DT69eunH2JWrKivByDXV1RUVBGeldyqorwemjdvnuf1sHbt2iI+M7kVRXk9zJ4922jYsKHh6elpODk5GdWqVTOef/55Iz4+vqhPS25DUf+bcS2FtaJlMgzDKIYBPREREREREcmH7lkTERERERGxQgprIiIiIiIiVkhhTURERERExAoprImIiIiIiFghhTURERERERErpLAmIiIiIiJihRTWRERERERErJDCmoiIiORw+vRpPDw8mDVrlkV5nz59qFChQvF06h4xbtw4TCYTR44cuSPtRUdH52jv0qVLlCpVitdee+2O9EFEbo3CmoiIiOTw6quv4uvrS9++fQtUPz4+nuHDhxMSEoKbmxvu7u5UqVKF7t27s3jxYou6LVq0wNXVNc9jZYeZbdu25br93LlzODk5YTKZmDt3bp7HqVChAiaTyfyyt7enQoUKPP300xw/frxA53WvcnJy4pVXXuGtt97i1KlTxd0dEcmDwpqIiIhYiIuL47PPPuO5556jRIkSN6x/9OhR7r//fqZNm0ZoaChvvPEGkydPpkOHDuzdu5eoqKhC7d8XX3xBeno6QUFBfPbZZ/nWLVOmDHPnzmXu3Ll88MEHNGzYkM8++4yGDRuSkJBQqP262/Tv3x+TycS7775b3F0RkTzc+G9gERER+VeZMWMGJpOJJ554okD13377bU6fPs2SJUv4z3/+k2N7fHx8ofZv9uzZtGzZkv/85z8MGTKEQ4cOUbFixVzrenh40KtXL/P7iIgI/Pz8mDp1KlFRUYwYMaJQ+3Y3cXFxoXPnzkRHRzNhwgQcHByKu0sich2NrImIiNym7HuCVq9ezfjx4ylfvjxOTk40bNiQLVu2ALB+/XqaNm2Ki4sLgYGBvP7667kea9u2bXTq1AkfHx8cHByoVq0aEydO5MqVKxb1tm7dSp8+fahatSrOzs64ubnRpEkTvvnmmxzH7NOnDyaTieTkZHNYcXR0pEmTJvzyyy856sfExFCvXj38/PwKdP779+8HoFWrVrluDwgIKNBxCuK///0vO3bsoHfv3vTo0YMSJUrccHTtem3btgXgwIEDedb58ccfMZlMfPjhh7lub9SoEb6+vmRkZAA3933kJvs7yo3JZKJPnz45yhcuXEjTpk1xc3PD2dmZhg0bsmjRogK1l61du3YkJCSwdu3am9pPRO4MhTUREZFC8sorr7BkyRJeeOEFxo4dy6FDhwgLC2PJkiV07tyZZs2a8fbbb1O9enXGjBnDvHnzLPZfunQpTZo0Yd++fQwbNowPP/yQRo0aMWbMmByjXN988w179+6lW7dufPDBB4wePZrExEQ6d+7M/Pnzc+1f27ZtiYuLY8yYMYwcOZJdu3bxyCOPcP78eXOdv//+m7/++osGDRoU+LwrVaoEwKxZszAMo8D7JSQk5PpKTU3Nc5/Zs2fj6upKly5d8PHxoUOHDnz++edkZmYWuN3scOnj45NnnbCwMAICApgzZ06u+2/ZsoUePXpgZ2cH3Nr3cTteffVVunfvjpubG6+//jpvvPEGzs7OhIeHM23atAIfp1GjRgCsW7eu0PsoIoXAEBERkdsSFRVlAMYDDzxgpKenm8u//fZbAzBKlChh/Prrr+by9PR0IyAgwAgNDTWXXbp0yfD39zeaNWtmZGRkWBz/3XffNQBj7dq15rILFy7k6MfFixeNqlWrGjVq1LAo7927twEYERERFuVfffWVARiffPKJuWzNmjUGYHzwwQe5nmvv3r2N8uXLW5QdPHjQcHd3NwCjbNmyRo8ePYz33nvP2LZtW67HaN68uQHc8HXtZ5b9GXl6ehq9e/c2ly1ZssQAjGXLluVop3z58kb16tWNM2fOGGfOnDEOHTpkfPbZZ4aHh4dRokQJ448//si1f9mGDx9uAMaff/5pUf7qq68agLF9+3Zz2c18H2PHjjUA4/Dhw+ay7O8oN4DFOW/fvt0AjJEj
R+ao+5///Mdwc3MzUlJSzGXZ1+e17V2rRIkSRocOHXLdJiLFSyNrIiIihSQiIgJ7e3vz+2bNmgHQsGFD6tWrZy63t7enQYMG5hEegJ9++om///6bvn37kpSUZDHS1L59ewBWrlxpru/i4mL+c2pqKmfPniU1NZWHHnqIPXv2kJKSkqN/Q4cOtXj/0EMPAVj048yZMwB4e3sX+LwrVqzIzp07iYyMBGD+/PkMHTqUevXqUatWLbZv355jH0dHR3766adcX08++WSu7SxevJikpCR69+5tLmvfvj2+vr55ToXcu3cvvr6++Pr6UrFiRfr164ePjw/ffvstISEh+Z5XdjvXjq4ZhsG8efMICQmhTp065vJb+T5u1RdffIHJZKJ37945RiUfffRRzp8/z+bNmwt8PG9vb06fPl1o/RORwqMFRkRERArJ9YtceHl5ARAUFJSjrpeXF2fPnjW/37NnDwD9+vXL8/h///23+c+nT5/m1Vdf5dtvv831h3ZSUhLu7u759q9kyZIAFv3Ivm/KuInpjJC1TP7UqVOZOnUqp06dYuPGjcydO5fvv/+eDh068Oeff1oEQFtbW1q3bp3rsTZu3Jhr+ezZs/H19aVMmTIW95uFhYURExNDQkJCjqmNFSpUMD8rzt7enlKlSlG5cuUCnVN2IPviiy+YNGkSNjY2/Pzzzxw5coQ333zTou6tfB+3as+ePRiGQfXq1fOsc+21ciOGYeR5v5yIFC+FNRERkUJia2t7U+XXyg5Hb731FrVr1861TqlSpcx1w8LC2LNnDy+88AL16tXDw8MDW1tboqKimD9/fq73cOXVj2uDma+vLwCJiYk37HNeAgMDCQ8PJzw8nJ49ezJ//nyWLVtmsSrjzTp8+DBr167FMAyqVq2aa5158+YxZMgQizIXF5c8Q2FBPPXUUwwZMoQ1a9bQunVr5syZg62trcW53Or3ca28wtL1C8tkt2cymfjxxx/z/E6Dg4MLfI7nzp0zf+8iYl0U1kRERKxAlSpVgIKFi99//52dO3cyZswYXnvtNYttn3766W31I/tH/rVTI29HaGgo8+fP58SJE7d1nKioKAzDYNasWXh6eubY/uqrr/LZZ5/lCGu3q0ePHowYMYI5c+bQpEkTFi1aRJs2bQgMDDTXKYzvI3vUMTEx0WIE8tChQznqVqlSheXLl1OuXDlq1KhxK6dlduTIEa5cuXLDKaEiUjx0z5qIiIgVaNu2LX5+frzxxhu5jmpdunTJvGpj9mjK9VMVd+3aVeCl4vPi6+tLcHCw+ZEDBbFu3TouXbqUozwzM5Pvv/8egPvuu++W+5SZmUl0dDQ1a9bk6aefpmvXrjleTzzxBH/88Qe//vrrLbeTG19fX9q1a8fixYv54osvSElJsbhnDgrn+8geLVy1apVF+TvvvJOjbvY9faNGjeLq1as5tt/MFMjs77l58+YF3kdE7hyNrImIiFgBFxcX5syZw2OPPUa1atXo168flStXJikpib1797J48WK++eYbWrRoQY0aNQgODubNN98kNTWVatWqsW/fPmbMmEHNmjVzXdDjZoSHh/P6669z6tQpixGkvLz99tts2rSJjh07UqdOHTw8PIiPj+frr79m+/bttGzZkkceeeSW+7Ny5UqOHz9O//7986zTpUsXxo0bx+zZs6lfv/4tt5Wb3r1789133zFs2DA8PDx47LHHLLYXxvfxxBNPMGrUKAYMGMDevXvx9vZm+fLlJCQk5Khbv359xo0bx7hx46hduzbh4eGUKlWKU6dOsX37dpYtW8bly5cLdG7Lli3Dx8eHli1bFqi+iNxZCmsiIiJWom3btvz666+88cYbzJs3jzNnzuDl5UWlSpV48cUXqVWrFpA1krN06VKGDx/O559/zsWLFwkJCeHzzz9n586dtx3WnnnmGSZMmMD8+fMZNmzYDeu/+uqrxMTE8PPPP7NixQoSExNxcXGhRo0avPPOO0RGRmJjc+uTeWbPng1A586d86wTEhJC1apVWbBgAe+99x5OTk633N71OnTogLe3N4mJiTz99NM4OjpabC+M78Pd3Z1ly5bx4osvMmnSJFxdXencuTPz5s0zL1RzrbFjx1KvXj0+/PBD3n//fS5evIifnx8hISF5Psj7ehcvXmTx4sVERETg4OBQsA9DRO4ok3Gzyz2JiIjIPW/QoEGsXLmSv/76y/zgZ4A+ffqwbt06jhw5Unydk5sSHR1N3759OXz4MBUqVDCXZz+8e//+/QUaQRWRO0/3rImIiEgO48eP5+zZs0RFRRV3V6QIXLp0iTfeeIMRI0YoqIlYMU2DFBERkRz8/PxITk4u7m5IEXFycuLUqVPF3Q0RuQGNrImIiIiIiFgh3bMmIiIiIiJihTSyJiIiIiIiYoUU1kRERERERKyQwpqIiIiIiIgVUlgTERERERGxQgprIiIiIiIiVkhhTURERERExAoprImIiIiIiFghhTURERERERErpLAmIiIiIiJihf4fuBMK0TKHTdIAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot the SHAP values, notice that \"feature 0\" corresponds to the first atom, \"feature 1\" corresponds to the second atom, and so on also for bonds\n", + "shap.plots.bar(explanation, max_display=15)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the SHAP values for atoms and bonds\n", + "shap_values = explanation.values[0]\n", + "atom_shap_values = shap_values[:n_atoms]\n", + "bond_shap_values = shap_values[n_atoms:n_atoms + n_bonds]" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAyAAAAMgCAIAAABUEpE/AACtIklEQVR4nOzdZ1wU19cH8LOw9N6roKAoYEHB3hVLFLuY2PuSYjQmJqCJMYmJgVQ1zSVWNBrBrrGBvSuIXZCidASkSt1l53kxyTz7R0CFYYfy+37ygrlzZ/eM0eXsnXvPFTEMQwAAAADAHzWhAwAAAABobpBgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQ
AAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAANDdlZWWlpaW1dCguLq6oqHixvbS0tKys7MX2kpIShULBW3wtABIsAACA5uPZs2c+Pj56enr6+vpDhw5NS0ur0uHBgwdeXl4GBgb6+vozZ87k8rDU1NQhQ4bo6+vr6en5+Pg8e/aMbT958qSbm5uBgYGent6CBQvKy8tVej9NFhIsAACA5uPDDz9MT09PTk7OzMyUy+USiaRKh6lTpzo5OeXl5d25c+fixYtff/012+7n51dZWZmRkZGcnJyenr506VIiys3NnTJlyvLly2Uy2e3bt48cObJu3TpV31LTJGIYRugYAAAAgAd5eXk2Njb79u0bNWoUEV27dq1Pnz6PHz92cHBgO1y6dGnAgAHJycl2dnZEJJVKV65cmZmZmZKS4uTkdPny5Z49exLR0aNHJ06cmJmZaWxsnJeXZ2Jiwl7u5+eXkZFx6NAhge6vKcEIFgAAQDMRFxdXXl7ep08f9rB79+5qamoPHjzgOty7d8/R0ZHNroioV69e2dnZ2dnZDx48UFNT69GjB9veu3fv8vLyuLg4IuKyKyLKy8uztLRU0c00cWKhAwAAAAB+ZGdnq6mpGRkZsYdqamrGxsZZWVnKHYyNjblDU1NTInr69Gl2draRkZFIJGLbjY2N1dTUnj59qvziSUlJR44cOXLkSEPfRfOAESwAAIBmwsTERKFQFBcXs4cMwxQWFrJZFNehsLCQOywoKCAiMzMzExOToqIibtZQUVGRQqEwMzNT7jlp0qTp06cPGTJEFXfS9CHBAgAAaCZat24tEonu37/PHsbHx1dUVLRp04br0KZNm9TUVC7Hevjwoa6urpWVVZs2bSoqKhISEtj2Bw8eiESi1q1bs4dPnjwZPHhwu3btNmzYoLqbaeKQYAEAADQTtra23t7e33//PVuz6vvvv/fy8nJ3d5fL5U+fPlUoFMOGDTMzM2NXApaXl69bt2769Olisbhjx47dunULCgoiIoVC8eOPPw4fPtzGxoaIzpw506tXr+HDh//111/q6urC3mATglWEAAAAzce9e/dGjhypqampoaGRl5d35MiRHj16REZGdu/ePSMjw9ra+vDhw9OmTWvTpk1OTo6xsXFERIStrS0RXb16dcyYMaampjKZTCaTnThxws3NjV2HqKen5+Liwr6+mZnZiRMnBL3FpgEJFgAAQLNSVlZ29erVysrKXr166enpEVFJScn9+/e7du0qFouJKC8v7/r16wYGBj169GBbWMXFxVevXlVXV+/Vq5e2tjYRFRYWsmsJORoaGp07d1btDTVJSLAAAAAAeIYyDQAA0LJUVlYeOHAgJibG2tp6ypQpBgYGVToUFRWFhoZmZmZ26NBh/Pjx3MSj8vLysLCwpKQkR0dHX19fLS0t7pLr168/ePBgzpw5KrsLaOQwggUAAC3LmDFjbt68OXbs2EuXLpWVlV2/fl25NFReXl7Pnj21tbX79u176NChbt26HT58mIjKy8v79++fl5fn7e0dHh5uYWFx7tw5TU3NkydPrl69+vr16yKRqNptkqFlwggWAAC0IGfPnj1x4sTDhw+dnZ2Li4s9PDyCg4M/+eQTrkNwcDDDMFeuXNHT01u2bJmrq+u5c+cGDhwYGhqamJgYExNjbm6enZ3dvn37vXv3Tp06VS6Xr1q1ioh8fHyEu61XFRcXd+rUqVatWo0ePVroWJo5jGABAEALsnjx4piYmJMnT7KHn332WURExNWrV7kOPXv2HDFixFdffcUeent7u7u7r1u3bvz48WZmZps2bWLbZ8+eXVJSEhYWxh6eO3duxIgRjWoESy6XJycnJyYm3r9//8GDB4mJiefOnZPJZOzZHj16XLt2TdgImzeMYAEAQAuSmprq5OTEHbZp0yYlJUW5Q0pKinJlTicnp9TUVPZCLy8v5QuPHz/e8PG+qry8vEePHj18+DA2NjY2NjYmJiYhIaGioqJKN5FIpK2tXVpaev369Xfffff3338XJNqWAAlW85SdnR0bG2tra6v8OaIsKSkpJSXF2dmZrSPHycjISEhIcHBw4LZeZyUmJj59+tTFxUV55wQAgCanrKxMQ0ODO9TS0qoy7FRWVqapqflihyrt2traAo5Xpaens4NS3OjU48ePX3wkZWNj4+7u7uTk5OTk5Obm1rp1644dO4pEomnTpu3atWvDhg3Tpk3r16+fILfQ7CHBaoZ++umn5cuXt2nTJikpycfHZ+fOncqfJgqFYuHChTt27HByckpISFi+fPmXX37Jnlq5cmVQUJCzs3NiYuKsWbOkUqmamlpubu5bb70VFRVlb28fFxe3atUqf39/ge4MAKC+bG1tlTc/fvr0KVtmU7mD8ibHXIeXXthAysvL4+PjldOphw8flpSUVOmmpaXl7OzMplNubm7u7u7t27fX19ev9jV37twZFxcXGRk5ffr0GzduWFpaNvx9tDwMNC+JiYlqamr79+9nfzY3N9+8ebNyh/379+vr69+9e5dhmDNnzqipqd28eZNhmKioKDU1tbNnzzIMc/fuXX19/QMHDjAMs3HjRh8fn9LSUoZhDh06pKamFh8fr/LbAgDgR3BwsKWlpUwmYw9Hjhy5cOFC5Q7z589/44032J8rKiosLCw2bdrEMMzKlSs7d+7MdXN3d//qq6+4w7Nnz2ppadU/vLS0tPDwcKlUunjxYm9vbycnJ5FI9OLvbhMTk759+0okksDAwEOHDiUkJFRWVr7WG1VUVPTv35+I+vXrV15eXv/IoQpMcm9uvvzyy7179965c4c9XLp0aXR09NmzZ7kOEyZMMDQ03LZtG3s4aNAgDw+PtWvXLl68+N69e6dPn2bbZ86cWVxcvG/fPuUXVygUWlpa+/fvbxKLZQAAXlRcXNy2bdvBgwcvWrQoPDx8zZo1kZGRnTp1Cg0NJaIpU6bcvn27e/fun3766bBhw3799dezZ8/Gx8fr6uqmpqa6urrOnTt36tSpO3fuDAkJiY2Ntba2zs3NvXnz5p07dwICAo4ePWpkZNS9e/dXieTFoanY2Njnz59X6aapqWlvb88OSrGjU126dHmxdlcdZGZmenl5paWlLV68mN2dEHiER4TNTUJCgvImBp06deIWubDi4+OVS+F17tyZ3T49Pj6+yoV//fVXlRdPSUmRy+XK0z8BAJoWPT2906dPf/rpp7Nnz3ZwcDh8+HCnTp2I6P79+2yHLl26HDp06LvvvtuxY0enTp1Onz6tq6tLRPb29uHh4V9++eXMmTPbt28fERFhbW1NRAkJCQEBAUTUuXPngICATp06bdmy5cX3zcvL4+ZLsT88efKE3ZJZmYmJCZdLselUhw4dGmiLZWtr6z179gwaNGj9+vWdO3eeP39+Q7xLi4UEq7nJy8uzt7fnDo2MjHJzc5U75OfnK3/1MTQ0ZDtUaTcyMsrLy6vy4t98882gQYPc3
d0bJHQAAJVwdXWtMjxPRNxsVCIaOXLkyJEjX7ywV69ex44dq9LYvXv3yMhI5ZaKiorU1FTldOrOnTtFRUVVLtTQ0GjdurXy0FTnzp0NDQ3rfmOvr1evXsHBwbNnz160aFGnTp169Oihyndv3pBgNTeWlpbKiVFOTo6VlZVyBwsLC+UOz549YztUaX/xwnXr1h0+fPjChQsNFToAQBP06kNT3PTzhh6aei2zZs26du3a77//Pn78+MjISNXM3G8JGleCdfbs2T/++CM3N7dr164BAQGmpqbKZxmG2bJly759+yorK0eMGLFo0SJuD/A9e/bs2LGjrKysf//+H374oY6ODtt+586d33//vWfPnnPnzlX1zQjE1dV1w4YNDMOw8yIjIyNdXV2rdLhx4wZ3GBkZOXz4cLadm4BV5UK5XB4QEPD333+Hh4e3bdtWFbcBANBY5eXlzZkz586dO9nZ2ZWVlS8Wa9DU1Gzbtm2HDh3at2/fvn179gfl3Xgam7Vr196/f//cuXO+vr5nzpxRrkYBdSfwJHslV69e1dDQWLZs2a5du3r06NG9e3eFQqHcYe3atXp6euvXr9+0aZOVldXSpUvZ9l27dmloaAQGBoaEhDg7O0+dOpVhmIqKimHDhtnZ2bVu3Xru3LkC3I9Anj59qqWltXbtWrlcfunSJW4x4P3792/dusUwzJkzZ7S0tI4fP65QKLZt2yYWixMTExmGiYuLU1dXDwkJUSgUx44d09TUvHDhAsMwWVlZQ4cO7dOnT3p6urC3BgDQGChPwyAiExMTT0/PmTNnBgYGhoaGRkZGlpWVCR1jNZKTk2NjY2s6m5mZ2apVKyJ65513VBlVM9aIEqzJkydPnjyZ/TkjI0MsFkdERHBnZTKZnZ3dunXr2MP9+/fr6urm5uYyDNOlS5cVK1aw7VeuXFFXV2frrd2/f1+hUPj5+bWoBIthmD179lhYWGhra2tra3N/MvPmzRs/fjz787fffqurq6utrW1iYrJt2zbuws2bNxsbG2tra+vp6X333Xds4+TJk6sk5atWrVLtDQEANBZHjx5lPwlnzJixYcMG9gtq43fnzh1LS0sXF5e8vLya+ty8eZOdzs/uxgj11IgSLAsLC+Vf9r179/7888+5w5iYGCLi/iqXl5erq6ufPHmSnTZ05coV5dfZvn07d9gCEyyGYWQyWUpKClu8imvh6r4wDFNWVpaSklJRUVHlwoqKipSUlMb59QsAQHBsCYYOHToIHcjrKSkp8fT0JKLhw4fL5fKaum3fvp2INDQ0zp8/r8rwmiW1hn4EWZMEJenp6RUVFTk5OcrbttjY2KSlpXGH7M9cB01NTTMzs/T09PT0dCJi18qyrK2tlS9smcRisb29vba2tnILN2WNiLS0tOzt7ZUrvLM0NDTs7e21tLRUFCgAQNMhl8vj4+OJ6OOPPxY6ltejo6Ozd+9eCwuLkydPrlq1qqZuM2bMWLJkiUwmmzJlCn6T1pNgCVY/Je+99566urqamlplZSXXQS6XK8+zY1MB5Q4ymUxDQ+PFdrlc/mLeAAAAUE+HDh3Ky8vr2LHjvHnzhI7ltTk6Ov79999isXjNmjVsVdVq/fDDD4MHD87MzJw8eXJ5ebkqI2xmBEuwMpTs379fXV3dysoqOTmZ65CSkqK8WJT9metQVFSUn59vZ2fHtnN7oVdWVqanp9vZ2anuTgAAoGXYtGkTETXdgpxDhgxh59fOnz//7t271fYRi8V79uxxcnK6evWqRCJRcYTNiWAJ1otGjx69e/duhmGIKDY29tatW8r7sbBVQ/7++2/2MDQ01NzcvGfPnnp6eoMGDeLajx8/XlFR4e3trfr4AQCgGUtLSztx4oSmpub06dOFjqXuli5dOm/evOfPn48dOzYnJ6faPqampvv27dPV1Q0JCfnjjz9UHGHzIfQksP8XGxtraGg4bty4r776qnXr1hMnTmTbBw4cePr0aYZhdu/eLRaLFy1aFBAQoKur+/PPP7MdTp8+raGhMW/evJUrV5qamgYEBLDte/bs8ff39/Dw6NSpk7+//+XLl4W4rSZg/vz5bdq0cXJyWrdu3etuFwoA0EKsXr2aiKZMmSJ0IPVVWlrKTtX39vauZcI7u1uahobG2bNnVRles9GIRrBcXFyio6M7deqUnp7+6aef7t69m20fNGiQpaUlEU2ZMuXs2bMaGhrFxcV79+794IMP2A6DBw++du2amZlZbm6uVCpds2aN8suOGDFi1KhRqr2VJqOkpKRLly6bNm16/PhxYmLikiVLDA0Nv/jiC+VntQAAwDDM1q1bqebngwzDqDSgetDW1t67d6+lpWVERMSKFStq6jZt2rRly5bJZLI333yTm4cDr07UhP5OAL9ycnIGDhz44MEDIho3blxRUdG5c+fY5QJqampDhgyRSCTjx4/HigEAgNOnTw8dOtTe3v7JkyfV7m/Tp08fV1fX7777zszMTPXh1cHly5cHDx5cUVGxc+fOqVOnVttHoVD4+PgcO3asa9euFy9eZKtkwStqRCNYoEq3bt3q3r37gwcP9PT0Nm7ceODAgVOnTpWWlh45csTX15et8jplyhQHB4clS5bcu3dP6HgBAITETW+vNru6fv36lStXDh06pK+vr/LQ6qhPnz4//vgjEc2fPz8qKqraPmpqan/99Zezs3N0dLSfn59qA2z6BH5ECULYuXMn+0Wkpg1w2IetnTt35v6eeHp6SqXS58+fqz5aAABh5eXl6ejoqKmpPXnypNoO7Gq7jz76SMWB1d+CBQuIyNHRMSsrq6Y+Dx48MDQ0JCJuMxV4FUiwWha5XO7v78/mTBKJpLy8vPb+kZGREomE+05maGgokUjYPQoBAFqIX375hYiGDx9e7dmSkhJ2I+f79++rOLD6q6io6NevHxENGTJEebePKvbt2ycSicRiMbvmDF4FEqwWJCcnhy1gIRaLX+uLSElJSWhoqLe3t0gkYjMtNze3wMDA7Ozshov21YWFhc2bN2/hwoXHjx9/8WxZWdn69etnzpz5wQcf3L59m2uXy+WbNm2aNWvWe++9d+nSJeVLrl+/vmzZslp2RQWAFqVr165ExBYSetGWLVvYBwIqjoovGRkZbPHIpUuX1tJt+fLlRGRmZpaQkKCy2Jo0JFgtxa1bt9q0aUNEFhYWZ86cqduLPHz40N/fn13USURaWlq+vr6HDh2qZaFvQ1uzZo2+vv6qVasCAgI0NTU3b95cpcO4cePatGnz3XffzZw5U1tb++bNm2z7O++8Y2VltWbNGnYjgRMnTjAM8+DBg/79+1tbW2tpae3bt0/VNwMAjU9kZCSbWNS0SSs7ArRp0yYVB8ajy5cvszukvfgRyqmsrBw9ejQRdenSpbi4WJXhNVFNNcH6I/uPhUkL12etFzqQpuHvv//W09Mjom7duiUlJdXz1crLyw8dOsTOhWczLXt7e39//8ePH/MR7GsoLi42NTXdsmULe/jtt986OTkpl/KKjo4WiUTcuP24cePeeusthmHS0tI0NDQiIiLY9nfffXfAgAEMw2Rl
ZV25cqWystLGxgYJFgAwDPPOO+8Q0QcffFDt2djYWJFIpK+vX1hYqOLA+MWOw2lra1+/fr2mPgUFBa6urkQ0bdo0VcbWRDXSBKu4svhkwckN2RsCMwN/y/rtRMGJ0spS5Q4TEyZSFL0R94ZQETYV7KQr9tHejBkzSkpKeHzx1NTUwMBAJycnNs1SU1Pz9vYODQ196dQuvpw9e1YkEnFT7+Pi4ogoJiaG6xAYGNipUyfucMeOHSYmJgzDbN++3dzcXKFQsO2nT58mIuXvZEiwAIBhmJKSEhMTEyK6detWtR3YXZ8XLFig4sAaArtOsFWrVk+fPq2pz8OHD42MjIjohx9+UGVsTVGjK9OQLkv3S/Yzu2M2PH7428lvB6QFvJfy3oj4EZZ3LVdlrCpTlAkdYFOSm5s7atSooKAgdXX1wMDA7du36+jo8Pj6dnZ2/v7+cXFx4eHhM2fO1NLSYos7WFtb+/n53blzh8f3qlZqaqqJiQk7OEdE9vb2RKS8A3xaWhrbyHXIy8srLi5OTU21s7PjppSxfdLT0xs6YABoWvbs2ZOXl9ezZ88uXbq8eFYul+/YsYOa8u6Eyn755ZcBAwakpKRMnDixoqKi2j4dOnTYtm2bmpqav7//sWPHVBxh09K4EqzokmjPGM/gnOAyRZmOms5A/YG+Jr4jDEcYqxsXVRZ9lfHV8PjhJYoSocNsGmJiYvr27Xvy5Elzc/MTJ05wiwerpVAotm/frlAo6vBG7MBVSEhIenq6VCr18PDIy8sLDg7u0qWLl5dXcHDw8+fP63oTL8coVcrlEqZqz3IdmBeK69bUDgAtXO27Ox8+fDgjI6NDhw69evVSbVwNQkNDIzQ01N7e/tKlSx999FFN3caNG/fZZ59VVlbOmDEjISFBlRE2LY0owcqWZ49OGJ0pyxSRaIX1iqzOWWddzoa2CT3e9nhm58zv7L7TEGkkVSSlVKBg/8sdPny4Z8+eMTExXbt2vXHjxpAhQ2rpXFhYOH78+FmzZq1atao+b2psbCyRSKKjoyMjIxcvXmxqahoVFeXn52dpaTllyhR2wlN9Xv9Ftra2+fn5JSX/5tzsEBS7HIbroDwulZ6ebmRkpK+vb2trm5GRwcXD9rG1teU3PABo0hITE8+fP6+np/fmm29W24FNvxYuXKjauBqQlZXVwYMHdXR0fv31140bN9bU7Ysvvpg0aVJubu7EiROLi4tVGWFTIuTzyf81/8l8iiKKop+e/lRth6MFR5/K/n0wjDlYNVEoFIGBgWpqakQ0derUl671ePTokZubGxGZmpqePHmSx0hKS0urFHfo0KFDYGBgLU/3X1dRUZGRkdGuXbvYw59//tnR0VF5SeP169dFIhG3qPitt96aNGkSwzBJSUnq6upcdYYPP/ywd+/eyq+MOVgAEBAQQERz586t9mxGRoZYLNbU1KylRGcTFRISQkRaWlpXr16tqU9hYSH7u2PChAncfFZQ1lgSrFx5rk60DkVR94fdFczL/1chwapWYWHhuHHjiIiddPXS/kePHmXr43Xu3LnhSpvExsb6+/tbWVmxaZampqaPj09oaGgtRe1e3cqVK01NTdetWxcUFKSrq7thwwaGYW7cuMHteD9s2DA3N7eNGzcuXrxYU1Pz2rVrbPvs2bMdHBw2bNiwcuVKsVh85MgRtj0kJEQqlRoZGb3zzjtSqbSoqKj+QQJAkyOTydhR7YsXL1bb4euvvyaiyZMnqzgw1Vi0aBER2djYpKam1tQnNjaW/Q3y7bffqjK2pqKxJFj78vaxw1cbsje8Sv/GkGBlZWWdOHHi6tWrNVWBiouLO3HixL1796q0Jycnnzhx4ubNm1Wy/rKysps3b1ZUVNQtntjYWHYBrZmZWXh4eO2dlQe6fH19VbAHjlwuDw8PVy7uwM6Rr2diV1lZuXHjxgkTJvj6+oaGhrKNN2/enDNnDvvz8+fPV69e7ePjM3fu3CtXrnAXlpeX//zzz2PGjJk+fTpbBIu1bNkyiZKcnJz6hAcATdTBgweJyMXFpdrhGYVC0bZtWyI6duyY6mNTAZlMNmjQICLq3bt3TQXAGIY5fvy4urq6mpoa9x0VOI0lwVqetpxNsB6UPniV/oInWNu2bdPR0encubOFhYWHh8eLNc2XLl2qpaXl6enJPr/nkrCgoCBNTc1u3boZGRkNHTqUrZuQn5+/du1adi1bXFxcHeI5cuQIu3S2S5cuiYmJtXcuKiqaNGkSEYlEIn9/fxWP7qanpwcGBjo7O7NpFjtHftu2bfyWkAAAqI+xY8cS0XfffVftWba2i729vYBllhtaTk4OW57az8+vlm5fffUVEZmYmDx69EhlsTUJjSXBmvNkDkWRKEokU7zSYyNhE6zCwkJDQ8P169ezP3fq1GnZsmXKHa5du6aurs6Olzx69MjQ0JAdXHny5Im6uvr+/fuZ/3YnYLesCQsLW758Obvk9XUTLOWxqDfffPOlk67i4uI6duxIRIaGhgcOHHit9+IXu9Ehu+00Kc2RFzAkAACGYTIzMzU0NMRiMbsa5kXTp08nos8//1zFgalYdHQ0+xEtlUpr6qNQKHx9fdlZtvn5+aoMr5FrLAnWhIQJFEV60Xqv2L9KgnX5+WW5QnVfI0JCQkxNTblneRs2bLCyslLu8N5773l7e3OHc+bMGTt2LMMw33zzjaurK9e+YsUKLy8v7jA7O/t1E6zCwsIJEyZwk65eOhZ1/Phxtmhe+/btHzx4pcHChpafny+VSrt168YtvPD09Fy7du2zZ8+EDg0AWqhvv/2Wnb5d7dn8/HxdXV2RSBQfH6/iwFSPLfSloaFx7ty5mvoUFRWx39vHjRunvJdGC9dYyjSokzoRVVJlHa5NLE/sG9vX4Z5DQFpAQrkqanIkJia2a9dOQ0ODPXR3d3/69KlytafExER3d3fu0N3dPTExkW1n/xZWaa+buLi43r1779+/39TU9OjRo1zF9pqsW7du9OjReXl5Pj4+165dYydsCc7IyEgikURFRd27d8/f39/MzCwqKuqDDz6ws7NroOIOAAC127ZtG9Vc/mrHjh0lJSXe3t7cVIdmbPr06UuXLpXJZFOmTElNTa22j76+/qFDh8zMzA4ePPjNN9+oOMJGq7EkWMZiYyIqU5TVoY5oiizFWcs5XZYe9DTI5b7LsLhhu/J2NWjN94KCAn19fe7QwMCAiPLz82vqYGhoyJ598cKCgoK6JRDHjh3r0aPH/fv3O3XqdOPGjeHDh9fSuaysbNasWR988IFCofD39z948CA7YatRcXd3DwwMTEtLY4s7lJeXh4WFDRs2rEOHDkFBQU+fPhU6QABoEc6fPx8TE2NnZzdy5MhqO9RefbT5+f7770eMGPH06dNx48aVlpZW26dNmza7du1SV1dftWrV3r17VRxh49RYEqw2mm3YHx6UPXjdawfqD4xzj4vsECkxl+io6UQURUx7PM36rrVfst/Nkpt8R0pEZGVllZubyx3m5OSoqalZWlpyLZaWlsodsrOzra2tX2zPycmxtLSsfdjpRQzDBAUF+fj45Of
njxkz5uLFi9xugNVKSUnp16/f9u3bDQwM9u7dy03Yapy0tLR8fX3Dw8NjY2NXrVrl4ODw6NGjgIAAOzu7YcOGhYWFyeVyoWMEgOaMzZ/mzJmjrq7+4tmbN29GR0ebmZmxNXFaAnV19Z07dzo5Od28eZPdr7Baw4YN+/rrrxmGmTt37v3791UZYSMl7BNKTkRhBLuKcE3GmlfpX9Mk9wJ5gTRb2je2L/tqFEVu990CMwNzZHwutj98+LC2tjY3m++bb77p0KGDcofPPvvMw8ODOxw7dixbNeD333+3sbHhVp28/fbbw4YN47q9yhysoqKiyZMn038LAF/6tPvcuXNs5teuXbv79++/zl02CpWVlWxxB+6BrI2NzZgxY06fPi10aADQDHHzq2r6KH733XeJaMmSJaqNS3i3bt1iN3799ddfa+qjUCjYqvcuLi55eXkqjK4xaiwJVrmi3OqOFUVRm7ttSipfvlz/pasI75fe90/1N79tzqZZ2tHavom+4YXhr1LF9KXkcrmjo+PChQuLioqio6Otra3ZxYCxsbGnTp1iGObx48disfi3334rKyvbs2ePhoYGWw83Pz/fwMDg888/Ly0tPXXqlL6+PlsuvLKyMjc3Ny4ujoiioqJq+nsZHx//WgsApVIpm5eMGjWqqf9dZ4s7uLi4cN8NTExMdu7cKXRcANCs/PHHH0Q0ZMiQas+WlJSw64Ru3bql4sAag71794pEIg0NjTNnztTUp6SkhF20NGLEiGZcw+JVNJYEi2GYL9O/ZJMhvyS/l6ZBr1imoUxRFpob6v3IWxQlYl+83b12q9JXJZUn1TPaqKiojh07qqmpaWtrv/vuu+xfozVr1nTr1o3tsHv3bhsbGzU1NVNT099++427MDw8vE2bNmpqagYGBqtWrWIbMzMzlYcVDQ0NX3xHbgGgi4vLSxcAlpaWzpkz59UHupqQw4cPc2mWpqYmO4kNAIAXXl5eRPTXX39Ve3b79u1E1KNHDxVH1Xh8/PHHRGRmZlZLwcUnT56Ym5tTCyhjUbtGlGCVK8o9HniwadDo+NHRJdHcqUqm8lzRuemPpwdlBrEtr1sHK6UiJTAzsPXd1uzrq99U937kHZobWqGoY9l0VlFRUe2F1/Py8qotnVBQUPBaqf3atWvZ2QCjR49+aaGRlJSU7t27E5G+vv6ePXte/V2akOTkZPYPRNhSXgDQnNy5c4eIjI2NayooyBY3r6UoVLNXWVn5xhtvEJGHh0ctZRcjIiLEYrFIJOI22GiBGlGCxTBMZkVmt4fduOlTVnesesb0dLvvpnVTi21xvufMDm7VrdBoJVMZXhjum+ircVODfUGbOzb+qf5xZXUpnq4apaWls2bNevWxqPPnz7O7/rVt2/bu3buqCVIQ7G1+/PHHQgcCAM3E+++/T0SLFi2q9mxCQoJIJNLR0WnqMy7qKTc3l90maMaMGbV0++GHH9jv+S/9TRQWFjZhwoQ33ngjKCjoxW15ZDLZ+vXrR40aNXbs2G3btnHtCoVi69atY8eOHT169C+//MKNWRQVFX3zzTejR4+eMmXK33//Xadb5EfjSrAYhilXlK/PWu9635VLs9j/dKN1JyVMOlN4hu1Wz0ruufJcaba084PO3Ot7PvSUZkufVzb4lnyvJTk5mR2vNjAwYGdr1Y6bdDVy5Mjc3FwVRCig/v37ExG35yBAU5ecnFxT3XCGYWQyWWJi4ou/2uVy+ZMnT6rdNFMFe4w2J+Xl5eyDrZs3b1bbYfny5fjMYT148MDQ0JCIfv7551q6sTNV2rRpU8umrtu3b9fU1FyzZs2mTZscHBzmzp1bpcNHH31kYWEhlUp/+uknPT09dg8VhmHWrl2rp6f3008/SaVSCwsLdj8VuVzeq1evoUOHhoaG/vzzz7q6uhs3bqzv3dZVo0uwOMnlyReKLuzP2x9eGP6g9EGVLXTy5fmZFZl58rx6vktkcaQkSaIfrc+mWYa3DCVJkgtFF+r5srxQXgD44o7RVZSVlc2bN48b6GoJUwv//PNPIpo1a5bQgQDU1/37911dXXV0dMRicf/+/TMzM6t02Ldvn5mZmYGBgbq6+vz587l/4KdPn7azs9PT01NXV580aRK7oadCofjnn3+GDh1KROfPn1f1zTRZO3fuJCJPT89qz8pkMltbWyK6cKFR/IIQ3P79+0Uikbq6+vHjx2vqU1payo4RDBs2rKbfSu7u7itXrmR/Pnv2rFgsTk1N5c7m5eXp6elx4wtr165t1aqVTCaTyWT29vZr165l2/fs2aOnp5efny+TyUJCQrhhsPfff3/QoEH1v9m6abwJliqxxR16xPTgBrS6PuwanBQs4CAQNxb1xhtvvHQ4OjU1tWfPnkSkra0dEhKikgCFd/78eSLq2bOn0IEA1FfPnj0nTpxYVlaWl5fXvXv3mTNnKp/Nzc3V09MLCgqqrKy8d++emZlZcHAwwzDl5eXW1tYfffQRO4jl4OCwevVqhmEOHTrk4+MTHBysoaFx9uxZYW6pCWJT0t9//73as4cOHWLXGL10R7KWY8WKFURkamqakJBQU5+kpCR2pGD58uUvnmWLE924cYM9VCgUxsbGys/1Tp48qa6uXl5ezh4mJCQQUWxs7MOHD4mIm2hfWlqqpqYWHh5e5fVXrlzZr1+/+txjfSDB+h8PSh/4p/pb3LagKBq4eSBX9FKV/6JedwHgxYsX2SqmrVq1ioyMVE2QjUFWVhYRGRkZCR0IQL2wE6tjY2PZw0OHDmlpaRUWFnIdfvvtN1tbW24A4OOPP2a/V7Df2ouKitj2H3/80dHRUfmVtbS0kGC9osePH6upqeno6NT0vZotKxoUFKTiwBqzyspKHx8fIurSpUstz6MvXLigqakpEolenBF1+/ZtIlIesurQocOPP/7IHW7dutXS0pI7LCkpIaLTp0+fOnWKiJQnbJmbm2/dulX5xRUKRdeuXbnhMdVrvBW9BeGq7RpoF5jSKeXvNn9bXLWQyWTsbi3t2rX75ptv0tLSGjqA1NTUAQMGbN26VV9fPzQ09KVV14ODg4cMGZKZmTlgwIDIyEhPT8+GjlDFzp8/HxgYyH5ZqcLCwsLMzKygoKBKkQuApiUmJsbQ0JArPuLp6VleXv7kyROuQ2xsbNeuXbmq4p6enrGxseyFHTp04Hbf8vT0TEpKqmknE6jd5s2bFQrF5MmT2Wo4VTx9+vTo0aNisXjmzJmqj63RUlNT27lzp5ub2+3bt2fPns3UsO1bv379vv/+e4Zhpk+fvnv3buVT7N9qhULBtcjlcq6sNNuhsrJS+SwRaWhosBdWOaV8IRF9++232dnZy5Ytq8891gcSrGpoibTeNHkzTBqWnJwcGBjYpk2bhISEzz77zMHBgd2tRSaTNcT7Xrx40cvL68aNG87OzleuXGErttekoqJCIpH4+fmxP0RERCjv1dNsbN26dfny5ezTwBe1b9+eiGJiYlQbFEDdRUdHr1ASFRX17NkzY2NjrgP7C559dMLKyclR3jzU1NSUnWtS5UJTU1O2c8PfRHOjUChq391569
atMpnMx8fHxsZGtaE1duwCLCMjo71797LLBqu1ePHiTp06VVZWzpgx49GjR1y7nZ0dEaWkpLCHcrk8MzOTbeQ65OXlFRcXs4fsbtO2trZVLiwqKiooKFC+8Pvvv1+3bt2JEyfYyfiCQIJVGzs7O39///j4+PDw8JkzZ2ppaUVEREyZMsXKysrPz48d2OcLOxb19OnTESNG3Lhxg63YXpPs7Ozhw4f/+eef2traW7Zs4SZsNT9sCsV+X3/dswCNEDsvmCMSidiBWK4DuzG8hYUF12JmZlZYWMgd5uXlGRkZaWhoVLkwLy+PiNh1cPBaTpw4kZyc7OTkNGDAgGo7bN26lVrS7s6vpX379iEhIWpqagEBAceOHaup2/nz5/X09ORy+ahRo7hGY2PjPn36cMNaR48eVSgUgwcP5jr06tXL2Ng4LCyMPQwNDXV3d2/Tpo2zs3OHDh24C8PCwszMzNi5yMXFxdOmTdu8efP58+fd3Nx4v9/XINSzyaYoLy9PKpV6eHhwf3qenp5SqZSbA1E33AJAIlq8ePFLFwBGRUU5ODgQkb29/fXr1+vz1o3fgQMHiOiNN6qvxxEYGEhES5cuVXFUADy6efMmKU3XPX78uKampnI94XXr1jk4OHAzQT/99FMvLy+GYf7++28DAwN25SDDMOvXr7e3t1d+5aY7Bys4ONjFxcXExGTQoEHR0dFVzsrl8uXLl9vb25ubm0+ePDktLY079cMPPzg5OZmamo4YMSImJoZtzM/Pf/vtt+3t7W1tbadNm/biIs1JkyYR0Zo11e+Ey46gW1tby2SyajsAwzCrVq0iIhMTk1q20+UqYyk3Hj9+XCwWv/3221988YWJicmnn37KMMzTp0/79esXHx/PMMx3332np6f36aefLl26VENDIywsjL1w165dGhoaS5cu/fTTT9l6DQzDZGdnd+zYUSwWv/POO/7/acDbrhUSrLqIjIyUSCQGBgZsVmRgYDBz5swX1y+8irS0NG4BoHIJtZps375dR0eHiKpdy938sLOv2rRpU+3Z2tMvgKaia9eus2fPVigUZWVlgwYNmjZtGsMwBQUFDx8+ZBgmOztbW1v7zz//ZBgmOTnZxsaGXelWWlpqbm7OrhzMyclxcXGpsjNJE02wTpw4oaGhsXnz5tjY2Hnz5llZWXFJJCsoKMjc3PzkyZN3794dMGDAwIED2fadO3fq6uqGhoY+fPhw8uTJbdu2Zb+vvvHGG2PGjLl79+6DBw969eo1evRo5VfLzs7W0tISi8XKiZqy2bNnE9GKFSsa5G6bi8rKyrFjx4rF4lpWsnft2pWIOnbsWKX9+vXrS5cuffvtt7nkKT8//5NPPklPT2cPDx8+/M477yxevLhK2ZGzZ88uXrz43XffPXLkCNuSlpbm/wLebvI1IcGqu9LS0tDQUG9vb5FIxGZaHTp0CAwMzMrKesVXuHjxIvtE/1UWAMpkMn9/f/aNJBJJ7Vv0NBsVFRUaGhpqampVPmFZ7OyrmtIvgKYiMjLS3t7eysrKyMioc+fO7KKqXbt26erqsh1CQkJ0dXVbt26tqanp6+vL/fM/cuSIsbGxvb29jo6Ot7c3N5puYWHBzuUyMDAwMTGp5yi7ivn4+LAzphmGKS8vNzExUV4dVllZaW9vz9W3ZD8Ebt++zTBMr169PvzwQ7Y9Ly9PS0uL/b2bnJzM/Qns27dPS0tL+UEBO6wyZsyYaoMpKCjQ1dUViUS1DMwAq6Cg4Ny5czWdLSkpYdds7dixQ5VRCQgJFg9iYmL8/f3ZnVuISFNT08fHJzQ0tPbxZKlUqqmpSUQDBgx4+vRp7W+RnZ3NPpbW0tLatGkTr+E3duxEqzt37rx4qvb0C6AJkclkt27dun//Pvco8Pnz548fP+Y6FBUVRUZGJiVV3ai+pKTk5s2b7MMUTmJiYoKSprXdu62t7ZYtW7jDUaNGLV68mDtkVw0rPze0sbHZsmWLQqHQ1tY+dOgQ196zZ8+vvvqqyovv3r3b1NRUufIOO02npl1NN2zYQESDBw+u1y0Bw7Dzpao8xW7ekGDxRi6Xh4eH+/r6isViNtNi58i/uOV4eXn5woULX30s6ubNm46OjuwLXr16tcHuoJFiy8/UtGNoLekXADQ5CoVCXV398OHDXMvs2bOnTJnCHd66dYv+t3JS586dv/3229zcXCK6fPky1+7j4/POO+9Uef2RI0cqb8Zy6dIlIrKysqrpc7h79+4tatCl4QwfPpyIfvnlF6EDUR2sIuSNurq6t7d3aGhoUlJSYGCgs7NzWlpaUFBQ27Zthw0bFhISwtaned0FgDt37uzXr19SUlLfvn0jIyPZCVstSu21GDp06FDLWQBoWkQikZ6eHltPklVcXKy80p6d/PpiB319fXYwu6YLieiHH364devWN998w7Vs2rSJiObMmVPt5/Ddu3dv3LhhZGQ0YcIEHu6tBUtJSTl16pS2tvb06dOFjkV1kGDxz9bW1t/f/9GjRxEREVOnTtXU1IyIiJg9e7aDg8PUqVM7d+587tw5BweHS5cusRXbayKXywMCAqZPn15SUiKRSE6fPs1WbG9paq/FgAQLmqv09HShQxCGg4NDXFwcdxgXF8eum2bZ2tqKxWKuQ0lJSXp6uoODg4aGhrW1dXx8PNvOMEx8fDx3IcMwX3/99XfffRceHs7Vsnr+/Dm7/r+mj+KNGzcS0YwZM3R1dfm9x5Zm06ZNlZWVEydOrLaOa7Ml9BBa85efny+VSrt160ZEYrFYQ0PjVRYA5uTksBtjaWlpsfuOtVjsGD67Lv1FmzdvJqLp06erOCqABpWammpsbOzr65uTkyN0LKr29ddfu7i4sNPSL126pKamxu0jxJo0aZKPjw87sezXX381NzcvLS1lGGbp0qVeXl7s9imHDh3S1NTMyMhgGKagoGDSpEmdO3eusmUeu2H8gAEDqg2jvLycLSp28+bNhrnRlqKyspKd5XLq1CmhY1EpJFiqExkZyY5X17IvJuvWrVtt2rQhIhsbG+UpBS3Ts2fPiMjAwKDaHSFrT78Amqh9+/axBVmsra337dsndDgqlZeX16lTJ2dn54kTJxoaGnILAx0dHdnJ7/fv37ewsPDy8vLx8WHnWrAd0tPTnZyc3Nzcxo8fr6en980337Dt7Ch47969vf/DZmy9evUioprq4+zatYuIOnfu3MC32/wdP36ciNq0adO0FlvUn4ipYfMgaAiDBg06d+7ciRMn2Ol+1fr777/nz59fUlLi6em5b98+5bHxFsvS0jI7Ozs1NVV5JwRWbm6umZmZgYFBQUEBVy8DoBlITExcuHDh6dOnicjHx0cqldra2godlIqUl5cfO3YsKyvLw8OjR48ebOORI0c6duzYunVrIsrLyzt27FhZWVn//v3btWvHXVhcXHzs2LH8/Pzu3bt36dKFbdyzZ0+V33Te3t5Pnz51dXU1NDTMyMio9gngsGHDIiIifvvtt3fffbeBbrOFmDJlSlhY2Ndff/3pp58KHYtqCZ3htSx+fn5EtH79+mrPyuVyf39/NkuYMWMGS
g9w+vfvT0QRERHVnmU3FVFeVQTQPCgUCqlUyk7rNjExkUqlQkfUfCxdupSI3n777WrPPn78WE1NTVtbOzc3V8WBNTM5OTlaWlrq6urJyclCx6JqmOSuUi/dO+/WrVvq6uqBgYFcxXYgzHOHlkokEkkkkpiYmLFjx+bl5fn5+Y0ePZrb4BbqrKKiYseOHVTz9oKbN29WKBSTJ09uWZOyG0BISEh5efnIkSNbtWoldCyqhgRLpWpPBdTV1Xfs2HH27FmuYjuwsOUztGS2trYHDx4MDQ01MzM7evRop06d2IUvQsfVhB08eDA7O7tTp05eXl4vnlUoFNu2bSPs7syHlrxPNhIslXppKmBubt63b18VRtQ01J6YIsGClsDX1/fevXuTJk0qKCjw8/MbNGiQci0DeC1s+asFCxZUe/bEiRPJyclt2rQZOHCgauNqbq5evXrnzh0rKysfHx+hYxEAEiyVat26tba2dlpaWmFhodCxNCV4RAhARNbW1nv27AkNDbWwsDh//ryHh0dQUFBlZaXQcTUxqampERERmpqa06ZNq7YDl35h3Uw9sX+Ss2bNqr2ednOFBEul1NTU2rVrxzAMvnq+FicnJy0treTk5OfPn794FiNY0KL4+vrGxsZKJJKSkpKAgIABAwbg28Vr4YpesmWuqnj27NmRI0fEYnHthaDhpYqLi0NDQ4lo7ty5QsciDCRYqobhljpQV1d3dnZmGIYr06ys9vQLoPlhVxT+888/9vb2ly9f7tq16xdffCGTyer/yiUlJbdv305NTa2pQ0FBwc2bN9nqdMqKi4tv3br1YvX58vLyu3fvPnnypP6x8YJhmJCQEKp5VtDWrVvZSdktpyhGA9m9e3dhYWG/fv1cXV2FjkUYSLBUDcMtdVNLYlp7+gXQXI0aNerevXsSiaS8vPzLL7/s3r37zZs36/OCYWFhNjY2I0eObNOmzfjx48vKyqp0CAwMtLCwGDt2rLW19dKlS7mJ9tu3b7eysho1apSjo+Nbb73FpXonT560t7cfOnRohw4dBg8eXFBQUJ/weBEREZGYmNi6deshQ4ZU22HLli3UUidl84t9PtiS/ySRYKkaEqy6wTQsgBcZGRlJpdKzZ8+2bdv29u3bvXr1CggIKC8vr8NLFRQUzJ8///PPP8/IyHj8+HFUVNSvv/6q3OHOnTuffvrpwYMHU1NTL1++LJVK2Qrd2dnZEonkxx9/TE9Pj42NPXPmDLuFX0VFxZw5c+bPn5+VlZWWlpaZmblmzRpe7ro+2N/68+bNU1Or/tff/v37V61aNXr0aNXG1dzExsZeuXJFX19/8uTJQsciHCGLcLVIN27cIGy/8PrYtb5Tp06t9uzy5cuJ6IsvvlBxVACNRHFxsb+/P5s0dOzY8dq1a6/7Cn/++aelpaVMJmMPV69e7ebmptzhgw8+6N+/P3f41ltvTZkyhWGYtWvXOjg4cDtZBQQEdO/enWGYAwcO6OjoFBYWsu1SqdTCwqJON8ePBw8eLFu2zNjYWE1NLSkpScBIWoJly5YR0cKFC4UOREgYwVK19u3bi0SiR48eYe3Pa0GlBoBa6OrqBgYGXrhwoUOHDvfu3evbt++SJUuKi4tf/RUePXrUqVMnsVjMHnbt2jUuLk6hUCh36Nq1K3fo4eHx6NEjtt3Dw4NbcOfh4cEu4omLi2vbti1bhp5tz87Ozs/Pr9d9vr6CgoLg4OA+ffq4ubn98MMPz58/VygU7DQsaCByubz2Oq4tBBIsVTMwMLC1tS0rK0tOThY6lqaETbBiY2OVP/GrnMUjQmjh+vTpc/PmzY8//phhmPXr1w8aNOHixVetR5qbm8vuRs8yNjaWyWRFRUW1dGCnur/YXlBQIJfLc3NzjYyMlNuJKCcnp4739poUCsXFixf9/Pzs7Oz8/PyuXLliaGg4c+bMr7/+Wl1dfdWqVYcPH1ZNJC3QoUOHMjMzO3bs2LNnT6FjERISLAFguKUOjIyMrK2tS0pKql3fxP2RVpt+QRX//PPPkCFDXFxcJkyY8ODBgxc7/PLLL927d3dzc/Pz81NeL7Z9+/Y+ffp06NBh+vTpyhu2XL58+c0332QfCoCwdHR0vvvuu8jIyK5du2pr/zFggMjPj5TSpBpZWFgoDy89e/ZMW1tbOXN6sYOlpWW17aampmKx2NzcPC8vT7mdiNhLGlRqampQUJCLi0v//v2Dg4NLS0v79u0rlUrT09NDQkL8/f1Xr16tUCimT59e7V9+qD9Mb2chwRIAhlvqppbE1NjYuJb0C5TdunVrwoQJAwcO3Lx5s7a29tChQ6s8SNq4cWNAQMCiRYt+/fXXmzdvTp06lW0/evTo/Pnzp06dGhwcnJ2dPWrUKIZhiGj06NESiSQpKSkqKkqA+4HqeHh4XLlydcgQZ7GYgoOpSxeKiHjJJS4uLnfu3OEWAEZHR7u4uChX2nRxcVFepXjr1i32n6SLi0t0dDT33SY6Opptb9++fUJCArdyMDo62sbGRjlj41dZWVlYWNiYMWNat24dEBCQkJBgb2/v7+8fFxd38eJFiUSip6fH9gwICHjzzTeLioomTpzYGBY2NjNpaWknTpzQ1NScPn260LEITehJYC3RunXrqOZd3KEmfn5+RLR+/fpqz7KbWpw8eVLFUTU5c+bMYXMjhmEqKirMzc3//PNP5Q6urq7ccgH2K/6dO3cYhhk6dKifnx/bnpOTo6Ghcfz4cYZhnj17xjDMt99+O2jQIJXdBbyiu3eZ7t0ZIoaI8fVlnj2rsWdRUZGRkdE333yjUCgSEhJsbGzYf2spKSl3795lGOb+/ftqamr79u1jGObChQva2tqnT59mGObZs2e6urpr165lGObhw4fm5uabNm1iGEYmk9nb2y9durSysjIjI6Ndu3YrV65siHuMjIxcvHixmZkZ+0tNW1vb19f30KFDcrm8pktKSkq6detGRCNGjKilG9TB6tWriYhdANHCIcESwIkTJ4gIv41e108//URE7733XrVna0+/gOPq6sr+LmRNnjx5wYIF3CG7idPly5e5llatWrEZmL6+/p49e7j2nj17fvnll9whEqxGSyZj1q5ldHUZIsbamtm3r8aeR44cMTc3NzExEYvFM2bMqKioYBhm5cqV7KpAhmF++eUXXV1dc3NzTU3Nzz//nLswLCzMxMSEfTK4YMECLmU5f/68nZ2dsbGxWCweO3bs8+fPebyvjIyMtWvXdunShRsv8PT0XLt27bNaskglT548YSu5K98I1JNCoXB2diaiEydOCB2L8JBgCYAtamxjYyN0IE3M0aNHiWjo0KHVnq09/QKOsbHxX3/9xR2+99573IAWwzDsk+v4+HiuxcvL68svv2QTr/Pnz3PtY8eO5Qa0GCRYjV58PDNo0P8PZWVlVd+toqIiNjY2OzubaykuLs7Pz+cOnz9/HhsbW1BQUOXCsrKy2NjYnJycKu1yuTw2NjYzM5Of22AYuVweHh7u6+vLbW9nY2OzePHi27dvv+5LRUREiMVikUgUGhrKV3gtXEREBBG1atUK44IMyjQIwsHB
QVdXNyMjQ/Urlpu02hcHsGcxs+2ldHR0lAtRlpWVcXNT2LNEVKWDrq6utra2mppaLRdCI+fsTKdPk1RK+voUFkYdO1K1lQo0NDRcXFyUN+nT1dVVXgyop6fn4uLy4lQqLS0tFxcX7jkdR11d3cXFxcrKqv638ODBg4CAAFtb22HDhoWFhYlEIh8fn9DQ0KSkpHXr1nXu3Pl1X3Do0KHffvstwzDz5s27d+9e/SMEbnq7urq60LEIDwmWAEQikYuLCxGxVWTgFbVu3VpbWzstLY0dTamCq+Og8riaGHt7+8ePH3OHjx8/tre35w5tbGzEYjHXQS6Xp6SktGrVSkNDw8rKqpYLofETiUgiobt3ydubsrJo9mwaM4bS0oQO62Vyc5//+uuvnp6e7u7uQUFBWVlZXbp0Wbt2bVpa2uHDh5WHsupg2bJlc+bMef78+dixY1/cYBFeS35+/oEDB9TU1LBPNgsJljCwkLAO1NTU2rVrxzAMW8awCi79KnqVJekt2KRJk/7++++SkhIiSkhIOH/+/KRJk7izGhoaY8aM2bx5M3u4b9++ysrKESNGENHEiRO3bdvGFsi9ePFiYmLihAkThLgDqJfWrenkSdq2jUxN6cgR6tiRgoOJedVqWaqjUFBEBM2aRZ076y5b9unNmzeNjY0lEsmFCxdu3bq1ZMkS5WG2+vjjjz+8vLweP348depU1H+uj+3bt5eWlg4bNszR0VHoWBoHoZ9RtlCrVq0iohUrVggdSBPj6+tLRDt27Kj2bKdOnYgoMjJSxVE1Lbm5uS4uLp07d37nnXdsbW0nTZrEtvft23fLli0Mw0RHRxsaGnp7e8+bN09PTy8oKIjt8OTJE2tr6z59+kgkElNT0yVLlrDtR48elUgkPXr0sLGxkUgkmM7SVKSnM+PH/zsra8QIpvFsHhMbyyxfztjZ/RubujojkYSEhoaWlZU10DsmJSVZWFgQ0fLlyxvoLVoCDw8PIsInAEcsZHLXgqHWaN28dBrW3bt3Y2JiPD09VRtXU2JiYhIVFbV79+6MjIxff/113LhxbPvbb7/NLsjy8PB48ODB3r17i4qKTp482adPH7aDo6Pj3bt3w8LCnj17tmvXruHDh7Pt1tbWnp6e3J85nhs2FTY2tH8/hYXRu+/SiRPk6kqff04ff0w1bILc4EpL6cgRCg6mU6f+HVFzcaGpU2nOHGrdemaDvrWDg8O+ffuGDh0aGBjYpUuXN998s0HfrlmKjIy8deuWmZnZ2LFjhY6l0RA6w2uh2Hp97u7uQgfSxGzfvp1qrrDy2WefEVED1doBaK6ePmVmzvx3uKhfPyYmRtUBREYyEgmjr/9vDDo6jK8vEx7O/Ld/tIqsXbuWiHR0dKKiolT6xs3C22+/TURLly4VOpBGBHOwhMGWSI6Li5PL5ULH0pRgy2cA3llaUkgIHTpEdnZ08SJ17UpBQVRlMlJ6OiUmUmIiVVRU/yIlJf92+K8U/MulpVFQELVrR15eFBxMz5+TpydJpZSVRaGh5O1NSmXkVWHJkiXz588vLS2dNGmSyvZMbB5KS0v//vtvIsL0dmVIsIShp6fXqlWriooKtiYWvKL27duLRKJHjx5VOxcVSwcA6mzMGLp3jyQSKi2lgADq14+Ud+qbPJmcncnZmVavrv7yc+f+7ZCY+JI3Ki+nsDAaM4Zat6aAAIqPJzs78venuDiKjCSJhPT1ebup1/Xrr7/26NHjyZMnb731Fr79vrqwsLD8/PyePXvWoVhGM4YESzAYbqkDAwMDW1vbsrKy5OTkF8/Wnn4BQO2MjUkqpWPHyMGBrl4lDw8KCKg6ZPXdd1TnLZKjomjJErK3pylT6MgRUlcnX186dIiePKHAQGrbtv53UF/a2toHDhywtbU9derU8uXLhQ6nycDuztVCgiUYDLfUTS2JqYGBgY2NTU3pF9SNQqHYtGnT0qVLU1JShI4FVGHkSLp7l/z8SC6noCDatu1/zlZU0Ntvv15Zh9xcCg6mrl3Jy4vWr6ecHPL0pLVrKTWVQkNpzBgSN6bVVjY2NmFhYZqamj/88MPWrVuFDqcJSExMvHDhgp6eHhYHVIEESzAYwaqb2hNTlBvlV3FxcdeuXRcsWLB27VoHB4fWrVv//fffTCMsmgS8MjSkDRvo1CmaPJnmzv3/9j59SF2dLlyomnVVq7KSIiJoyhSytiY/P7p1i0xNSSKh6GiKjKQlS4inOlb869Onz88//0xE77zzTmRkpNDhNHbsXqVTpkx5sb5/C4cESzDY2qVuak9MMS7Io8TExD59+ty5c0ddXd3AwICIkpKSpk6d2qFDh6CgoMzMTKEDhIY1eDCFhf3P8JKbG7GTmD/5hGope84wtGwZ2drSsGEUFkZENH48HTxIT5+SVEoeHg0aNT/efffdhQsXlpWVTZo0KTs7W+hwGi+5XB4SEkJ4PlgdJFiCwVhL3WAhoWqcO3euV69ed+7cadeu3e3btwsLCy9evDhjxgxHR8dHjx4FBATY29uzW8LJXn3ZGDR933xDhoaUnU3+/jX2EYno5k3KyiJXVwoMpJQU2r+fxo5tXI8CX+q3337r379/cnLyxIkTK2paPNniHT16ND09vX379lzBPOAgwRKMnZ2dgYFBVlZWbm6u0LE0Ja+y5TMSrHoKDg729vbOzs4eNWrU9evX3d3diahv377bt29PTEwMDw/39fVVU1OLiIiYMmWKo6NjQEBAfHy80FGDKlhZ0YoVRESbN9PZszV2+/ZbunGDHjwgf3/iY6NnAWhoaISGhtrZ2V28ePGTTz4ROpxGip3evmDBApGKi2o0CUIX4mrR2OLXly9fFjqQpkShUOjq6hJRXl7ei2fZshc2NjYqj6uZKC0tnT17NhGJRCJ/f//Kysqaeubm5kqlUuVV2Z6enlKp9Pnz56oMGFSjd2+GiFmwgGEYpqyMadeOIWLc3ZmKin87HD36b5lQ1dcpbVBXrlzR0tIiok2bNgkdS6OTkZGhoaEhFoszMjKEjqUxwgiWkDBhqA5EIpGLiwsRPXr06MWzrVq10tXVzcjIyM/PV3VkTV9qauqAAQO2bdumr68fFhYWGBioVvO2KSYmJhKJ5Pbt25GRkRKJRF9fPyoqys/Pz9bW1s/P7+LFi6qMvBbPnj07e/bs3bt3a+qQmpp66tSpx48fV2nPyso6c+bMgxdqEhQUFJw/fz46OlqhUPAfblOgpUW//kpEdP8+/fST0NE0sF69ekmlUiJ67733rl+/LnQ4jcvWrVtlMtnYsWOtra2FjqVREjrDa9G++uorIvL39xc6kCbmrbfeIqJt27ZVe5bdcPTatWsqjqqpO3/+vJWVFRG1bdv23r17r3t5YWHhtm3bvL29uc8WNze3wMDA7Ozshoj2FW3cuFFbW9vV1dXAwGDQoEFFRUVVOvj7+2toaHTs2FFLS2vu3LnciN0vv/yipaXl5uamr68/YsSIkpIStn3v3r16enouLi4mJibdunUT9u5USXkEizV5MkPE6Ooyjx8zTPMdwWK98847RGRjY5OWliZ0LI0IO0bwzz/
/CB1II4UES0i7d+8movHjxwsdSBOzatUqIlqxYkW1Z9laLCEhISqOqkmTSqUaGhpENHLkyNzc3Pq81MOHD/39/S0sLNg0S0tLy9fXNzw8XKHijeUYJicnR0dHh11Dnp2d7eTk9M033yh3uH79urq6+oULFxiGuX//vr6+/oEDBxiGSUtL09DQ2LlzJ8Mw6enpdnZ2a9euZRimpKTE3Nx8zZo1DMMUFhZ26dJlyZIlKr4pobyYYKWk/Lt7oK8vwzT3BKuiomLgwIFE1KdPn/LycqHDaRTOnj1LRHZ2dnK5XOhYGikkWEK6ffs2EXXo0EHoQJqYnTt3EtGkSZOqPcumX59++qmKo2qiysrK5s2bR/9NuuLrs7K8vPzQoUO+vr7q6upspmVvb+/v7/+YHe5Qid9//93W1pYblAoMDGzfvr1yh/fee2/o0KHc4YwZMyZMmMAwzPfff+/s7My1r1y5smvXrgzDhIWF6evrc6NZmzZtMjU1bei7aCReTLAYhvnuu3+TqrNnm3mCxTBMZmamvb09Eb377rtCx9IozJw5k4g+++wzoQNpvDAHS0guLi7q6uoJCQlY6P5aXqVSA2a2vYq0tLQBAwZs3rxZX19/9+7dgYGBXD5UT5qammPGjAkNDU1KSgoMDGzTpk1qampQUJCzs7PKijskJia6u7tz08g6duyYmJioPHEqISGhU6dO3CHbgb2wSntCQgLbv23btjo6Olx7bm5uXl5eQ99Io/XBB+TuTkS0dCk1+wlpVlZWhw4d0tHR+f333//880+hwxFYQUHB3r17RSIRdneuBRIsIWlrazs4OMhkssSX7o8KSlxcXEQiUVxcXLW7saJSwyu6dOmSl5fX9evXnZ2dL1++7Ovr2xDvYmdn5+/vHx8fHx4ePnPmTC0tLba4g5WVlZ+f3507dxriTVl5eXn6SvsGGxkZyWSy58+fcy35+flsAVWuA5st5efnV7mwqKhILpdXaWfrVrfkMisaGrRhA4lEFB1Nf/0ldDQNr2vXrtyE9wsXLggdjpB27txZUlIyZMgQZ2dnoWNpvJBgCQzZQB3o6em1atWqoqKCLcpQBbflc7XpF7CCg4OHDBmSmZk5cODAK1euKA/YNAQ1NTVvb++QkJD09HSpVOrh4ZGXlxccHNylSxcvL6/g4GDlvIcvVlZWysNLOTk5urq6yrt5WFpaKqdHOTk57GIoS0vLKheam5uLxeIq7c+ePWPfhffIm5B+/WjGDCKiv/8WOhSVmDlz5uLFi2Uy2ZQpU9LS0oQORzDY3flVIMESGCo11E0tiamenp69vX1FRUVSUpLK42oCysvLFy5c6OfnV1FRIZFIIiIiuAnpKmBsbCyRSKKjoyMjIxcvXmxqasoVd5g1a1ZERASP7+Xq6nrr1q3y8nL28MaNG25ublU6XLt2jTu8ceOGq6sr2x4VFVVZWVnlQjc3t7i4OC7HunHjhqOjo/KYVsv0/fdkbPx62z83aT/++OPgwYMzMzMnT57M/e1qUe7evRsVFWVsbDx+/HihY2nchJ4E1tL98ccfRDRv3jyhA2liNmzY8OGHH96+fbvas8OGDSOiI0eOqDiqxi8tLa1Xr15EpK2tvXXrVqHDYUpLS0NDQ729vbky0B06dAgMDHz69Gn9X7ykpMTMzMzf37+8vPzmzZvm5ubsisJHjx6x1X3j4uLU1dU3bdokl8sPHTqkoaHBthcUFBgYGHzxxRcVFRVXr141MjJiVxTK5XInJ6eFCxeWlJQ8evTIwcGBXVHYElQ7yZ3zyy//znBvxpPcleXk5LRp04aIZs+eLXQsDaL2lZLvv/8+Eb3//vsqi6eJQoIlsNOnTxNR3759hQ6kWVm0aBER/fjjj0IH0rhcunTJxsaGiOzt7W/cuCF0OP8jJibG39+fe9ymqanp4+MTGhoqk8nq87Jnzpxp3bq1pqampqbmokWL2BWFn332WY8ePdgO27ZtMzMz09TUNDAw+Omnn7gLjx8/3qpVK01NTW1t7WXLlnE1JiIjI11dXTU1NcVi8axZs1rOiv3aE6zKSqZHjxaUYDEMEx0dzW4p8ccffwgdS71UVFQkJCQcOnQoMDBQIpH07dvX0NDw+++/r6l/WVmZubk5EUVHR6swzCYJCZbA0tPTiajlLPZWjV9++YWIJBKJ0IEwCoVizZo1rq6udnZ206ZNe3FDifz8/IULFzo4OLRt25Yda2HbS0pKPvzwQycnp9atW7/33nvc/jNlZWUbN27s1avXli1bXisSqVSqqalJRAMGDMjMzKz3nTUIuVzObnQo/m9bYHaOfGJiYp1fU6FQZGZmcrUV2BblLYBkMllmZuaLqVJlZWVmZmZpaemLr5mZmVlcXFznkJqi69eZ8HDmwYMaOyQnM+HhTHg403L+YP766y8i0tDQOHv2rNCxvKrMzMyzZ89KpdIPP/zwjTfecHJyqnbh8KJFi2p6BbZKjqenpyrDbqKQYAnP2NiYiFpOSWgVCA8PJ6Lu3bsLHQjz22+/GRoa7tmz5/r16/369evfv3+VDm+99Za7u/vFixePHz9uY2PDVU/94IMPHBwcIiIizp07165du/nz5zMM8/TpUwcHh1GjRjk5OX3xxRevGEN5ObNiRYa6uiYRffDBB/UcE1KNtLS0wMBAboESO0d+27ZtynkSgOA++ugjIrKyskpJSRE6lqrYoanw8PC1a9dKJBJvb+9qF2SIxWInJydvb+/FixdLpdLw8PD09PRaXnbo0KFE9Pvvv6vsRpouJFjC69GjBxGx5aSh/hQKxYoVK4yMjDQ0NHic0FM37dq146qHx8fHE1FUVBR3NjU1VSwWnz9/nj3csmWLqalpWVlZYWGhgYFBaGgo237s2DEtLa2cnByGYdihrOHDh79igpWRwfTtyxAx3t7Xmlx1e4VCceHCBYlEwj6LIaU58kKHBsyjR4xEwnz+udBxCEoul7/xxhtE1LVrV2Gz/9zc3MjIyG3btvn7+/v6+np6empra7+YThkbG3t6evr6+q5atSo0NDQyMvK1wk5MTFRTU9PR0cnLy2uwW2k+kGAJj62Hy06/hXrKz8/38fEhInV1dRMTE25Cz+TJk48dO6b8YEg1wRDR1atXuRZHR0epVModHjlyREtLi5vfwy57fPDgQWRkJBE9e/aMbS8rKxOJRGfOnOEufMUEKyqKcXRkiBg7O6ZJ782Yn58vlUq7devG/Z7w9PRcu3Yt90cEqnf9OkPEeHkJHYfQnj17xg61zpw5UzXvKJPJqgxNsXMrX2RjY+Pt7S2RSNauXRseHp6QkFDPHas+++wzVd5pUyeu9v8KqBJKYfElLi5u/PjxDx48MDU1/fvvv4cMGXLmzJng4OD9+/fv2bNnz549tra2M2fOlEgkTk5OvL97aWnpmTNnuEM7OzstLS0iUi6CYGlpmZGRwR1mZGSYmZlxC+gsLS3ZxpKSErFYzCWIWlpaRkZG7HS9V/fXX7RwIZWWUr9+FBZGTXq3eyMjI4lEIpFI7t+/v3379o0bN0ZFRUVFRQUEBIwZM0YikQwdOpT7YwTVYE
u0FhUJHYfQTE1N9+3b16dPn+3bt3fv3p1dYcej/Pz8hISExMTE+/fvP3jwIDEx8cGDB6WlpVW6GRkZtW3b1snJycnJyc3Nzd3d3dXVlRv65YVCoQgJCSGUv3plSLCEx5bCQoJVT0ePHp0+fXp+fn7nzp3379/PplDe3t7e3t4ZGRkhISEbN26Mj48PCgr6/vvve/fuPWvWrBkzZvD4AVRYWPj9999zh8OHD2fHJpU/CktKSpRLh+vp6VU5S0QGBgYikUgul1dUVLApGvsiyhfWTi6nzz6joCAiIomEfvmFNDXrfFuNi7u7e2Bg4BdffHH48OHg4OBTp06FhYWFhYW1b99+7ty5c+bMaeFlP1WJrf/VAAVim57OnTuHhIRMnjz5ww8/7Nix4+DBg+v8Uunp6WwWxaVT1e7zYWNj4+7uzuVSTk5Obdq0aejvGD/++GNycrKjo+OAAQMa9I2aD6GH0IC5d+8eEbVr107oQJoqhUIRGBjIbjk3ZcoUbsHdiyIjIyUSiZ6eHvuXn53Qc/PmzQYKTCaTaWhoHDhwgD0sLy83NDTkZlYxDHPhwgWRSMROrmIY5vr160SUnp4eFxdHRA/+W7KVnJxM/7soupZHhNnZzJAhDBGjpcU0+8fOjx49WrVqlYODA/s/VF1d3dvbe/v27WVlZUKH1vzl5zNEjKGh0HE0GgEBAURkZmb2iote8/PzIyMjQ0NDAwMDZ86c6enpWe33PS0tLTc3N19fX39//23btkVGRtbyEdegbG1tiWjEiBGCvHtThARLeOXl5WKxWCwW47dCHRQVFU2aNImIRCLRqlWrXmWGATuhp2/fvtxHWMNN6JkyZYqPjw8bVUhIiKGhYWFhIXdWJpO1atXq22+/ZQ8XLlw4YMAA9ueuXbsuWbKE/XnlypXt27dXvrWaEqzoaKZ1a4aIsbVlrlzh/W4aqcrKSra4g4aGBpdp9erV6/Tp00KH1pzJ5QwRo6bG1G9WT/NRWVk5atQoIvLw8HhpFY/ly5e/mEuJRCIHB4dhw4YtWrTot99+i4iISE5OVk3wtXv27BlbE5tQ/up1IMFqFNq2bUtE9+/fFzqQJiYuLq5jx45EZGhoePDgwde9/N69e/7+/mzRPCLS1tb29fUNDw+v5zxQZffv3zczM+vTp4+vr6+Ojs769esZhnn69KmJiQn7ObVr1y4tLa1x48Z5e3vr6+tfunSJvfDkyZM6OjojR44cM2aMtrb2oUOH2PavvvrK19fXysrK3d3d19dXeU3izp2Mri5DxPTpw9S6zrrZysjIWLt2LTdCqa6ufvLkSaGDas50dBiiFlT46qVyc3PZD/Np06bV3vPXX3/V1NR0cnLy8fHx9/eXSqUXLlxQ/vYloNzc3AsXLkilUnZBovL2nba2tkJH15SImJazg1Qj5uPj888//+zbt2/ChAlCx9JkHD9+fNq0aXl5ee3btz9w4AA7la0OysvLDx06xE7oYf85uLi4TJ06dd68edyzp/rIzs4+fPhwUVHRwIEDPTw8iKisrCw0NHT06NFmZmZEFBsbGx4erqGh4ePjY2dnx12YlJR09OhRhUIxcuRIriLUP//8o7zF7MiRIx0cHCor6dNPm+ekqzpQKBS//vrrp59++vz58759+168eFHoiJotKyvKyqLMTMLMN05MTEzPnj0LCwt//PHHDz/8sKZuFRUVGhoagq/MKC8vf/ToUWxsbGxsbExMDPtDYWHhiz01NTVdXV2PHDlib2+v+jibKqEzPGCY/6rVtZx9zeqJnXTFFiD28fHJz8/n5WWTk5MDAwMdHR3ZfxrshJ7Q0NCKigpeXr+B5OQw3t4MESMWM4GBQkfTaPzwww9ENHbsWKEDac6cnBgiJj5e6DgamQMHDohEInV19WPHjgkdy/+oUivLzc2t2jLuJiYmyrWyzp07V1RUJHTsTRJGsBqFP//8UyKRzJ49e+vWrdV2YBimtLSU3zW3TVRxcfHcuXPDwsJEItEnn3yyZs0adno7XyorK48dO7Zp06Z//vlHJpMRka2t7bx5frNnf962LY/vw487d2j8eHr8mCwsKDSUBg0SOqBGIyoqysvLq1OnTnfu3BE6lmbLw4Nu36boaPLwEDqURubzzz9fvXq1qanp9evXueFnVZLJZCkpKcqrEe/evfv06dMq3cRisYODg/JqRHd395qqasHrQoLVKJw/f37gwIG9evW6cuVKTR3GjBkzbty4WbNmeXt7qzi8xiM5OXnixIlRUVEGBgYhISHjx49vuPfKzc3ds2fPr7/+evfu3b59Z1y6tN3TkyQSmj6d/pvkI7Ddu2n+fCoupm7daP9+4uN5ZvNRXFxsYGCgqalZXFxc7dd0qL9+/ejSJbpwgfr1EzqURkahUIwbN+7IkSOdO3e+fPmyXgN/ZOTl5bEFHbh06v79+2VlZVW6GRsbOzs7K6dTbm5uOjo6Vbrl5+f/9ddfKSkprq6ub731FlcshvP48ePdu3cXFRX17t2bLezMyszM3LlzZ05OjoeHx+TJk7mvvnK5/MCBA0Q0efJkfm+8sRN4BA0YhmEY9ouFsbFxTR2++OIL7n9Z586d161b1wJrWJ89e5Yt2tmuXTtVLgi4cOHCsmX39PQYIoaIMTZm3n2XUZpcLgC5nPH3Z0QihoiZPp3BBn3VYieLJCQkCB1IszVyJEPEHD0qdByNUmFhoZubGxFNnDiRx3UzDV3GPS8vz9nZuVu3bh999JGzs3O/fv3kcrlyh1u3bunp6Y0aNWrx4sUmJiZLly5l25OSkiwsLAYMGLB06VJbW9spU6aw7b/++qujo6Oenl5j2BxWxZBgNRampqZElJmZWVOHhw8f+vv7s8W+iUhLS8vX1/fQoUNV/vY3V1KpVCwWE9GoUaME2QaroIDZtu3f2U7sf25uTGAgo/pNugsKmDFjMOnq5dhdaf/55x+hA2m2Jk9miBilym7wP2JiYoyMjIgosK7/UPPy8thaWatWrWJ3GHxxwImIjIyM2FlTXK2sl9aJqMkPP/zQpk0b9vL09HRdXV1uCTNr8uTJ48aNY38+ceKEhoZGWloawzBLlizp1asX+/vozp07IpGIXSi9d+/e+/fvr1+/HgkWCKZ3795EdPbs2dq7sUvefH192WyDiOzt7f39/R8/fqySMAVQWlo6e/ZsIhKJRP7+/ireT/BFDx4w/v6MhcW/aZaWFuPry4SHq6gaUEwM06EDQ8SYmzOnTqniHZuu9957j4h++uknoQNptubOZYiYTZuEjqMRO3TokJqampqa2qsk+mlpaeHh4VKpdPHixd7e3jXt6MX7DoPK+vfv//HHH3OH48aNmz9/PneoUCi0tLT27t3LHdrY2LAb6bZu3fqXX37henbr1u3LL7/kDltmgoWtchoLdouPo0ePDhw4sJZumpqaY8aMGTNmTHp6+vbt24ODgxMTE9ntX4YMGSKRSMaNG6fZjNbop6amTpw48caNG/r6+lu3bmVrigrL1ZUCA+mrr+jECdq+nfbto7AwCgsje3uaPp3eeYf+W4b4EiUlVF5ORKSnV31VB
Yah/Pz/6XD4MM2YQYWF5OFB+/dT69b83FFzhV0+Gxp2y3mpMWPGrFq1atWqVTNmzLh+/Xrb/1bKFBQUxMfHK0+cevjwIbtZljItLS1nZ2flXXE6dOjQoDO6UlJSpk6dyh22bt06JiaGO8zOzi4vL2/930ePSCRydHRMTU1VKBTp6emtlT6SWrdunZqa2nBxNglIsBoLdk+6U6dOvWJ/W1tbf3//jz/++PLly9u3b9+xY0dERERERISJiYmvr++7777bpUuXhoxXFS5cuODr6/v06dO2bdseOHDA3d1d6Ij+n6YmjRlDY8ZQWhrt2EFSKT1+TEFB9P33NGQISSQ0fjz9V1e8egEB9MsvRERvvEFHj1bTISeH2AfC27fT9On03Xe0YgUpFDR1Km3cSFhR+lJsaTTlXw/AL+z3/CpWrlx569at/fv3e3t7Dx069PHjxw8fPszMzKzSjU1WXFxc2rdv7+rq2r59excXl1atWqk4WplMxj0eISINDY1y9osgERFVVFQQ0YsdFAqFXC6v5cKWCQlWY9GtW7cTJ06wu869OjU1tX79+vXr1y8oKCg0NHTDhg3R0dHBwcHBwcGenp4SiWTq1KmvvklwoxIcHLxo0SKZTDZy5MidO3eamJgIHVH17OzI358+/phOn6aQENqzhyIiKCKCTEzI15fee486d37JKxw7Rvv3U+0lZhmGLlwgkYgCA8nfn8fwmzOMYDU0jGC9CpFIFBIS4uDgIJPJNm/ezDZqamq2bduWHZpiR6e6dOnSGD6rbW1tlas5ZGZmKlc/tra2VlNTe7GDWCy2tLRUzhozMzP79OmjmpgbL6GfUcK/jv43iOHg4LB69eo6F7eMjIxcvHgxO2WeiHR0dHjf/qWhlZWVzZs3j/6bdFXtLP7Y2NiPP/54zpw5P//8c0l1i+jOnTv33nvvzZ8/f9euXcr3HhUV9cEHH8yZM+fPP/+UyWRc+7Nnz4KCgnbt2lXP4PPyGKmU8fD4/7nwnp6MVMq8WKjv/ff/v0+rVsyLm2RkZf17dvv2f1/5ZTP04H8oFAp9fX0iys3NFTqW5mn9eoaIWbRI6Dgavd27dxORvr7+119/feLEiSdPnjTaD+QPPvigb9++7M8ymczGxuaPP/5Q7tC7d+8PP/yQ/Tk+Pp6bzD5p0qS33nqLbc/Ly9PW1laedtYy52AhwWpERo8ezSW+bJqVkpJSt5cqLS0NDQ319vbmtmLo0KFDYGDg06dP+Y2Zd6mpqT169GA/jEJrWJ4UGxtrYGAwadKk7777rn379oMHD67SYd++ferq6kuWLPnqq6+MjY2XL1/Otl+8eFFDQ2PevHnffvutnZ3d7NmzGYZRKBSLFi0yNja2srIaP348XzcSGclIJIyBwb9JkoEBM3MmEx7+/x3YBMvZ+d/58v8tdv5/VRIsqIOuXbsS0dWrV4UOpHnasoUhYmbPFjqOxi0nJ4edYstOBm/k4uLidHR0lixZEh4ePmXKFBsbG7aMe2BgYEREBMMwe/fu1dTUXLdu3dGjR728vIYNG8ZeeOnSJbFY/NVXX504ccLb27tTp07sgqRHjx6FhobOmzfP2dk5NDQ0StgKN6qFBKtxuXbtmre3NzdQrKamxu7WUl5eXrcXjI2NXbVqFfcgX1NT08fHJzQ0VHnwpvG4ePGitbU1m19GRkbW1M3Pz2/QoEHsV8CkpCR1dfXz588rd/Dw8Pjkk0/Yn/fs2aOnp8dWdhg1atSMGTPY9itXrqipqbFFksLDw/Pz8wMCAnhMsFhFRcymTUyfPv8/WNWpE3P4MMP8l2B5ePw7DCAWM1V2qUeCVX/sdN2tW7cKHUjzFBbGEDGTJgkdR+M2bdo0Iho8eHCjHbWq4tKlS2PHju3Wrdu0adMePXrENi5YsIAb4N+5c+eQIUO8vLwWL16sXDTnn3/+GTFihKen54IFC9L/23B+79693krWrl2r2rsREhKsxqiysjI8PHzmzJlcyRMTExOJRHL79u26vaBcLg8PD/f19dX4b941O0e+UdVglEql7PrHgQMHZmVl1dLT0dHx999/5w779Onz6aefcocZGRlExH1PKi8v19bWPnjwYGVlpYaGxmE2u2EYhmEcHByUR78bIsHixMQw/v6MlRVDxBw8yDBKCZZMxri7M0RM9+6M8uNQJFj1x1bo5YYwgV/HjzNEzPDhQsfRiB05coSIdHV147FlY8vD5yZuwBd24CokJCQ9PV0qlXp4eOTl5QUHB3fp0sXLyys4OPj5a04r5fYtTkpKCgwMbNu2bXp6elBQULt27fr16xccHPzi8mBVKi8vX7hwoZ+fX0VFhUQiiYiIYCu2V4thmIyMDOUd3Vu1aqW8Hpj9WXnQzsrKKi0t7enTpzKZTPlCe3v7tLQ0/u+nOu3bU2AgJSfT/v00atT/nBKLae1aIqIbNyg4WDXhtBSY596gMMm9dgUFBW+//TYRffvtt4LsSAjCQoLVqBkbG0skkujoaG7qelRUlJ+fn6Wl5ZQpU9gn4q/1gjY2Nv7+/nFxcZGRkRKJREdH59KlS35+fnZ2dn5+ftHR0Q10I7VIT08fNGjQxo0btbW1t27dylVsrwl7y9zcsio/V0sk+v89N1/rQt5patL48fTi/Xl705gxREQrVtALy7eh7lCpoUGhTEPtPvroo9TU1F69erE1b6GlQYLVNHh6eq5bty4tLY2dul5WVhYWFjZs2DA3N7egoKCsrKw6vKBUKk1LS5NKpZ6envn5+cHBwd26dfPy8lq3bl1ubm5D3MWLrly54uXldfXqVXt7+/Pnz7MV22unpqZmbW2dnp7OtaSnp9va2nKH7M9ch8rKyqysLDs7O0tLS7FYXMuFwvrlF9LVpfx8+ugjoUNpRtq3b6+mphYfHy+Xy4WOpRnCCFYtTp8+vXnzZi0trU2bNmG78ZYJCVZToq2tzdZciImJ8ff3t7KyiomJCQgIaNWq1ZgxY8LCwl73t4iRkZFEIomMjLx3756/v7+5uTlbxcDOzq5uI2SvZfv27UOGDMnIyOjfv39kZGT37t1f8cIhQ4YcPHiQ/TkrK+vq1avsrnMsGxsbV1dXrgNboqJv377q6uoDBw7k2u/evfvkyZMhQ4bwd0P14uhIAQFERDt3UkSE0NE0Fzo6Oq1ataqoqHj8+LHQsTRDGMGqSXFxsUQiYRjmyy+/ZLd8hpZIyAlgUD/c1HXumZqdnV19pq6XlZVVKe7g4uKyatWq5ORkfiOXyWT+/5XLlEgkr1v06/bt29ra2n5+flu3bu3evXuvXr3Y5Tnz5s1jlxOGhIRoamp+8803f/zxh42NzeLFi9kLT548KRaLAwICNm3a5OLi4uvry7ZfvXpVKpWOHDnSw8NDKpWqZiExN8mdU17OtG/PEDGurkxFBSa582P48OFEpLy4AfhSUsIQMdraQsfR+CxatIiIPDw86lzREJoBjGC9HrlcfuXKlTNnzuSzu8S94Pnz52fPnr148SK7pQCnrKzswoUL586de3E6+aNHj/Ly8uoQDDd1PTk5OTAw0NnZOS0tjZ26PmzYsJCQ
EHb7nVenpaXFjpCxc+EdHR0fPXr05ZdftmnTZtiwYWFhYTKZrA5xVpGdnT18+PCgoCAtLa3NmzdLpVKN2veUeUHnzp3Pnz9fUVGxe/fuYcOGHT9+nM0INTQ01NTUiGjmzJl///13dHT00aNHV6xY8eOPP7IXsp2Tk5P37ds3Z86ckJAQtj0uLi4iIsLAwKBdu3YRERGJiYn1v8060NT8d/Ochw9pwwZBQmiGMA2r4ejokFhMZWXExwdD83HlypXff/9dLBZv3rz5dT/coFkROsNrSlJSUtq3b29tbe3m5mZgYPDid+Jz586Zmpq6uLjY29s7OjrGxMSw7Xfu3LGzs3N0dHR2drawsGDLHrK1GHx8fIho5cqVvETITl3X/W+bOm6OfN1ejY1QubgDN0e+zhHevHnT0dGRiOzs7K5du1bn12kGXhzBYvn6MkSMqSkTG4sRLB789ttvRLRgwQKhA2me3N27mplZPHuGWvn/Kisrc3V1JaJVq1YJHQsIDAnWa5g7d26fPn1KS0sZhlm5cqWdnZ3y8K9CoXBzc3v33XcZhpHL5W+88caYMWPYU0OHDp04cSJb1nbOnDmenp4Mw+Tm5k6fPn337t2DBg3iK8Fi5efnS6XSbt26cWm0p6fn2rVrnz17VrcXzMjIWLt2badOnZRfUCqVPn/+/LVeZ8eOHWxlr759+2ZkZNQtmGajpgQrPZ0xNPy3QDYSrPpjN1Dv16+f0IE0T2zdE95nETRdH3/8MRG5urqWlZUJHQsIDAnWq2LrVe7Zs4c9zMvL09TUPH78ONfh+vXrIpGI29zm9OnT6urqz549S0lJEYlE3GgN+6ji/v373IWjR4/mN8HisFPXzczM2KyImyNft4LCCoWCXejHjZC5u7u/4rVVJl3VuTB9c1JTgsUwzA8/MESMmhoSLB6wddEsLCyEDqR5YkdrlD/QWrKbN29qaGioq6tfv35d6FhAeJiD9aoyMjLKyso6duzIHhobG9vb2ytP1klMTDQyMuLqWLq7u1dWViYlJT1+/JhhGO5CFxcXDQ0N1czycXd3DwwM5Io7lJeXs8UdOnToEBQUpLwj+qsQiUT9+/ffunVrRkbGtm3bvL29x48f/yoX5uTkjBw5kp109eeff3IV26EmS5ZQly6kUAgdR7NgZ2dnaGiYnZ397NkzoWNphtjttF+39HGzVFFRMXv2bJlM9tFHH3FrogsLC99///0OHTp06tRp9erVlZWVVa568uTJm2++2a5dux49emzbto1rz8nJmT9/fvv27T08PH766SfmvwXdUVFR48aNc3Fx6d+/P7uHNDRaSLBeFTurnf00YRkaGipPTs/Pz69ylohyc3Pz8/PV1dW5TW9EIpGBgUHdZrXXTbVT1wMCAuzs7Nip669b3MHQ0HDWrFnh4eFffvnlSzvfunWre/fup06dsrW1PXv27IIFC+p6Hy2IWEy//koqL4PabLm4uBDquTcMduPUIpRqIFqzZs3du3ddXFzYDZpYfn5+p06d+uOPP7766qv169d/++23ypcoFIrRo0cXFhZu27Zt7ty5Cxcu/Oeff9hT06ZNu3v37p9//unv7//FF19s2LCBiBISEgYNGtS7d+89e/ZMnz592rRpZ86cUeEtwmsSeASt6WB3uLtz5w7X0qpVq02bNnGH+/fv19PT4w7ZBxP379+/du0aEeXn57PtcrlcTU0tPDyc69lwjwir1RBT12uyc+dO9nlinz59uL0/gVXLI0LWrFl4RMiPGTNmEJHyv1bgy9ixY4nowIEDQgcisDt37mhqaqqpqZ07d45rTE5OVldXv3DhAnu4YcMGa2tr5Zm7//zzj6amJjc71s/Pz9vbm301Inr48CHb/vXXX7u4uDAMI5PJrly5wl0+cODApUuXNvCdQd1hBOtVWVpasgsA2cPU1NS0tDR3d3eug5ubW0lJyd27d9nDa9eu6enpOTo6tm3bVlNT88qVK2z7jRs36L+l44JgNzpkizuwU9czMjLY4g7sRofFxcX1f5fKysqAgIBp06aVlJRIJJIzZ87Y2NjU/2Wbk/feo/Dw2soxrF1L4eEUHk7e3ioMqznq2fO9Hj0OpaT4CB1IM4QRLCKSy+Xz58+vqKh47733BgwYwLVHRUVpaWn17duXPfT29s7MzExOTuY63Lhxw9PT09TUlOvAfiG/ceOGvb0992vC29v70aNH+fn5YrG4V69e3OVqamra2toNfXdQd0JneE3J8uXLnZyc4uLiCgsLp0yZ0rVrV4ZhSktLDx48WFBQwDDMkCFDhg8fnpubm5KS0qlTp3feeYe9cMaMGV5eXunp6dnZ2QMHDhw7dizbXlRUlJubO3z48GXLluXm5gpVko4t7sA93+TKu9f5BZ89ezZs2DAiEovFgYGBPIYKUAehoQwRM26c0HE0R35+fkT0xx9/CB2IkAIDA4nI0dGxsLBQuf333393cHDgDtk0VHmISyKRjFP6e3n+/HkiKiwsXL16dbdu3bh2dh+CKisJnjx5oqmpyQ2PQSOEEazXsHLlyl69erm6upqYmCQmJu7atYuInj17Nn78ePYfwObNm0tLSy0sLFq3bu3s7Mz+qyOin3/+2cLColWrVtbW1urq6hv+G7Xw9fU1NTU9efLkDz/8YGpqeuTIEUHui9uXkJ26XlBQEBwc7OXl5e7uHhQU9LpTg+/cudO9e/fw8HALC4vw8HBu8SC8og0baMYMio8XOo5mhB0IQKnRhoARLLYas0gkkkql7J8GR1dXt6ysjDtkq0wrT9V9sYOampqOjs5LLywpKZk2bdqbb77Zr1+/Brgn4InQGV7TU1paWuVrShVFRUUlJSUvthcXF79u4ShBPHjwwN/f38LCgv0bws2Rf5XiDrt379bT0yOibt26PXnyRAXRNj8+PgwRs3ev0HE0I6WljLo6o6HBYNsS3rETuj///HOhAxFGZWVl//79iWj+/Pkvno2IiFBXV2efbzAMw84wefr0Kdfhxx9/ZCdXsTZs2GBnZ8cwzO7duw0MDLhnGkeOHBGLxVx1m+zs7EGDBr3xxhuod9PIYQTrtWlra1f5mlKFvr4+t2ZQma6uLpt8NHKurq6BgYEpKSmhoaE+Pj5yuZwt7uDo6BgQEJCUlFTtVQzDfPHFF2+99VZxcfH06dMvXrzIVmyH14XhFt5pa5OjI8lklJAgdCjNTgsv07Bt2+3IyEhbW9sffvjhxbP9+/c3NzffsmULe7hp06YBAwZYWlpyHSZMmBAfH88+GZTL5SEhIZMnTyaikSNHElFoaCgRMQyzZcuWsWPHstVtbt682aNHDxcXlwMHDqDeTWMndIYHjV1KSkpgYGDr1q3ZvzDcHHnlGWNJSUnsRE5Muqq/P/9kiJhZs4SOo3l54w2GiGnxa934J5VKiWjhwoVCByKAxERGX59p3frRP/+cq6nP5s2bNTU1p02bNnr0aF1dXXbK1JEjR9q0acN2WLJkibGx8bx583r27GlnZ8eVqv7hhx90dHRmzZo1ZMgQY2NjdgH76dOntbS0TE1Nff+zYsWKhr9RqCMR81/
5MoBaKBSK06dPBwcHHzx4kN3H2sjIqG3btl988YVIJJo4cWJFRYWJicmePXuGDBkidLBN24ULNGAA9ehB164JHUoz8uGH9PPPFBhImBPIr507d06fPn3q1Kk7d+4UOhaVYhgaOZJOnqRp0+ivv2rrefv27fDwcE1NzXHjxrHj+o8fPz5z5sy8efPYDqdPn75+/bqZmdnkyZNNTEy4C69du3bu3Dk9Pb2JEyeyq7AfPnx44cIF5Re3tLR8xYLPoHpIsOD1ZGVlbd++ffPmzQ8ePFBu19XVPXXqlPISYqib7GyytCQjI8rPFzqUZkQqpbffprlzafNmoUNRobKysj/++CMyMtLMzGz+/PldunSp0iEzM/O3336Lj493dnZ+7733uFoqBQUFv/zyy/3791u1auXn5+fs7My2KxSKAwcOXLx4cfXq1eyEh8OHD48dO9bHx+fw4cOqvDXB/fknSSRkbk7375PSQz+A/4c5WPB6LC0tP/roo/v372/evLlDhw4ikYiIWrdunZycjOyKFxYWZGZGBQWUmSl0KM1I+/ZERC2tlvukSZN+//13T0/PgoKCXr16RUdHK58tKirq3bv3+fPne/fuffny5Z49exYUFBBRZWXloEGD9u/f37Nnz0ePHvXo0SMlJYWIDh8+7OLismzZsp9//pmbdNUy52Clp/87FPrbb8iuoGZCP6OEpi09PT0hIUHoKJqbPn0YIubMGaHjaEYyMhgixtRU6DhUKDIyUiQSxcbGsodjx46dOnWqcodff/3V1taWXfJcVlbm4OCwdu1ahmH279+vq6vLlhevrKz08vLy9/dnGCYmJiY+Pp7dRzUzM5N9kevXrxORp6enKm9NcOPHM0TMmDFCxwGNG0awoF5sbGycnJyEjqK5YYdbsJCQR9bWZGxMubmUnS10KKpy4sSJLl26sPswEtHkyZNPnjyp3OHkyZNjxoxhlzxraWn5+PiwHU6ePDl06FC2vLiamtqECRNOnDhBRO3bt+eeFXLYJdUtagTrr7/owAEyMqI//hA6FGjckGABNDot83lWQ2tpaWtaWpq9vT13aG9v/+zZs9LSUq4lNTW1Soe0tLRa2qvFPiJsOYVGc3Loww+JiNauJTs7oaOBxg0JFkCjw5bCQoLFr5aWtrLzIzkMw4hEIuXGFzvU3l6tl45g5ebmrlix4uzZs+zq46bu3XcpK4uGDqXZs4UOBRo9JFgAjQ5qjTaElpZg2drapqenc4cZGRmmpqbKewO/2MHOzq6W9mpxk9xrysNOnTr17bffDh482MTEZNiwYUFBQVFRUbUnbY3W4cMUFkZ6ehQcTP+bhQJUAwkW1Oj48eNz5syZNm3an3/+WVlZWeVsZWXlpk2bpk2bNmfOnKNHjyqf2r1794wZM2bMmLF7927l9nv37n344Ydnzpxp8NCbOCcn0tCgpCRSep4D9dXS0tahQ4feunWL3SaViA4cOMBuwc7x9vY+cuQIO7Akk8n++ecfb29vtv3UqVOFhYVExDDMwYMHq1yoTF1dXUdHR6FQsPvlvcjV1fWjjz7q3LlzaWlpREREQECAl5dXq1at5s2bt2vXruymMycuP5/eeYeI6PvvCfNO4ZUIOcMeGrHQ0FCxWLxixYqff/7Z0tJy8eLFVTp8+OGH5ubmP/3002effaahofHXX3+x7T/++KOent6aNWu+/fZbPT2977//nmGYrKys4cOHW1hYmJqarl69WtU30wR16MAQMbdvCx1HM3L/PkPEtG0rdBwq5O3t7ebmFhwc/O6772ppabFDR9u3b2fLf+fn59vb248YMWLTpk2jRo2ys7PLy8tjGEYmk7m7u/fu3Xvjxo1Tp041NjZOSkpiGKagoEAqlX7zzTdE9P333+/YsYN9F3bvF25dYU2ysrJCQ0MlEkmrVq2Ufwc5OTktXrw4PDy8tLS0Qf806mnWLIaIGTiQeYVNWQEYhmGQYEH1OnfuvHz5cvbnf/75R0tLKysrizubk5Ojo6Nz8OBB9vDzzz93c3NTKBRlZWVWVlZ//PEH275hwwZLS8uysjK5XH7hwgW5XO7t7Y0E61WMG8cQMaGhQsfRjJSXM2Ixo67OlJUJHYqqPH/+/Ouvvx43btz8+fOvX7/ONu7Zs+fbb79lf05JSVm6dOm4ceM++OCD5ORk7sKcnJzly5ePGzfunXfeiYmJYRufPn0qUfLJJ5+w7ew64ri4uFcPLCEhQSqV+vr6Ku/rqqOj4+3tHRgYGBkZ+Spby6tSeDgjEjG6usyjR0KHAk0HEiyoRmZmJhFFRkayh3K5XF9ff+/evVyHgwcP6ujoyGQy9vDWrVtElJaWduPGDSLiUrGsrCwi4j7ZGYZBgvWK/P0ZIuarr4SOo3lp25YhYu7fFzqO5oUtEB8dHV2Ha2Uy2YULF/z9/T09PdXU/n/KiqWlpa+vr1Qq5fbmE1BBAePgwBAxP/4odCjQpIgb/BkkNEHsqmxuqba6urq1tbXyUu20tDQrKyux+N+/P+wc2LS0tPT0dE1NTXNzc7bdwsJCS0srLS2te/fuKr2Bpq+lzchWjQ4dKD6eYmLIzU3oUJqR+lRqEIvF/fr169evHxFlZWVFREScPHkyPDw8PT09LCwsLCxMJBJ17tx5+PDhb7yxrFcvSx0dnoN/Ff7+lJxMPXvSkiUCvDs0XUiwoBrsV0mFQsG1MAyjvHhbTU2tylkiEolEL7YzDKP8xRReERKshtChAx050oLmuasGX7VGLS0tp02bNm3aNCJKTEyMiIiIiIg4ceLE7du379y5GxISWFhIffuStzd5e1O3bipax3f2LEmlpKlJmzaRuroq3hGaDSRYUA1bW1siSktLYzd/lcvlmZmZyku1bW1ts7KyZDKZhoYG/TfiZWdnJxKJ5HJ5VlaWlZUVEWVnZ1dUVNSyxhtqwi15YxgsCOcN0lbeZWdnP3r0yMTEZPr06d7e3t7e3j4+PuwHSH04OTmxM73Ky8svXbp08WLsoUNq0dEUEUEREUREVlY0bNi///23RTX/Skpo4UJiGFq5ktzdG+pdoLnC0AJUw9LS0sPDY+/evezhyZMnFQrFgAEDuA79+vVTU1PjqjPs37+/c+fONjY2nTp1srGx2bdvH9u+b98+tlHF8TcDpqZkYUHPn5NSQSKoLyRY/IqOju7evTu7O2FeXl5YWJifn1+rVq28vLyWL19+5syZ8vLyer6FlpbWkCFDPv/8nchIevqUQkNJIiFHR3r6lHbsoNmzydaWnJ3Jz4/CwqiwkI+7UvLZZxQfT507/7u1M8DrEXICGDRie/bsEYvFAQEBP/30k6Wl5UcffcQwTEZGxqhRo9hppx9//LGZmdkPP/ywYsUKDQ2NsLAw9sK1a9fq6up+/fXX33zzjZ6e3rp169j2nTt3BgYGtm3bdvjw4YGBgdgi+qX692eImIgIoeNoRrKyGCLGyEjoOJqFXbt26erqEpGnp2dSUpKKFwYmJDBSKePryxgZMUT//icWM56ezKpVTGQkU1lZ37e4epVRV2fEYua/1T4Ar0fENM2KuqACERERO3fuLC
8vHzp06Jw5c9TU1HJyclauXLlq1Spra2uFQhESEhIeHq6lpTV16lTlUoR79uw5dOgQEY0bN27SpEls408//RSrNHTw/vvvd+zYUcV31LQsXEgbN9Jvv9G77wodSjNibk7PnlFGBllbCx1Kk1VZWfnpp58GBQUR0cyZM6VSqY7S5PPS0tJLly6xM6hu3rzJ/YqxtLQcOHCgt7f36NGjeZw2UFlJt279++jw3DmSyf5tNzenwYPJ25tGjiQHh9d+2fJy8vSk+/fp00/p66/5ChZaFiRYAI3UDz/Qxx/T4sW0bp3QoTQjffvS5ct05gwNGiR0KE1Tbm7uW2+9FR4eLhaLv/76a/9aH55lZ2efPXs2IiLi2LFjKSkpXLuTkxM7W2vYsGHKu/fU0/PndOYMHTlCJ0/Skyf/3+7k9O/U+OHDycjolV7q009pzRpq355u3SL+AoSWBQkWQCN15AiNGUPDh9OJE0KH0ozMm0dbttCGDeTnJ3QoTdCdO3cmTJiQmJhobm4eGho6ePDgV79WeWFg4X+zpXR0dPr27cvOju/WrZuIvwUdiYn/DmuFh1N+/r+Nn31Gq1e//Nrbt6l7d6qspHPnqF8/viKCFgcJFkAjFR9P7dqRgwMlJQkdSjOyeTMdOEALFtDYsUKH0tSEhobOmzevuLi4a9eu+/fvd3R0rNvryOXy27dvHz58+MiRI9HR0VxhF+4Z4qhRo7gifPUnl9PVq3TyJIWH03ffUf/+L7+koIA++YT09enHH/mKAloiJFgAjVRlJenpUUUFFRaSvr7Q0UALxjDMd999x+6dNX369D///FOHp4qf2dnZERER4eHhJ0+e5EoZi0SiceMy27a1HDaM+vcnQYqLEqFCCtQXEiyAxqtv35zi4gfbtrl26WIhdCxN2JMn9OwZEZGzMxkbV9OhvJzu3SMiatv2VefotByFhYWzZs06ePDgq0y6qg/uGeKFCzfz8uLZCg9iMfXsSWPGkLc3de1K9SlarFAQW3lGXZ3Gj6/+pe7epZgY0tamMWPq/kYALCRYAI3XpEmT9u3bt2vXrrfeekvoWJqwWbNo+3YiogkT6L8abf8jLo5cXIiIjhyh0aNVGlsjFxsbO378+JiYGDMzs927dw8dOlQFb1peXnn5snp4OIWH082bxO0NYWVF3t7/FhetQynT8vL/n64eHEwLF1bTZ/lyCgwka2vKyKhr9AD/QaFRgMarQ4cORBSDvV14sn8/HTokdBBNx5EjR3r27BkTE+Ph4REZGama7IqItLTUBw+mNWvoxg3Kyvqf4qJ//UVz5pCdXX2LiwYEUFYW33ED/C8kWACNV/v27YkoFqXH+fP++1TvTfOaP4ZhgoKCxo0bV1BQ8NZbb126dKl169aCRGJmRr6+JJXSkyeUkEBSKfn6kpERJSZScDBNmUJmZuTlRQEBdPEiKe2DWhuRiHJz6eOPGzh0aPGQYAE0XkiweOThQWZmlJz8Sgv1W7KioqJJkyYFBASIRKLAwECuYrvgnJxIIqHQUHr2jCIjKTCQvL1JJKKoKAoKov79ycqKpkyh4OCXLLxln7eHhNDp06oJHFooJFgAjRf7iDA2Nlbxit/NoWbGxrRiBRHRzz/T3btCR9NYxcXF9erVa//+/aampseOHWu4Ke31oa5Onp7k70/h4ZSbS+HhtHgxtWlDOTkUFkZ+ftS69f8/QywoqHr5qFH/lpl97z2q92aJADVCggXQeBkZGVlbW5eUlKSmpgodS3OwaBG5uJBMRgsWvOrjpBbl6NGjPXr0ePDgQefOnW/cuKG8/1Wjpa9P3t60bh0lJtKjR/TbbzRuHBka/v8zREtLGjqU5PL/ueqXX0gsppgY+u47geKGFgAJFkCjhnnuPNLU/Ld05PXrtGmT0NE0JuykqzFjxuTn50+ZMuXy5ctOTk5CB/Xa2rWjd9+lAwcoN/f/nyEyDBUWklj8Pz07dqQFC4iIvvmG8AQeGggSLIBGDdOw+OXjQ2+8QYR1ZEqeP3/u6+sbEBDAMMyqVav+/vtvPT09oYOqF+VniFlZtG1bNX1WryYzMyovp8WLVR4ftAxIsAAaNSRYvFu/nrS1sY7sX/Hx8b179967d6+hoeGBAwe++OILHjcEbAyMjcnNrZp2c3Nas4aI6ORJ+vtvFQcFLQISLIBGDY8Iede2LX3yCRHR9u109qzAwQjr+PHjPXr0uHfvXvv27a9duza2hW3QuGAB9exJRPThh1RUJHQ00OwgwQJo1DCC1RBWrCAXF2IYWrSo6vTnFoKddOXj45OXl+fj43Pt2jU2lW9R1NTo999JXZ0yMuibb4SOBpodJFgAjVrr1q11dHTS0tIK61ayGqqjpUXr1xMR3b9Pf/5Z9Wx6+ksKKTV1xcXFb775ZkBAgEKh8Pf3P3jwoFFL3YKxWzd6+20iop9/prg4oaOB5gUJFkCjpqam1rZtW4Zh4vDxz6sRI2jCBCKiVauqlkr66SdycqJhwygkhEpLBYmuAaWkpAwcODAsLMzAwGDfvn2BgYFq9dlCuen7+muysqKKCgoIEDoUaF5a9L8rgCYB07AayLp1pK9P2dkUFPQ/7TIZaWpSRATNnk2tWtEHH9C9ewKFyLdz5855eXlFRUW1a9fu6tWr48ePFzoi4Rkb0/ffExHt20eXLgkdDTQjSLAAGjtMw2ogrVrRZ58REe3d+z/t69ZRZiZJpdStGz17RuvWUadO5OVF69ZRbq4gkfIjODjY29s7Kytr1KhR169fd6t2cV2LNGMGDR5MRHThgtChQDOCBAugsUOCVTc3bry8z9Kl5OpKDFO13ciIJBKKiqJ798jfn8zMKCqKPviA7OxoyhSKiKjmksasrKxszpw5fn5+lZWV/v7+hw8fNjY2FjqoRkQkol9/JQ0NoeOA5gUJFkBjh0eEr0sup4AA6tGDtmx5SU9NTdqwgWop/OTuToGBlJZGoaHk7U3l5RQWRsOGUYcOFBRET5/yG3iDSE1NHTBgwLZt2/T19cPCwjDpqlpubvTBB0IHAc0L/pkBNHbt27cXiUSPHj2qrKwUOpYmIDubhg+noCDS0nql/gMG0NSpL+mjpUW+vhQeTklJFBhIjo706BEFBJCdHQ0bRmFhjbfWw4ULF7y8vG7cuNG2bdsrV65MmjRJ6Igary++IEdHoYOAZkTENK2RboAWyd7ePi0tLTExsU2bNkLH0qjdvk0TJtDjx2RrS3v2UO/eRERPntCzZ2RgQC4u1V9VWPjvEv22belV6hUoFHT6NAUH04EDJJMREdnY0KxZtGABtW3L163wIDg4eNGiRTKZbOTIkTt37jQxMRE6IoHl5RER6emRpmb1HUpKqLyc1NRe6a8BQO2QYAE0AUOHDj19+vSxY8dGjhwpdCyN199/0/z5VFJCnp60bx85ODT4O2Zm0u7dtGkT3b37b4unJ0kkNH06CbubX3l5+bvvvrt582aRSPTJJ59888036urqQgYE0PLgESFAE4BpWLWrrKSAAJo6lUpKaOZMunBBFdkVEVlb05IldOcORUaSREL6+hQVRX5+ZGdHfn4UF
aWKGF6UlpY2YMCAzZs36+np7d69OzAwENkVgOohwQJoArCQsBa5ufTGGxQURGIxBQZSSAjp6Kg6Bk9PkkopNZU2bKDu3amggIKDycuLunWjrVsf5ufnqyySS5cueXl5Xb9+3cHB4dy5c76+vip7awBQhgQLoAnACFZN7tyh7t0pPJzMzenkSfL3FzIYIyPy86Pr1+nBA/L3JwsLunWLVq16z9raesqUKREREQ09JSM4OHjIkCGZmZkDBw6MjIz09PRs0LcDgFpgDhZAE5CUlNS6dWsbG5v09HShY2lEwsJo7lwqLqauXWn//ka3BKy8nP75J3/DhimnTp1SKBRE5OzsPG/evDlz5tja2vL9XuWLFi3auHEjEUkkkt9++00sFvP7FgDwWpBgATQBDMPo6+uXlJRkZGRYW1sLHY7wGIa++46WLyeGoWnTaONGAR4Lvrq0tLQdO3ZIpdLHjx8TkZqa2pAhQyQSyfjx4zX4qG6Znp4+adKkq1evamtrb9iwYfbs2fV/zWassrJy586d0dHR5ubmM2fObNWqVZUO+fn5W7ZsSUlJcXZ2njNnjt5/CxZKSkq2bt0aHx9vb28/d+5c5VWZp06dioqKWrZsGWqMAQcJFkDToKurW1pa2qdPnx07drTwYg2FhTRrFh08SGIxff31Sx4LJiQkpKamdujQwcrK6sWzCoXizp07z58/9/Dw0NfXVz714MGDZ8+edezYUfn3qFwuv3PnjkKhcHNz09XVfa2wFQrF6dOnQ0JC9uzZU1paSkTso8MFCxZ06tTptV5KGVvdKiMjw97eft++fd27d6/zS7UQvr6+V65cmTVr1s2bN69fv37jxg1nZ2fubFFRUbdu3YyMjEaOHHnw4EENDY2rV69qamrK5fJevXqVlpZOmDDh5MmTz549u3nzppGR0T///PPZZ589efIkPz+/vLxcs6YKENACMQDQFMydO5f7Z9u1a9dDhw7J5XKhgxJATAzj6soQMWZmTEREbT0rKiomTZqkra3doUMHTU3NoKCgKh0yMjI6d+5sYmLSpk0bIyOjo0ePsu2FhYUDBgzQ19d3cXHR0dHZtm0b23758uVWrVo5Ozu3bdvWysrq/PnzdbuF3NxcqVTapUsX7n+op6enVCotKip63ZcKCQnR1tYmov79+2dmZtYtnhYlOjpaJBLdvXuXYZjKysr+/fu/8847yh3Wr1/fqlWr58+fMwzz7NkzY2PjnTt3Mgyze/duQ0PDnJwchmGKi4tbt279008/MQxz+fLl69evR0ZGElF5ebkAtwSNFRIsgKZBJpN98MEHZmZm3G9lJyen1atXp6SkCB2a6hw+zBgZMUSMhweTmPiSzlu2bDE3N09ISGAY5vDhw2pqanFxccod3n77bU9PT/ZX6WeffWZra1tRUcEwzFdffeXs7Jydnc0wzO+//66np5eXl8cwzKRJk9auXcte6+fn17Fjx3reTmRkpEQiMTAwYP+HGhoazpw5Mzw8/FWulclk/v+N3UkkEjZyeKmvvvqqW7du3OEff/xhb2+v3GH48OGLFy/+v/buPLiL8nD8+HIGDKQM9w3lEAkgVjEiit+BolhExiuAo0ZEhJajlKljwNZSW6sB75EZDVjbivWAka8KCBQsg6BcxniDApGbgIZLjkCOz/ePnX5++WG1VR9A5PX6K7s8u9klM8l79rP7bHLxhhtuuP766xOJxI033jho0KDk+nHjxvXq1Su5mJeXJ7A4hsCCU8zKlSvvu+++Nm3axH9cK1eu3KdPnxkzZvywf7mXlydychKVKyeiKDF4cOLgwf+8yf/8z/9U/Et5zjnn/O53v0sulpaWpqam/v3vf48X9+3bl5KSMnfu3EQi0aZNmwceeCA5rGHDhtOmTTtm53PmzKlSpUqQi4iHDh2aMWNGnz59Kv3rnYgdO3bMycnZtWvXV22ya9euXr16RVGUkpLy1FNPffdjOH2MGDHiqquuSi6++uqrx/wc09PTH3zwweTinXfe2bNnz0Qi0atXrzvuuCO5/pFHHjnzzDOTiwKLL3M7HpxiMjIyxo8fv27duoULF950000pKSmLFi0aOHBg48aNR4wY8d57753sAwzviy+ia6+Nxo+PKlWKcnKi556L/pvbnzZs2HD22WcnF7t06bJhw4bkYmFh4cGDB5MD0tLSWrVqtWHDhrKysk2bNiVviqpSpUp6enrFDWMFBQWtW7cOMoFnzZo1MzMzFy5cuGbNmuzs7IYNG65Zs2b8+PEtWrQYOHDg7Nmzj3kH5bx5884///zFixc3a9bs9ddfr/jZMccYO3ZsrQrWrl1bUlJS8adWtWrV8vLyiv/DpaWlxwwoKSn5mvXwVQQWnJLiC1dPP/309u3bc3NzzznnnD179kydOrVr167dunWbOnXqgQMHTvYxhrFuXdS9e/S//xvVrRvNm/eVt7T/4he/uOlfJkyYEEXR3r17k5++RVGUlpa2J34XXRTF/xpF0TEDdu/evX///rKysq/ZMIqiL7744tFHHx01alSQE0zq0KFDTk7O9u3bFy5cmJmZWVZWNnPmzAEDBrRq1Wr8+PHxE4gjR47s16/fpk2bLrroorfeeisjIyPsMfzA3HPPPQUVtG/fvmnTpjt37kwOKCwsbNCgQcU7048ZsGPHjmbNmsXrCwsLK24Yr4evIrDg1FanTp3hw4fn5+e/9dZbv/zlL+vWrZuXlzdixIimTZtmZWUtWrToZB/gd/Lqq1FGRvTRR9HZZ0erV0eXXvqVI7t3737xv5x77rlRFDVo0GD37t3JAUVFRRUfJGzYsGEURRXLqaioqHHjxnXq1ElJSfmaDY8cOTJo0KB27dqNHj06zEn+/6pUqRJ/5ltQUHD33Xe3bt1627ZtkyZNatu2bY0aNR5//PEoirp27bp48WITdvxHtWvXblhBlSpVevbsuWrVqs8//zwe8Oqrr15yySUVN+nZs2f8uEMURWVlZQsWLIgH9OzZ8x//+EdpaWk8bN68ecdsCMc62Z9RAiEdPnz4mBt6zjrrrJycnJ07d57sQ/tmysvL//SnSe3aFUdRIjMzceDAN97DFVdcMXTo0ORiu3btJk+eXHH/DRo0yM3NjRcLCwurVKmyZMmSRCJx9tlnJ+/WOnz4cFpa2nPPPRcvbtu27aKLLurbt+/B/+YusBDKysqWLl06fPjw5OdTN99884n51j9I5eXlP/nJTy6++OKXXnrprrvuqlat2vLlyxOJxAsvvBA/wbB169a0tLShQ4fOnj174MCBjRo12rdvXyKR2L9/f+PGja+99trZs2ffdtttaWlp8fMlRUVFM2bMmDRpUhRFzz777IIFC07uCfL9IbDgh+njjz/Ozs5OXnqpXr16//79Z8yYUVJScrIP7T/bv3//NddcE0VR27b9Jk0qKy//Njt5+eWXU1NTlyxZUlpa+uijj9aoUSOuzNdff33z5s2JROLOO+9s27btxo0bDx06NGTIkPT09PLy8kQiMWXKlIYNG77//vtHjx694447GjVqVFxcHG/YpEmT4cOHn5T/w/Xr10+ePHnF
ihUn/lv/wOzcuXPUqFHdu3cfMGBA8pnN3Nzc3/zmN/HX77zzzuDBgzMyMrKysj755JPkhuvWrcvKysrIyBg8eHB+fn68cu3atX0qGDZs2Ik9G76/BBb8kJWWlsY39CRfnNKsWbPs7Ox48oLvp3Xr1nXu3DmKorS0tJdeeum77Oq3v/1tzZo1U1JSGjZsOGvWrHhlq1atHnvssUQicfDgwczMzCpVqlSvXj09Pf3dd9+NB5SUlAwfPrxatWopKSmtW7dOznf15Tkk8/LyvsvhAT9gAguCWb58ea9evVq2bNmjR4/58+d/ecAzzzxz3nnntWzZ8oorrvjoo4+S6+fOnXvhhRe2bNmyd+/eK1euTK5/++23b7zxxssvv/y7H9v27dtzcnKSM1bH98j/7W9/O3To0HffeUDz5s2LZ07v0KHDmjVrvvsODx8+vH379orP4R89erSsrCy5+MUXX/zbKToPHTq0ffv28m939Qw47QksCGPXrl21a9cePXp0fn7+xIkTq1evfkwfLF68uGrVqo899lheXt6gQYOaN28ef/D04YcfVqtW7e67787Pzx85cmRaWlo8xeWYMWM6dOjQr1+/Bg0aBDzOeHLL5GtekvfIB/wW3055eXlOTk58p1H//v337t17so8I4NsTWBDGfffd17Zt2+QFj0suuWTMmDEVB1x11VU33HBD/HV863Q8y+XIkSOTU0KXl5f/+Mc/ju/Fjl+c8sorr4QNrNjevXtzc3Pjp+1i55133iOPPFJUVBT8e/03Dh8+fNNNN0VRVKlSpezs7IpXmABORaZpgDBWr17905/+NPnsXu/evePXkx0zIP66Ro0aF1100erVq49ZX6lSpV69esUbHvPu4bB+9KMfDR8+PC8v74MPPsjOzq5Xr15eXt6vfvWrZs2aDRw4cNGiRYkT+Br4LVu2XHzxxdOnT69du/aLL76Yk5NTufJp+qvp3Xffffjhh6dMmbJx48Z/O+C11167//77//znPx8zNdeKFSsefPDBJ554YseOHRXXf/DBBw8//PBjjz1WUFBw/A4b+LLT9LcYBLdjx44GDRokFxs2bLh9+/bkYiKR2Llz578d8PUbHm+dOnXKycnZtm1bPLnDkSNHZs6ceemll5511lmTJk2qOOPicbJkyZJu3brl5eW1b99+xYoVV1999fH+jt9bTz31VEZGxsqVK+fOnZuenr506dJjBowZM+a666778MMPH3/88S5dumzdujVeP3ny5F69euXn5z///PMdO3ZMzuY/ffr0c889d/ny5fPnz+/UqdNrr712Qs8HTnMn+xIa/ED06dPn17/+dXLxgQce6NSpU8UBtWvXnjFjRnIxMzPz1ltvTSQSHTp0SL5COJFIjB079mc/+1ly8Th9RPhVNm/enJOT06pVq/j3Q3LSy+M0MUFubm61atWiKOrXr1/8QuXT1pEjRxo3bvzQQw/Fi8OGDYtfgZe0fv36ypUrxzN1HT16NCMjY+zYsYlEYs+ePampqc8880wikSgvLx8wYMC1116bSCRKSkqaN2+ek5MTbz5y5MiMjIwTeEJwunMFC8Jo0aJFxTfWFRQUtGjR4pgBFT+mKSgoaN68+Zc33LBhQ7z+pGjRokV2dnZBQUE8uUPlypXjFx22bNly/PjxX34l37dWXFx8yy23jBgxorS0NDs7e/bs2XXq1Am181PRsmXLPvvss1tvvTVeHDZs2NKlSyteQZw1a9aZZ54Zzx5erVq1IUOGvPjii1EUzZs3r2rVqgMHDoyiqFKlSsOGDXv55ZdLSkpWrFixbdu22267LbnDVatWbdmy5USfGJyuBBaEkZmZuWDBgk2bNkVRtHv37lmzZsV/8yoOmD59+uHDh6Moevvtt/Pz8+MBmZmZL774YnxLzcaNG+OgORln8P/EkzjMmDFj8+bNjzzySOfOnXfs2DFp0qR27drFLzo8dOjQd9n/1q1bL7nkkr/+9a+1atWaOXPm6XzTVdKWLVvq16+flpYWL8YTalTsoS1btrRp0ya52KZNmx07dpSWlm7ZsqVVq1bxhcB4fWlpaWFh4ZYtW+rUqVO3bt2v2iFwXJ3uv9QglL59+/bq1atHjx5DhgzJyMho3br19ddfH0XRDTfccPvtt0dRNHr06CNHjmRkZNx8882XXXbZ8OHD09PToyi68cYbmzZtmpGRMWTIkB49evTt2ze+53316tUDBw6cNGnS/v37Bw4ceO+99574k2rcuPHYsWPff//9eHKH1NTU5IsOR4wY8fbbb3+LfS5durRbt26rV69u167d8uXLr7322uCH/f33/PPPt6/glVdeKS4uTklJSQ6Iv45zPHbMgBo1apSVlR05cqS4uLjiDKg1atSIN/yPOwSOq6on+wDgB6Jy5cqzZ8+eO3fu2rVrr7zyyiuvvDL+szd06NB40qn69evn5+e//PLLhYWFWVlZyScHzzjjjGXLls2ePfvTTz+97rrr+vXrFz+K2KBBgz59+kRRlJWVFUXRMR84nmDnnXdebm7u/fff//zzzz/99NNvvPHG1KlTp06dmp6enpWVNWzYsHr16v03+5k6dero0aNLSkouv/zyZ599Np5T9DR02WWXxXkda9my5euvv/75558nEon4p79r164oipo2bZoc06RJk48++ii5uHPnzjp16qSmpjZp0iQenFwfb9ikSZOioqKysrJ4arEv7xA4vk72TWDAqeejjz7Kzs5OPvyYkpKSmZm5cOHCr5n3vLi4eOjQodG/ZrqqOLU6iURiy5YtVatWXbZsWbyYm5vbpEmTo0ePJgfMmTMnJSUl+SjA8OHD+/btm0gk3nnnnSiKPv7443j9H/7wh/jpih07dlSrVu2f//xnvP4vf/lLgwYN4rltgRNAYAHfUnFx8YwZM/r37x9fI4n+dY/8xo0bjxm5devWCy64IIqi1NTUio9SUtHgwYPT09OXLFkSPzp67733JhKJDz/88E9/+lMikSgrKzvrrLOuuOKKlStXPvHEE9WrV0++jql3797du3d/4403nnvuudq1az/55JPx+qysrA4dOixevHjOnDmNGzf+/e9/f7JODU5DAgv4rrZs2ZKTk9O6des4s+J75CdMmBC/6HDZsmWNGzeO8+utt9462Qf7/bV///7Ro0e3a9euU6dOf/zjH+OLfAsXLuzRo0c8YOPGjZmZma1bt+7Wrdv06dOTG+7ateuWW25p06bN2WefXXHKjwMHDowdO7Zdu3bp6ekTJ048TnNtAP9WpcQJnK8Z+AErKytbsGDBU089NXv27KNHj0ZRVLly5SZNmuzYsaO8vPzSSy997rnn/stbtQBOdQILCGzPnj0TJ06cNm1acXFxvOaaa6554YUXqlb1VA1wuhBYwPGSm5u7ZMmSCy64YOzYsSf7WABOKIEFABCYiUYBAAITWAAAgQksAIDAPNQDfBt79uxZtWrVGWeckZGRUfGdd0nbtm1777336tWr161bt4rvci4qKlq9enWtWrUuuOCC5CuKoyg6cODAihUrKleufOGFF9asWfNEnAPAcSOwgG9s3rx
5gwYNatWq1b59+6pWrbpo0aI2bdpUHDBlypTbb7+9U6dOmzZtat++/YIFC9LS0qIomjVrVlZWVtu2bT///PNatWq99tprzZs3j6LozTffHDBgQL169UpLSw8dOjR//vyuXbuenHMDCMFHhMA3U1ZWNnLkyFGjRr3//vsbNmxo06bNhAkTKg7YtWvX7bff/uSTT+bl5a1bt66oqOjRRx+Noujo0aOjRo0aP378u+++W1BQ0KBBg4kTJ8abjBkz5qqrrvr444/XrVvXs2fPcePGnYQTAwjHNA3AN7N48eLLLrussLAwnpZ97ty5V1999Z49e1JTU+MBU6ZMmTx58qZNmypVqhRF0f333z9t2rRPPvlkzpw5mZmZu3btql27dhRFM2fOvPnmm/fv37927douXbp88skn7du3j6Jo1apV3bt337x5c3xxC+BU5AoW8M0UFBQ0atQo+dKbTp06lZSUbN26NTng008/7dixY1xXURSlp6dv3LixvLy8oKCgZcuWcV3F6w8fPlxYWFhQUFC9evV27drF6zt37pxIJDZu3HjiTgkgNIEFfDP79u2rVatWcjEOpj179nzNgJKSkoMHD37VhvH6ZJDVrFmzSpUqFXcIcMoRWMA306hRo927dycXi4qKoihq3Lhxck3Dhg0rDti9e3dqamrt2rW/asNGjRrt27evrKwsXr93796ysrKKOwQ45Qgs4Jvp3LnzZ599tn79+nhxxYoVdevWbdasWcUB+fn5yTc9L1++vEuXLvH6zZs3b9u2LblhkyZN6tev37Fjx0QisWrVquT4GjVqxPdjAZyiBBbwzXTt2vXCCy8cN27c7t27169ff8899wwbNqxatWpbt2595ZVXoii65pprqlevPmHChEOHDr3xxhvTpk37+c9/HkVRjx49OnfuPG7cuL17965Zs2bSpEkjRoyoVKlSixYt+vfvf8cdd+zcuXPr1q133XXX4MGD69Spc5LPE+A7EFjANzZ9+vQ9e/bUr18/PT39/PPPj2dbePPNN4cMGRJFUY0aNV566aX58+fXqlWrb9++w4cPz8rKijd84YUXNm3aVLdu3XPOOad3797jx4+P1z/xxBM1a9Zs1qxZ69atW7Ro8dBDD52kMwMIwzQNwLd0+PDhqlWrVpyN/RgHDhyI71g/Zv2hQ4eqVav25Q2Li4srVar0b+eFBzi1CCwAgMB8RAgAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAQmsAAAAhNYAACBCSwAgMAEFgBAYAILACAwgQUAEJjAAgAITGABAAT2fyuZZI+xC+mvAAAAAElFTkSuQmCC", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAyAAAAMgCAIAAABUEpE/AAC87klEQVR4nOzdZ1xUx9cH8LOFpXdUmoqIgl3E3hUwxtgV1ChqLBg1aoqKMVFjSQRjrDEGjMZuxGhiiQ01xl7Aih1RpKMICAILuzvPi0nus38ERLmwgL/vxxfc2dm7cxGWs3NnzpEwxggAAAAAxCPV9QAAAAAAqhoEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAPB6aWlpERERmzdvbteunYODg4GBgYWFxaxZs3Q9rgpKwhjT9RgAAACgAlGpVE+ePImOjr5169bt27f5F4mJiYV23rhx46hRo8p5hBUfAiwAAIB3Wnp6+sOHD7XDqdu3b+fk5BToZm5u7uLi4uzsrFQqa9eu3alTp2+++eb27dsymSwiIqJZs2Y6GXyFhQALAADgHZKQkCBMSvEvoqOjX+1mZ2fXqFEjZ2fnhg0b8i/q1KkjkUi0+2RnZzs4OKSnp7dq1erMmTMKhaK8LqISQIAFAABQNWVkZERFRfEQiodTd+7cyc7OLtBNX1+/bt262uGUm5ubsbFxSV7i/v37Xl5esbGxkyZNWrNmTRlcRGWFAAsAAKAqeHVq6tGjR6/+lbe0tBQmpfgXTk5OUunbb3q7evVqhw4dcnJyQkJCxo8fX7qLqDoQYAEAAFQySqUyKipKO5y6e/fuy5cvC3RTKBSOjo7a4VSzZs1MTU1FH8+WLVtGjhypp6
d3/PjxTp06iX7+yggBFgAAQIWWlpamvZvv9u3bjx8/1mg0BbqJPjX1RqZNm7Zq1SpbW9vw8HAHB4fyedGKDAEWAABARZGXl/fgwQNh7fmtW7euX7+elZVVoNurU1NNmzY1MzPTyZg5lUrVo0ePv//+u23btidPntTX19fhYCoCBFgAAAC68eTJkydPnpRkakp7N1/Dhg3d3NxkMplOxlyM1NTU1q1bR0dHjxo1auPGjSV5Ck+4ZWpqWq1ataL6xMfHq9XqmjVrFtjD+PLly+TkZBsbmwKRZV5eXlJSkoWFhW4jTmIAAABQXhITE1u3bm1kZFQgXOAUCkXDhg0HDhz45Zdfbtq06eLFi+np6boe8hu4du2akZEREf3000+v7bx7924bGxuFQiGRSPr06ZOZmVmgw8OHDz08PKRSqVwur1evXkREhPDQt99+a2hoqFAoFArFZ599ptFoePuqVausrKz4Fsi+fftmZGSIeHVvBAEWAABA+alZs6YQTslkMg8PDz8/v8DAwNDQ0MjIyPz8fF0P8DWOHz9+5cqVYjps27aNiPT09E6ePFlMt5SUFAMDgy+//DI3N/f27dsODg4ff/xxgT7du3dv27ZtSkrKixcv+vXr5+TkpFQqGWMHDhyQSqXbtm1Tq9WHDh0yMDBYt24dY+zChQsmJiZhYWGMsYcPH9rZ2U2fPr20F/y2EGABAACUkxs3bvDQ6ttvv42KitL1cN7Y0aNH5XJ57dq1U1JSiun2xRdfEFGNGjViY2OL6hMUFFS9enUhoFyxYoWJiQmPn7i7d+8S0alTp/hhdHS0RCI5ePAgY2zAgAHe3t5CTz8/v7Zt2/Kvnz59KrR/8skn7dq1e+OLFAmKPQMAAJSTgIAAIrK3t589e3bdunV1PZw31rVr13bt2sXExAwcODAvL6+obkFBQe+//35ycnLfvn1fLbnDRUZGtm3bVi6X88NOnTplZWXFxcUJHW7evCmVStu3b88P69Sp4+joyKOue/fudezYUejZsWNH3k5ENjY2QvvLly+trKze8lJLDQEWAABAObl9+zYRTZgw4a3PoNFobt68ee7cuVe3FgqePHly9uxZ7WCFUyqVly5dCg8PV6lUBc5569at8PDwYs7J6enphYaGOjg4nDlzZubMmUV1k8lkW7durVu37tWrV4u62MTERGtra+GQB0ba9aSTkpIsLS211/Lb2NjwDgWeW61atfT09AKR3PPnz//4448BAwYUf0VlBwEWAABAeTh16lRMTIyDg8NXX331dmd48uRJs2bNWrVq1bdvX3t7+127dhXooFarR48e7eTk5OvrW6tWrUmTJrH/cgWcOXOmdu3a3t7e3bp1c3Z2vnLlCm+/fPly/fr1u3bt2rdvX0dHx9DQ0OLHYGtr+/vvv+vr669cuXL9+vVFdbOystqzZ4+xsfGWLVtWrVr1agcTExPtkIgX8NFOgmpiYlKgqs/Lly95hwLPffnypUKhMDAwEFry8/P9/PwaNWr00UcfFX85ZQcBFgAAQHng4cjo0aPfOsOCv7+/QqFITk5++vTplClTRo8enZSUpN0hJCRk165d58+fj4+PP3ny5IYNG7Zu3UpESqVy2LBh3bt3T01NTU1Nbdas2YcffsjzQUybNm3IkCFJSUkJCQkTJ04cN25cZmZm8cNo27ZtSEgIEX3yySeXLl0qqlvTpk03b94skUi++OKLv//+u8Cjjo6Ojx8/Fg4fPXrEG7U75OTkJCcn88O8vLyEhATewdHRkffnHj9+7ODgIOzKfPHixcCBA+Pj4/fv319ueVYLoavFXwAAAO+OjIwMnprhwYMHb3eGpKQkiUSye/dufpidnW1mZvbjjz9q92nXrt2oUaOEw8GDB/PF4IcOHZJIJLw0IWPs+vXrRHT+/HnGmFKpVKvVvP3evXtEdOnSpZKMZ+LEiURkZ2cXHx9fTLdZs2YRkbW1dXR0tHb7iRMnpFLp7du3+eHQoUM9PDy0O+Tk5FSrVi0oKIgf7tixQyaTPX78mDH29ddf29nZvXjxgo+/UaNG/v7+vNvdu3cbNmz4/vvvp6WlleQqyg4CLGD5+fnjxo3r0KHD9OnTX01DAgAApbd27Voi6tat21uf4ezZs0SkHc106NBh2rRp2n1sbGy0E1AFBQU5OTkxxngRG+2ehoaGmzdvLvAS586dI6Jitv5py8vL69KlCxG1b99ee/dfAWq1+oMPPiCi5s2bv3z5UmjXaDSenp6Ojo5z58718fHR09M7ceIEY+z8+fPu7u7Pnz9njK1cuVIul0+ePHnGjBkmJiZCzoVnz545OTm5u7svWLCgY8eO1apVi4mJYYydOHHCxMTE2tp6xowZAf8pybWUBdwiLA8ajSYyMvLq1atKpbKoPrGxsREREU+fPi3QrlQqr169GhkZWSC3r0ajuX37dmRkZDHnLImEhISaNWv+8ssvZ8+eXbp0qZmZWdeuXfmHGwAAEAu/Pzh27Ni3PkNKSgoRWVpaCi3W1tbatwjVanVqaqr2vjkbGxveISUlpcB+OisrK+Hum+Cnn37q2LGj9n26Yujp6e3cubNmzZrnzp377LPPiuomlUq3bNni4uJy7dq18ePHC+0SieSvv/6aOXPmo0ePHBwcLl261K1bNz6wrl276unpEdHUqVP379+fn5+fmpq6fv36JUuWCBd+6dKl/v37P3jwoEuXLleuXKlVqxY/5+TJk8eNG6fLO4MCXUV2744HDx40aNBAX1/fzMzMxsbm6NGjBTrk5eUNHTpUIpFUq1ZNJpN9/fXXwkPHjh2rVq2amZmZgYGBm5vb/fv3efvp06fr1KljZWVlbW1dvXr1w4cPv93Y7t69y1cFSiSSli1baq8QbN++/fr16zGhBQBQejz9lbm5ufYUjrZTp0598sknN2/eLOYkJ06cICLtPE9du3YtkJzT1NT0l19+EQ6XL19uZ2fHGPv+++9r1apVTE/G2Lp160xMTK5fv17iy2KMsStXrhgaGhJRSEhIMd3u3LnDC9f88MMPb3T+ygsBVplr3759+/btX7x4oVKpJkyYYG1tXSBzf1BQkLm5OU+M++eff0ql0r/++osx9uLFCxsbm3HjxqlUqszMzI4dO7Zp04YxplKpGjRosGjRIrVarVar/f39tXO1ldxff/1lYWFBRAYGBkK+3U2bNn3yySfCBx1DQ0MfH5+wsDChCgEAALypqVOnEtHkyZOL6jBs2DAi+uabb4o5SVRUFBGdPXuWH2o0mpo1a3733XfafRo3bjxz5kzhcPLkyfwPx86dOxUKBV+0xBjj01pHjhwRei5btszExOTvv/9+42tjbPPmzUSkp6cnJAUt1J9//imRSGQy2aFDh97iVSodBFhl68GDB0TE0/Yzxp49e6anp7dt2zbtPo0aNZoyZYpw2L179yFDhjDGduzYoaenJ2TL5Z9d7t69yxjLzs4WIp6LFy8SkTC5VRIajSYwMJDPoPr6+r66EjAnJyc0NNTLy0vYlOHm5hYYGJicnPxGlw8AAEqlkid5KqrCTFpamqGhoVQq5Su4i9GyZ
cuRI0fyrw8ePCiVSgukg1+wYEGtWrV4+cLU1FRbW9vly5czxl68eGFmZrZy5Uqhm7m5eVZWFmMsMzNzyJAhjo6OJVzbXigeQdra2sbFxRXTbc6cOURkZWVVGbPYvykEWGXrr7/+IiLtOeFGjRppf0bRaDT6+vrbt28XWmbPnu3u7s4Ymz9/foMGDYR2nvNj//79BV7i0KFDUqm05PUsMzMzBw0axG8Lzps3r/ipqXv37gUEBNSoUYOHWQqFonfv3qGhoRW/WhYAQAWxY8cOImrRokVRHXiaqPfee++1pzpy5IhCoejZs+e4ceNMTU0/+eQT3t6xY8elS5cyxp49e+bs7NygQYPJkyfXq1evYcOGwl+HpUuX6unpjRgxYsiQITKZjN8fzM3NbdSoERENHDjQ/z98sfkbyc/P5yuo2rZtm5ubW1Q3tVrdp08fImratCkP76qwCrAKrEpLSUnR19fnpcW5AmsS09LSlEql9qJFKysrYU2idruBgYGxsXGBlCdE9PPPP/fs2ZPf236tqKiodu3a7d6928zM7M8///zmm28KLecuqF+/fmBgYHx8fFhYmI+Pj0ajOXDggK+vb+3atWfNmhUdHV2SFwUAeJe9dnn7hg0biu8g6NGjx/Xr19u3b29jY7N9+/bVq1fz9lGjRrVt25aIrK2tr1y5MmnSJGNj488///zixYvCX4cvvvji+PHjtWrVql+//rlz54SX46WmW7du7fwfc3PzN71GuVy+a9cuZ2fnCxcuFJOnnldobtiw4Y0bN/hU3Ju+UGWi6wivituzZ49EItHev+ru7v7ll18Kh/n5+TKZ7Pfffxda5s2b17BhQ8bY7NmzmzVrJrSrVCqpVLpr1y7t869YscLCwuLevXslGcyhQ4d4xObq6nrnzp23uJyEhITAwEAXFxf+wyOVSjt06BAcHFzUsk0AgHfco0ePpFKpoaEhzzvwqsuXLxORtbV1MRM/lcW1a9f4hIJ2qohX3b17l8dwgYGB5Ta28ldpZrDyWX5ifmJifmIeK7K6ZAXk6OjIGHv48CE/VKlUMTEx2jtg5XJ5jRo1+NJF7uHDh7xDzZo1Y2Ji8vPzeXt0dDRf0sgPGWOLFy+eO3fuwYMH69evX/wwGGNBQUG9e/dOS0vr3bv3xYsX3dzc3uJy7OzsAgICHjx4EB4e7u/vb2hoePbs2QkTJjg4OEyYMOHq1atvcU4AgCpsw4YNGo1m0KBB2ncktPH5rZEjR+rr65fv0MTXrFmzdevWEdG0adP++eeforq5urru3LlTJpPNnj374MGD5TjA8qXjAK/EzmadpQiiCDqaUTDNQUWWn59va2v7+eef80P+I1Vgcd+YMWMaNWqUk5PDGEtKSrKwsFixYgVjLDExUaFQCCviZ8yYYWtrm5eXxxh78eLF4MGD69ate+PGjdeOIScnx8/Pj4gkEklAQICQsbf00tPTg4ODPTw8hB8nDw+PFStWpKamivUSAACVl1qt5vmZitqdl52dzXdzv2lyhIrsiy++IKIaNWoUn7B0/vz5RGRpafnWqe0ruAoRYCXmJf6Q/EOvqF4NbzWscaOGS6RL53udFycujs37//+bShpgsf+y+/v4+Pj7+xsZGQkbaD08PNavX88Ye/z4sa2trYeHx6efflq3bl13d3fhjtusWbOMjIzGjx/v6+srk8l27NjBGMvIyKhfv75EIhk8eLCwJvHixYuFvvqTJ094AGRqavrHH3+U0TVGRkYGBAQItc0NDAyQ3AEAgE/PODs7F/VmuGnTJiJq27ZtOQ+sTKlUqvfff5+I3N3ds7Ozi+qm0Wh8fHyIyM3NreT7tCoRCdPpEjNGbFHiosDkwGxN9quPGkuNAx0CP6n2CRGde3muw70ORHTU5ai3mXd5D7R0IiIidu/enZub6+npySsGENHy5cs7dOjQunVrIkpOTt60aVN8fLyrq+vo0aO1F8X/9ddfx48fNzAwGDhwYMuWLYnoxYsXvOSCtj59+jRs2LBA4z///OPr65uSklKvXr0///zz1Q7iUiqV+/btCwkJOX78OP+5ql+//rBhw8aOHSvc2QQAeHcMHjx49+7d33333Zdffllohy5dupw6dWrdunXjxo0r57GVqefPn7du3frhw4d+fn48S1ahsrKy2rVrFxkZ2a9fvz/++KP4TVeVjw6DOw3TjHw0ks9Lya7I+j/s/2PKj7vSdm1K3fTJk0+srlvxh5YmLWWVeQZLV4KDg3mpgV69epWk5mVYWNjYsWNFmXN68uRJYGBg7dq1+c+YTCbz8vIKDQ3l9zcBAHQiNjb26tWrxRSoyMjIuHLlSmJi4qsP3bt37+bNm6++icXHx9+8ebPQjT7Pnj3T19eXy+VF1UKOioqSSCTGxsZVcv7m+vXrxsbGRCQk3ypUdHQ0v/uxcOHCchtb+dBlgPVTyk88ZnK44RDxMqLAoyn5Kd3vd69zs86N7BsMAdabyMnJGTVqFL3JoqsffvhBLpcTkXZGrlJSq9U8uQOP80hrjbxYLwEAUBJZWVkffPCBRCIxNzc3MjJau3btq32CgoL09fXNzc0lEsmQIUOE3d/R0dFNmzaVy+XGxsa2trZ8hp4xFhsb27lzZ319fRsbGyMjo1f3zS1dupSI+vTpU9SoAgICiGjMmDEiXWWFs3v3bolEIpfLi0+sdfToUZlMJpVKX030WKnpLMDKVmfzOSr9K/qROZGF9slSZz3N/7foEgKsEoqNjW3VqhURmZiYaGd/KEpOTs7o0aPfKBp7U4mJiStWrGjSpIkwb+rh4REcHFzls8wBQAXx+eef29nZ3b17V6PRrF69WiqVXrt2TbvD5cuXJRLJ5s2bGWNXr161tLQUJlQ6d+7cunXr58+f5+XljRs3Tih39uGHHw4cOJDPh61evVoulxf49MhXZfz555+FDik/P9/e3p60St9USbNmzSIia2vr6OjoYrp99913RGRqanrr1q1yG1tZ01mAtSl1Ew+YpsZOLUl/BFglcerUKZ513cXFpfiiodybRmOlxJM7mJiY8DDL3Nzc398/PDy8rF8XAN5lGo3G2tr622+/FVrq168vbO7mxo8fz2v2cTNmzKhbty77L8nO4cOHebt2ubP8/HxhBXdeXp6wD4k7d+4cEdWoUaOopRF79+4lIldX16q9GUitVvOVx82bNy8mY6JGoxkyZAj/hvA6P1WAzvJgncg8wb/ws/LT1RiqmJCQEE9Pz+Tk5J49e166dKlx48bF9z99+nTLli0vX77s4uJy/vx5Xj+nTPGJq/j4+E2bNnl5eWVkZISEhLRs2bJRo0ZBQUHPnj0r6wEAwDsoNTU1NTW1Q4cOQku7du3u37+v3ScyMrJ9+/bCYdu2bR89epSfn8+7Cc+1trauX78+b5TL5YaGhrw9MzNTrVZXr15dOAPPbjV69GhhjUQBvMO4ceOq2sru/yWVSrds2eLi4nLt2rXx48cX1U0ikfz6668tWrS4d++en5+fRqMpz0GWEZ0FWOEvw4nIQGrQ3LC5rsZQZSiVyrFjx06YMEGlUgUEBBw4cKCojHaC
N43GRGRmZjZy5MiwsLDbt28HBARUq1bt9u3bs2bNcnR09PX1PXbsGKvaxRMAoHwlJycTkfa7YoGSZbyPlZWVcGhjY6PRaFJSUpKTk/X09IR5d/5QYmJigZcICQlxdHTkxWqIKCsrKzQ0lIj4AoxXJSUlHTp0SC6XjxgxojSXVilYWlru37/fzMxs+/bty5YtK6qboaHh7t27bWxs9u/fv2DBgvIcYRnRWYCVokohIieFk1wi19UYqob4+PguXbps2LDB2Nh4586dgYGBMpmsmP5vEY2VkQYNGgQGBsbGxoaGhvbu3VulUu3atcvb25sXOoyJidHJqACgiuHF+LKz/z8ZUFZWVoFye2ZmZi9fvtTuQETm5uZmZmb5+fl5ef9fQSQzM5OnBhWcPn16wYIFq1evFjLs7Ny5MzMzs1OnTkXVzNi4cWN+fn7fvn1tbW1LeXWVgpub2+bNmyUSycyZMw8fPlxUNycnp99++00uly9YsGDXrl3lOcIyoat7k4orCoqgVndalbB/gTVYqfmpUblRr31WlXfmzBn++1mzZs2SLGaKi4tr06YNERkYGPDlnBVHXFxcYGBgnTp1+E+mVCpFcgcAKL28vDw9Pb2tW7cKLV5eXqNHj9bu07dvXx8fH+Fw1apVVlZW7L9CgULxVpVKZW1tvXr1aqHngQMHTE1Nf/75Z+2ztWvXjog2btxY1JB44PXXX3+V7soqmTlz5hCRlZVVgXImBXz//fdEZGJiUpKVxBWZzgIsw6uGFEEedzxK2L9AgLU4cTF/evDT4Cz1O7oZLTg4WKFQEFGXLl2Sk5Nf2/9NozGd4Mkd/Pz8hMUNtra2U6dOLUlRIACAQg0ePNjT05Pvko6KipLL5QWCm82bN5uYmCQkJDDG8vLyWrVqNWHCBMaYWq12dnaePn0677Znzx65XB4XF8cY02g0gYGBZmZmBXYI3blzh/6bEit0MCdPniQiBwcHlUpVBtdacanV6j59+hBR06ZNi99Fzm+t1qtXryRJHCssnQVYdjfsKILqRtYtYf8CAdZX8V8ZXTXiLZbXLD958smVl1fKcrwVS25urpD219/fPz8//7VPEaKxzp07lyQaK0Cj0Wzfvv3DDz8cNmzYr7/+Wmg2h/Dw8AkTJvj4+MydO1e7HGFubu6yZct8fX1HjRp15MgRoT0/P3/9+vV+fn6jR49+9ZzPnz8PDg5u1qyZMNvK18gXkyQQAKBQV69eNTEx6d69+/Tp02vWrNm1a1f+hjN+/Hi+nTA3N7dFixbOzs4zZ87s0KGDtbW1MMuyfft2qVT64YcffvLJJyYmJp999hlvHzBgABF16tRJKFm2c+dOxtjnn39ORDw+KxQvDvv111+X+WVXPC9evODZKwYOHFjM9sns7GxeucTb27vyhqE6C7Da3G3DE7i/VBe5b1Pbq2kaMlQZm1I3ed334u0UQQ1vNQxMCnyW/6wsB6578fHxfCmlgYFBMVPQAqVSKezd8Pf3f7s7bl9//bWBgcGsWbPmzJljYmIydWrB5Bpnz55VKBSDBw9esmRJ48aNGzRoIHx669Wrl729/bfffjtx4kSJRLJp0ybe3r9//3r16i1ZsmThwoUWFhbTpk0r9KV5cgdTU1N+CWZmZn5+fmFhYW9xFQDwznr48OHcuXMnTZoUHBwsJBHdunWrMP/08uXL1atXT5w4cdGiRQWqFF+4cGH69OlTpkzRrui6cOHCgP+1b98+pVLJ9xJeunSp0GGkp6cbGRlJJJLib5NVYXfv3uUL4AIDA4vpFhMTU61aNSKaPXt2uY1NXDoLsKY8mcKjoiMZR17fu9g8WLdzbgfEBVS7Xo130L+i7xPtE/YiTMOqYHKR8PBwXpvd0dGxqF9gbfHx8Xw1gIGBwa+//vp2L/r06VOFQiHkKd62bZtMJouJidHu061bt549e/JPJCkpKUZGRmvWrGGMHT9+XPu95vPPP7e3t1er1Tk5OV999ZUwl7ZmzRp9ff1ipuKys7NDQ0O9vLyELc0NGjSYMWPGvXv33u6iAABEx5dmN2nSpKgOP/30ExF5enqW56gqmn379kmlUqlUWvwqtNOnTysUColE8ttvv5Xb2ESkswDrcMZhHg/5RPu8vncJEo3manJDn4f2juotuyLjPWverBkQF/BY+VjUgevS5s2b+cqkTp06JSUlvbZ/RETEG0VjRdm7d69EIhGqZeXm5hoYGGzZskXooFKp5HK5dkv//v0HDx7MGJs7d66rq6v2kIjo9u3bBV7i0KFDRFSS2+23bt36/PPP+ScbrlatWitWrHjrqwMAEEvPnj2p2Op7Hh4eJGpRskpq/vz5RGRpaVl88bQVK1YQkaGhYUREwXp6FZ/OAiw1U9eLrEcRJImQHEg/8Nr+Jc/kHpsXG5gUWOdmHd5fGiH1uu8V+jw0T1OJN6Pl5+fzqlUlv823ZcuWN4rGirFmzRpra2vtljp16mjP7sbFxRHRmTNnhJZPP/20Xbt2jLGPPvqoR48eQntqaioRvXqDb+7cuS4uLiUfkkqlOnLkiJ2dnRBmrVu37o0uCgBAXLGxsTKZTKFQPH36tNAO169fJyILCwshBfw7S6PR+Pj4EJGbm1vxta7HjBlDRE5OTkV9VyssneXBkpJ0ba21EpIwYoMfDV6ful5D/5O5lRH7J+ufZSlFJiUriqOeY0CNgKjGUWH1wvys/PSl+scyj/k+8q0VWWta3LSbOTfFu4hy8uzZsx49evBCpOvXrw8ODi4qNTCnUqlmzZrl5+eXk5Pj7+9//PhxXj/nreXm5urr62u36OvrayeVycnJISK+iL5AhwLPNTAwoP9NSENE0dHRK1eu5Dt4S0gmk/Xo0SMhIeHChQt8a+TatWvf6KIAAMS1YcMGtVo9cOBAGxubQjusW7eOiLR3Sb+zJBLJhg0bGjdufPfu3ZEjR7Ki80uvWbOmdevWjx8/HjZsmEqlKs9BlpZu47sVySukEVI+1eQc6TzxycTFiYu/T/p+4pOJfH5LfkWelJfESlGL8LnqefDT4Ga3mwlr4Xlyh0x15diMduXKldq1axORg4PDhQsXXtv/6dOn3bp1IyIejYkyhu3btysUCu0dH9bW1tp5X3hSPu276WPGjOnVqxdj7PPPP+dTWdzjx4+J6PLly0JLXFxcvXr1xo8f/9YFub788ksiqlat2ts9HQCg9DQaTd26damwGXouNzeXB15Xr14t36FVXNHR0dbW1kQklNYuVEJCAi+MPWPGjHIbW+npOMBijB1IP+B6y1WIfgr8a3239b3ce0yMYs/hL8P9Y/xNr5ny85hdM/N75Bf2okJvRtu6dSv/oNOhQ4fExMTX9n/TaKyErl69Slol3yMjI4no9OnT2n1q1649c+ZM4dDFxYVnjtm8ebOBgYEwA7xp0yaFQiHU8jxz5oy9vf3HH39caN6HEjp48CARyeXytz4DAEApHT16lIicnJyKejfbtm0bEXl4lDT74zvi6NGjMplMKpXu37+/mG58ozoRvfVurfKn+wCLMZavyT/+4viX8V8Oix7m/cC7d1TvsY/Hrkh
ecSvnltDnifLJ3IS5cxPmPsx9WJrXylZnhz4P9brvJYmQ8Eirwa0GgUmBKfkppb4OMRVYdCVsKi7Gtm3beKGGEkZjb6Rt27Zt27aNj49PTk729PRs2rQpfxMJDQ19/PgxY+y7774zNzc/efJkdnb2okWL9PT0eO7jnJyc6tWrjx49Oj09/fbt287OzqNGjeLnXLlypYGBweLFi5//5+1SSGRnZ8vlcqlUimUNAKArQ4YMIaIFCxYU1aF79+5EJGzHBsF3331HRKamprdu3Sqm25o1a4jIwMBA+x5IRVYhAiyduJtzNyAuoPr16trJHfal71NpdJ/T7NmzZ56envw2X0nWbr9FNPamHj9+zOvJSySSVq1aCfs+iIiX3MnPz584cSLfUlu9evVdu3YJzz179qyLi4tEIpFKpf379+ezWXxWrIDDhw+/3fBcXV2JCNneAUAnUlNTDQwMpFJpgfw1gujoaKlUamhoWKlTk5cRjUbDw1NXV1fh/kaheE7HWrVqpaRUrDmRQr27ARan1Ch3pe3q+aCnsBSs96ne8+fPf/Lkia6GdPXqVScnJyKyt7c/f/78a/trR2MhISFlOrZnz54V+LHOz8/XXjuVnZ0dFxf3auJdjUaTkJBQ/FaR0ujXrx8RhYaGltH5AQCKwbMJvP/++0V1+Prrr4nIz8+vPEdViWRnZ7do0YKI+vTpU8yKkby8vE6dOhFRx44dK36Z2nc9wBLE58UHJgU6Rzq7+7vTf5WGN23aVM53nXbs2MFv87Vr145XxSretWvXeHVkOzu7c+fOlcMIK6aZM2e+dpkkAEAZ4UW9ChQlFKjVap6S8OTJk+U8sErk0aNHfBPAvHnziumWmJjo4OBARAXKdVdACLD+h5qpj4YdHTp0qJBZoFq1ap9//nnxN4ZFoVKphNt8fn5+JQnshGjMw8OjqHnpKmPevHk1a9bcsWNHoY+uX7+eiEaMGFHOowIAuHTpEhFZW1vn5uYW2uGvv/4iImdn57feK/2OOHbsmFwul0gkxd+OOH/+vFwuJ6KxY8eW29jeAgKswqWlpQUHB7u7uwvLg3il4RcvXpTFy6Wmpnp7e/OtcMWXZ+J4NMaLxowYMeJdWNzNczF88803hT569uxZImrZsmU5jwoAYMKECUT0xRdfFNVh4MCBRLR48eLyHFUl9f333xORiYnJzZs3i+k2btw4/qd5w4YN5Ta2N4UA6zXCw8OnTp1qZWXF/y8NDAx8fHzCwsJE/CBy/fp1Z2dnIrKxsTlx4sRr+79pNFY1bNy4kYiGDRtW6KM8QbypqSk+IAJAecrOzrawsCCiom50JCcn6+npyeXy+Pj4ch5bJTV69GgiqlevXvEbAho2bEhENWvWLK9xvTEEWCWSk5NToNKwq6trYGCgUKv4re3cudPY2JiI3N3deb6D4r1pNFZlXLhwgX+XiurAqxPGxcWV56gA4B3HP/tpp1MugE/J9O3btzxHVallZ2e3bNmSiLy9vV/dMiXgtzWMjY3Lc2xvBAHWm7l37968efP4ckUikslkXl5eoaGh+fn5b3oqjUYTGBjII7bhw4eX5DZfaGjoG0VjVUlaWhr/XSpqjqpjx45EdPz48XIeGAC8y/imtl9++aWoDnyiZe/eveU5qsru8ePHNjY248aNKybrEC+S9sEHH5TnwN4IAqy3oVKpwsLCfHx8hJqA9vb2AQEBUVFRJTxDRkZG3759S36bTzsa+/DDD9+FRVev4hUVi8qgwW/Jr1mzppxHBQDvrHv37kkkEhMTk6KW5545c4aIbG1tK35OgYqm+DuqvOpaBU9/qLNiz2Xk2LFjI0eO7NOnz/z581+8ePFqh4cPH06ZMqV3796TJk26d++e0K7RaH755ZdBgwYNGjTol19+0Wj+v/L006dPFy5cqF2KWJi4iomJCQwMdHFxSUhICAoKql+/fseOHUNCQgoUMy7g3r17bdq02bdvn7W19eHDh4XNg0V58eLFgAEDZs2aJZPJAgMDt23b9m4WCnVzcyOiu3fvFvoozzWq/X8KAFCmeL3XIUOGmJqaFtWBiEaPHi18GocS4sUHi/Lbb78RUefOnZs0aVJeI3pzuo7wxLR7926JRDJx4sTly5fXq1evVatWBe7cPXnyxNraunv37j/++GPPnj3Nzc2FjOSffvqpiYnJggULFixYYGJiMm3aNMaYRqOZNGmSiYmJo6NjnTp1innp8PBwf39/fv+OiCwsLPz9/a9cufJqz/3795ubmxNR8+bNHz169NqLunfvXoMGDYjI2tr62LFjJf5mVEF8q86qVasKfXT//v1E1KNHj3IeFUCV8ezZs1mzZr3//vsjRoz4+++/X+2gUqnWrl3br1+/fv36hYSEaCeEjIyMnDBhQo8ePSZPnqw9l69Wq/fv3z969Ohr166VwyWUp/z8fDs7OyIqKgdhZmYmD7x43TAQEf+8feDAAV0PpDhVKsCqV6/euHHj+NcPHz6USqUFcmlMmTKlbt26PFVJfn5+48aNx4wZwxiLi4uTyWS85AtjbPPmzTKZjC+XPnLkyLNnz37++efiAywuPT09ODiYl5ThPDw8VqxYkZqayv67zSeVSolo6NChL1++fO0JhWisWbNm0dHRb/btqHKWLVtGRJMnTy700fv37xNR7dq1y3dQAFVETk5Oo0aNmjRp8uOPP44fP14qlR48eLBAn8mTJ5uZmS1evPjbb781MTH57LPPePvdu3dNTU179+69du1aT09Pa2vr2NhYxtjx48fd3Nxq1aoll8t37txZ3pdUxv744w8icnNzK2phaEhICBF17ty5nAdW5f3zzz9E5ODgUMwS+Iqg6gRYCQkJRKS9sa59+/YF/hg3a9Zs5syZwuH8+fNdXFwYYzt37lQoFMLCpuzsbD09Pe23gxIGWIIrV65MnjzZ0tKSh1lGRkbDhg3r1q0bEfHbfK89w1tEY1UeT9bn5eVV6KMqlUpfX18ikeB7Be+gp0+flvIMISEh+vr6Qp34YcOGtWjRQrvD48ePpVKpkOx348aNcrmcF5wYOXJk8+bN+V+73NxcZ2fnTz/9lDEWFRUVERHBGLOwsKh6AVbv3r2J6IcffiiqQ5s2bYho06ZN5Tmqd8HIkSOJ6KuvvtL1QF6j6qzBiouLIyJhfx//Oj4+vkAf7Q61a9fm01RxcXHVqlUTFjYZGhpWq1atwHPfiLu7+48//piYmMiTO+Tk5OzYseP69esWFhaHDh167aKrzMzMQYMGzZo1SyKRBAYGChnb33HFr8GSyWR169Zl/01lAbw7Dh06VLt27aCgILVa/dYnOX36dLdu3fjOLCIaOnTolStXsrKyhA5nz55VKBQDBgzghz4+PkR07tw5Ijp16pSvr69MJiMifX39AQMGnDp1iojq1q3LC8xVPUlJSYcPH1YoFCNGjCi0w61bty5evGhubj548OByHlvVlpGR8fvvv0skEp4uqyKrOgGWUqkkIoVCIbTo6+vn5uZq98nNzS3QgdcqViqV2u38oZycnFIOSV9fn2clffDggbu7+/Pnz7/44gueI7QYDx
48aNu27R9//GFlZVWSaOzd4eTkZGBgEB8fr/2mr634CAygqjp27Fh2dvasWbO6du361vs8Cnz+5F9rf86Mi4urXr26UEbMyMjIxsaGf0ZNTEws8Fz+ibcK27Bhg0ql6tu3b/Xq1QvtwJe3Dxs2DB+PxbVjx47s7Oxu3bq5uLjoeiyvUXUCLL7j4OnTp0JLSkpKgW0I9vb2z5490+5ga2srk8ns7e21n8gYe/r0KS8nKYq6desOGzaMiLRfvVAHDx5s3br17du3mzZtevny5ddGY+8UqVRar149xlhRf0KwkRDeTT/88MNff/3l6Oh45syZ5s2bf/PNN/n5+W96kry8PO2dbvwzp/ZnVKVSWWArnEKhUCqVarU6Pz9f+yF9fX3+ibeqYoz9+uuvRDR27NhCO+Tl5W3durWYDvDWeORaKb6xVSfAqlmzZrVq1f7++29+qFQqz58/37hxY+0+LVq0EDoQ0d9//807NGzYMCsr6/Lly7w9PDw8KyurwHNL6bWTK4yxoKCgPn36pKen+/r6njt3jmdsB23Fh1AIsOCd1atXr8jISH9/f6VSOX/+/FatWl25cuWNzmBnZ1fgAyoRaX/OLPABlX8Qtbe3l8vl1atX135ucnKyiB9QK6CTJ09GRUU5OjoW9Rl47969T58+bdKkCc9IDmK5efNmeHi4hYVF//79dT2W16s6AZaenp6/v/+SJUvOnz///PlzvsSS3x0/ePBgeHg4EX3yySfHjx8PCQl58eLF5s2bDxw4MGnSJCJq2bJl69atp06dGhMTExMTM3Xq1LZt23p4eBBRTk5OWlpadna2RqNJS0vLzMx8u+G99m9/VFTU/PnziWjx4sW//fabkPEBtPE4tahvI24RwrvM3Nw8ODj45MmTLi4u169fb9u27axZs0o+k+Th4XHq1ClhFdfff/9tZ2dnbW2t3SEjIyMiIoIfXrhwIScnh2chatSoUYHPrhU6O1Gp8UmUMWPG8GVnRXUYP358uQ7rHbBu3ToiGjFiROW48aqz5fVlIDc396OPPuIz2/Xq1fvnn394e+vWrYXtxGvWrOGVmy0sLJYtWyY8NyYmpkuXLkQkkUi6desmpAufO3eu9rfrrTfc5ufnKxQKqVRazB63HTt2vLovGrRt2bKFiHx9fQt9lJfTMTIy0k7PA/CuefnyZUBAAN+D3Lhx44sXL5bkWSkpKUZGRp9++unTp0+PHTtmYWGxaNEixtjjx483bNjAf6c6duzYtm3b6OjoqKioli1bdurUiT939+7dcrl848aNz58/X716tVQq5W+/Go3m4cOHDx8+NDMzW7169cOHD3NycsrsustJenq6kZGRRCIpqnRHbGysTCbT19cv/dZO0KZUKm1sbIio0ByTFVCVCrC43Nzc9PT0Yjqo1ernz58Xmj8jKysrKyurjAbG84VWvWx75enSpUtE1LRp06I68D1QMTEx5TkqgAro7NmzfE5XLpdPnTq1JO9sR44c4SsTDA0Np06dyt8kd+/eraenx0vCxcTEdOvWTSKRSCQSb29v7WImS5cu5YlpqlWrJhTme3U/SlE5OSuRH3/8kYrOF8MY++abb4ho2LBh5Tmqd8H27duJyMPDQ9cDKakqGGBVWHx782+//abrgVRiL168kEgkBgYGReWX49OQR48eLeeBAVRAOTk5AQEB/DZW3bp1tdMEFiMjI6P46vXZ2dmFlkPVaDTPnz9/y7FWHu7u7kQk5AMrQK1W165dm4je8cIbZcHT05OIfvrpJ10PpKSqzhqsig9LsEvP1NTUzs4uNzc3Nja20A6vLsNSq9W//vrr6NGjP/744+PHjxf6rLNnz37yySejRo1au3at9varzMzMJUuW+Pn5ffHFF5GRkUJ7Xl7e+vXrx4wZM2XKFJ7+FKACMjAwCAwMDA8Pd3d3f/jwoaen54QJE167kNTMzEwulxfTwdDQsNByqBKJRMiuXFXduHHj6tWrVlZWRS2yPnbsWExMTJ06dXhmaRDL48eP//77b0NDw6FDh+p6LCWFAKv8IMASxZuWfB4zZswXX3xhZ2fHGOvRowdfI6nt999/79SpU2ZmZs2aNefPnz9w4EDenpeX16VLl3Xr1tWtW/fBgwctW7a8cOECEanV6vfff/+XX35xc3NTKBQ+Pj5LliwpiysFEEXz5s0vXrwYGBiop6cXEhLStGnTsLAwXQ+qsuLVb0aMGGFgYFBoB2H9O18DB2LZsGGDRqMZPHhwZQridT2F9g45f/48ERWoPgFvauLEiUS0fPnyQh/l80menp788OrVq0QkbB2YPXu2tbU1X03CaTQaJyenCRMm8EMeQvE7Kb/88otCoeDbHTQaTc+ePbt168YYS09P//rrr/Py8vhTZs2a5ebmVhZXCiCuGzdutGrVir/z+/j48BqpUHI5OTn8r/vVq1cL7fDs2TN9fX2pVIploOJSq9U8k+3Jkyd1PZY3gBC7/AgpBhhjuh5LJVb8RGCBPA4nTpywtbV97733+OGoUaNSU1OvXbsm9H/48OHjx4+Fkgtt2rRxc3PjdxJPnDjh6elZs2ZNIpJIJH5+fqdOncrPzzc3N1+4cKGQVjExMVF7KztAhdWkSZNz586tWLHCyMho165djRo14uWKoYR2796dlpbWqlWr5s2bF9rh8ePHderUee+997Tz2kPpHTly5MmTJ87Ozp07d9b1WN5AcTfaQVwWFhY1atRITk6Oi4vjf7bhLRSfCksop5OZmWlqahobG+vk5CTM1depU0cikRSo/sHbhZY6derwxtjY2KZNm2q3q9XqpKQk/n+XkpKyffv2CxcuXL16NTQ0VPzrBCgDcrl82rRpvXv3Hjdu3MmTJwcOHOjj4/PTTz/x3e9QDI1G89NPP1GxOcQ9PDzu3LmTnp5efsMqdyqVat26dadOnVIoFAMHDuzXr9+rfU6cOLF169bs7OxWrVpNmjRJWLGXkZGxevXqGzduWFlZjR07VphPzc3NXb9+/fnz5w0MDN57771BgwYVuMHKb7yOGzdOIpGU8fWJCTNY5QrLsEqv+DVYQjkdXvI5OztbKJ1GRHK5XC6Xa28dz87OJiLt5RSGhoa8Q3Z2doF2IhKeq1Qqnzx5kpWVpVQqk5KSxLs+gDLHdxQGBwebmJjwqazNmzfrelAVV2xsbFBQkIuLy71790xMTNq0aVN8fwsLi3IZl26MHj16zpw5DRo0sLKyGjx48MqVKwt0CA0N9fLykslkTZs2XblyZZ8+ffhNG6VS2blz561bt7Zs2TI9Pb19+/YnT54kIpVK1aNHj9DQ0NatW9vZ2Y0dO5bn3BY8e/bswIEDcrl81KhR5XWVItHxLcp3jL+/PxGtXr1a1wOpxNRqNc/hm5aWVmgHXrt+69atjLF58+Y1btxYeIhnItXePs3LiWgnDOzUqdOUKVMYY7179x41apTQztcFv5pibdGiRTY2NkWljQCoyB49euTl5cX/FvTu3ZtXbgYuMzPz119/7dSpkzBrYmpqSkTNmjUru3SJFRxf1Xro0CF+uHDhQnNzc+3ksXxV68SJE/nhj
Rs3hP6//PKLvr6+kDutX79+7dq1Y4y9ePHi22+/FTKDzJ07t27dutovunTpUiLigVrlghmscoUZrNKTSqX169cnIj5H9Srte4hNmza9e/euUCXt9OnTEomEd+Dq1q2rp6d3+vRpfpidnX3lypWGDRvy85w5c4b9t2DuzJkz9vb25ubmBV6uRYsWz549e/78uYjXCFA+nJycjh49GhISYm5ufuDAgdGjD2zapOsxVQARERETJkywt7f/6KOPTp8+bWBg4OPjExYWFhcX17Bhw+vXr48cOZK9k0tpT548Wb169R49evDD4cOHZ2RkaFe9fPz48ePHj3mROiJq0qRJ06ZNeRmlkydPdu/e3d7enj80YsSICxcu5Obmmpqazp49W8gM8vjxY54vWrBhwwaqJNWdC0CAVa54gIVieaVU8pLPffr0qVGjxueff/7y5cvExMQ5c+Z88MEHDg4O2dnZ+/fvT09PNzMz8/Hx+fbbbx89epSbmxsQEKCnp+fj40NEo0ePjomJWbJkiUqlunLlypo1a8aNG0dEsbGxzZs3P3v2LGPs5cuXISEhdevWxRIWqKQkEsn48eMjIyN9fSdcuTJ+9Gjq1YuKSDNXxSUkJAQFBdWvX79ly5YhISGZmZkeHh7BwcHJycn8tpeZmdmePXvMzc337NnzbiZniYuLq127trBAqlatWlKplC9aFTrQ/65qdXJy4steY2NjeQpWoZ0xlpCQwA+TkpIWLVrUv3//iIiI4OBgodu5c+du375do0aNXr16leWVlQkEWOWq+AXaUEIlL/msp6f3+++/nz171tLS0sHBwdzc/JdffiGihISEvn378jP8+OOPdevWdXZ2Njc3/+OPP3bv3s13BTZq1OjXX38NCgoyNjZu3br1+++//9VXXxGRo6PjoEGD3nvvPTMzMysrq3v37u3atatyLb0EKMDR0XHnzp9//llqY0OHDpGbGwUFkUaj62GVC6VSuX//fl9f39q1a8+aNevBgwf29vYBAQH3798PDw/39/fndwY5V1fXLVu2SKXS2bNnHzx4UIfD1onc3FztVa0ymUxPT4+vZOVycnKIiFcE5vT19XmHAs/lK1yF56rV6szMTD09vbS0NO27E3x5++jRo4WN25WJbu9QvmvUarWBgYFEIsnMzNT1WCoxXpFq0KBBhT76ajkdtVr9+PHjxMREoY9arU5PT9deOJWcnPzo0aNXl1Ll5eU9fPjw1QIg2dnZt27devToUekvB6DiSE5mfn6MiBGxjh3Z3bu6HlBZioyMDAgIEKaf9fX1fXx89u3bV3ylIMYYX4VtaWn54MGD8hlqBbFo0SJXV1fhMCMjg4iOHDkitNy8eZOI7ty5I7R07dp18uTJjLH+/fsPHz5caD9x4gQRvZqMbenSpRYWFjxbId8MXuCElQgCrPLWuHFjIoqIiND1QCqxiIgIItJevV6Ag4MDEUVHR5fnqACqjH37mIMDI2KGhiwwkFWxLRzPnz8PDg7mJQW5hg0bBgYGPn36tIRn0Gg0fCGBm5tbRkZGmY62Qjlw4IBUKhUWqu/bt08ulyckJAgdXr58aWZm9vPPPwuHJiYmvHrg7Nmza9asqVar+UPz5s2rXbs2/1poZIwdPnyYiPg5eeGNzp07l/mFlQ0EWOVt0KBBRLRt2zZdD6QSy8rKkkgkCoWiqA+a3bt3J62tLgDwptLSmL//v1NZbduyW7d0PaBSU6lUYWFhPj4+wg0sS0tLf3//K1euvMXZMjMz+aflfv36aTQa0UdbMalUqjp16gwYMCA1NTU6Orpx48a+vr6MsaysrJ07dz579owxNmXKlJo1a964cePFixf+/v6WlpZ8murBgwcKheLrr79++fLlmTNnrKyslixZwhiLjY11dXU9cuRIXl5eWlpa7969XVxceMjVtm1bItq0aZNOL/rtIcAqb3wdz9y5c3U9kMqNZ/vUTq+gbdKkSVR0OR0AKKFDh1itWoyI6emxgACmVWWqMrlz505AQICwN00mk3l5eYWGhipLdz3R0dF8vebChQvFGmrFd/Xq1UaNGkkkEolE0qNHj5SUFMZYdHQ0EfFt15mZmTxTqEQiqVOnzj///CM8948//rC1tZVIJHp6euPHjxcKji1btszKykqhUEilUnd39xs3bjDG7ty5Q0Tm5uYvX77UxYWKAJncyxsyNYjCzc0tNjb27t27devWffVRfJMBRNGzJ928SXPm0I8/UlAQHTpE69dTy5a6HlbJZGS82LFj+8aNGy9evMhbGjVq9NFHH40YMaJGjRqlP3+dOnV27Njx/vvvz5s3r3nz5r17937tUx48ePDnn39mZ2d36tSJT7QX8OLFi9DQ0NjY2Lp16/r6+mrnOj5//vzx48flcvkHH3zQpEkTof327duHDx9WKpXt2rXr2rVr6a+reM2bN4+MjExOTtbX1xdSqtapUycvL4+nWjAxMfn9998zMzOzsrJ4OCU8t3///n369ElKSrKyshLSuxPRZ599NmXKlISEBAMDg+rVq/NGXld72LBhPPFhpaTrCO+dc+nSJSJq1qyZrgdSuX3yySdE9MMPPxT6KL+Lz2szA0DpnT7N6tdnREwuZwEBLDdX1wMqmlrNTp9m/v6sZcsM/mfO3Nzcz88vLCysLF7uu+++IyJTU9Nbr7uNeujQIYVC0a1btyFDhhgYGMyYMaNAh5SUFGdnZxcXl9GjRzs6Orq7uwsZTYOCguRy+aBBg3r16iWXy3kiZcbYqlWrjIyMhg4dOnLkSAMDg4CAANEvUCeUSmW1atWI6PLly7oey9tDgFXeMjIyJBKJoaGh9rI+eFOrV68mIn9//0Ifffz4MRHZ2dmV86gAqrDsbBYQwGQyRsRcXJjWnZ+K4uFDNncuq13736VjUikbOjRg27Zt2qnGRafRaIYMGUJErq6ur1Z60O7m7OzMM5Qyxnbs2CGVSm/fvq3dZ9q0abVr1+Y1KuLj4y0tLYOCghhjCQkJ+vr6K1eu5N1mzJhhY2OTm5ur0WhGjhx5/Phx3r5x40apVPrqvrzKiBd4bdKkia4HUioIsHTAzs6OiLDDvzR44ZqidpdoNJriy+kAwNs5d441bPhv+OLvzypCwpmcHBYayry8mETyb2hVsyYLCGAPH5bTALKzs1u0aEFEffr0KeqT861bt4jo0qVL/FClUlWvXn3p0qXafVxcXL766ivhcMyYMZ06dWKMbd68mddI5e2PHj0iopMnTxb6EteuXRPrunTovffeI6JVq1bpeiClgkSjOvBqnszMzMzFixcPGTLk448/vnDhwqtPYYzt3LnTz89v+PDhmzdv1mhlAHzy5MnMmTN9fX2nT5/OVxpyT58+Xbhw4YcffjhlyhR+X7IqKb7ks0QiKb6cDgC8nXbt6No1CgwkuZxCQqhpUzp+XGeDiYigadPIwYF8fenYMdLXJx8fCgujmBgKDCRn53IahqGh4e7du21sbPbv379gwYJC+zx58oT+Wx5KRDKZzMXFhTdyjLG4uDihAxHVr1+fd3jy5Imjo6OxsTFvd3JyMjAw0H4ud/v2
bblcXq9ePfGuTDfi4uKOHTumUCiGDRum67GUCgIsHSgQHOTn53fv3n3Dhg3u7u6ZmZkdO3bk0zPavvnmm1GjRtnb29epU2fy5MmfffYZb09ISGjVqtW5c+datWoVERHRqlUrfncsMTHR3d393Llz7dq1y87ObteuHc/qVmU4ODiYmJikpKQUVQQQ69wByoieHgUE0OXL5OFBjx6RtzdNmEAvXpTfABITaeVKataMWrakVavo+XPy8KDgYEpJodBQ8vKi8i+s4OTk9Ntvv8nl8gULFuzatevVDunp6UQkBElEZGpqyhu53Nzc3NxcExMTocXMzIx3yMjI0G4nIhMTE+3nEpFSqfz2228nTJhQiZeE/2f9+vVqtXrQoEGVvgSZrqfQ3kUrVqwgIqHe+JYtW+RyuXDH0NfX18PDQ7v/06dPFQrFmjVr+OG2bdtkMllMTAxj7NNPP3VycsrOzmaMKZXKBg0aTJgwgTF27969+fPnC9lZevToMXDgwPK4tnLE5+TPnz9f6KPz5s0jIu35dgAQV34+Cwxk+vqMiNnZsT///J9Hvb2ZszNzdmZr1xZ5hg8+YM7OzM+vRC+nVLJ9+5iPD9PT+/dWoJ0dmzqV3bhRqqsQ0ffff09EJiYmN2/eLPAQ/4irncvUw8OjwJp0MzOzX375RThcsGCBm5sbP62Qk5MxplarZTJZaGio0JKfnz9kyJBmzZpVgUURfLEaER07dkzXYyktzGDpQIHJlZMnT3bq1MnJyYkfjhgxIiIi4oXW58Fz587l5+cPHz6cHw4aNEihUPzzzz/8uYMHD+b7XRUKhY+PD69bXr9+/blz5wr7Y+vWrZuamlo+V1duir9LiLraAGVNLqeAAIqIoDZtKDGR+vcnX18S3mni4ig6mqKjaeZMio8v/Ay8T2Lia17o1i2aNYtq1qS+fWnXLpJIqHdvCg2lJ09o5UrSSlmgY9OnTx89enRWVtbAgQMLzDDxhDLh4eH88OXLl69mmalbty4vU8FFRETwDi4uLnFxcUlJSbz92rVrarXaxcWFH6ampn7wwQd37949evSokDeh8jp27Fh0dLSTk1O3bt10PZbSQoClAwUig7i4OCG6IiL+dbzWG1JcXJylpaW5uTk/1NfXt7W15R1efW5sbOyrr3jhwgU+31OVFH8TsPjwC0SUn5+v6yGALjVqRGfP0tKlZGREu3ZR48Z09er/dMjMpP8WNbyZ9HQKCaGOHalxYwoKopQUatiQAgMpLo727ycfH5JXvEyOP/30U8uWLR88eODr66tWq4X2WrVqderU6dtvv83NzSWioKAgiUTSv39/IoqKinr27BkRDR8+fPv27bdv3yaic+fOHT582M/Pj4i8vb2trKy++eYbIlKr1fPnz2/UqFHz5s2J6OrVq61atbKwsDh79qyQQapS49Wdx4wZI5VW/vhE11No7yK1Ws1vk/MiVp07d/7444+FR/m6bO3kH0uXLi2QcaBhw4Zz5sxhjBkbG69bt05o3759u0wmK1CxeOPGjcbGxrGxsWV0Obqyc+dOIurfv3+hj762nA6U3ooVK4yNjaVSab169VavXq3r4YCOPXzIundntWv/u7WwQQNGxGxs/r2dd/BgIU9p1owRMS+v/2lUq1lYGPPzY4aG/z7XwoL5+7Pw8PK4itKLiYnhOZxmz56t3X7//v26deuam5vXqlXL2Nj4999/5+21atX69ttvGWN5eXkDBw6Uy+X169eXSqUTJ04Ulnnw2Sk7Oztra2sHB4erV68yxtLS0gwMDHjCdOf//Pbbb+V6taJ69uyZvr6+VCrla2Aqu4oX/78D+B+k69ev37t3r1WrVnZ2dvzjC/f06VMi4uWKOXt7e57aRLjl9/TpU3t7eyJ69bk1atSQyWRCy8GDBydPnrx582ZHR8eyvq5yVvwMlrGxsaOjY2xsbExMTKHZ3qGUVq9e/cUXX/DP6A8ePJgyZcqSJUs+//zzESNGVPqlqfBWnJ3p2DGKiyPtBdmenhQbS+fO0cSJdOsWaS3yLsT9+7R9O23cSDExRERSKXl5kZ8f+fiQVt7viq5WrVp79uzx9PRcvHhx06ZNeZYsIqpXr97du3cvX76cm5vr7u4u3M47ePAg/5XR09PbvXv3nTt34uPjebQknNPb2zs2NjY8PFwul7dq1UpfX5+ITExMzpw5U+DVa9euXQ7XWEa2bt2qVCrff//9WrVq6XosYtB1hPeO8vX1JaItW7Ywxr755htbW1th2um7776ztrbWTqZy9epVIjp79iw/5MlOTp06xRjr06dPz549hZ6DBg3y+u/DoEaj+eGHH0xNTXfu3Fk+F1XOsrOzpVKpnp6eUNCqAG9vbyI6cOBAOQ+sysvNzR0zZgwRSSQSX1/fvXv3tm7dWv7f3RqFQtG7d+/Q0FDMHb7j+AzW0KHs9Ol/M1R9+WXBPtozWJcu/TtfRcRcXVlgIIuPL9j/0aNHP/744w8//CAklCogKytr69atgYGBf/zxR4GfwCtXrixbtmz16tUP/zdBVlRU1E8//bR69epwUafI+GYmQ0PDiIgIEU9btTVr1oyIhLm9yg4Blm7MnTuXiL7++mvG2KNHjwwMDGbNmpWdnX3p0qXq1at/+eWXjLGEhIRdu3bl5+drNJo2bdq0a9cuISHh6dOnXl5ejRs35hHY/v37ZTLZ5s2blUrlrl279PT0+I9mZmamj49PtWrVwsLCnv9Ht5dcFvj6s3v37hX66JQpU4ioQCo/KKW4uLg2bdoQkYGBwebNm4V2lUoVFhbm4+MjRFoODg4BAQEPyy3bI1QwQoDFGBs6lBExhYIVKCejHWBpNKx5c+bnx8LC2H93xv7H4cOHDQwM2rVr17t3bz09vUWLFhXokJKSUq9evdq1a/N3v06dOuX+V9Pnhx9+kMvlvXr16tixo76+/t69e3n7mjVrDA0N+/btO3DgQD09ve+++07E7wD/HOLk5KS9eRCKwktG8iT1uh6LOBBg6ca2bduIaPDgwfxw3759dnZ2UqlULpePGjWK13jft28f/bdO6/Hjx+3atZNIJFKp1MPDQzuk+P77701MTKRSqbGxsfCOM3/+/AJTlfr6+uV+lWWuZ8+eRCS8Vxbw448/EtH48ePLeVRV2JkzZ2xtbYmoZs2aRX3cj4+PDwwMFG7LSqVSLy+vTZs28WQi8O7QDrBiY5mxMSNinTr9T/BUYA1WoXEVp1ar69SpI5SaCQ4O1s5uw02dOtXJyYl/mIyKijIxMfnxxx8ZY3FxcQqFYsWKFbzbhAkTHBwc8vPz1Wr1wIEDjx49ytt//PFHPT09IWF66eXk5LRu3ZqIPD09MaH7Wv7+/kT0xRdf6HogokGApRt8L27jxo2FFpVKFRsbm6lVeEKtVhcI5J8+fZqcnPzq2ZRKZUxMTJWJ+ktu2rRpRLRkyZJCHy2+nA68qeDgYIVCwb+lhf4cFhAeHu7v7y+kPbSwsPD39+eLc+FdoB1gMcYWLfr39t+GDf/fp9BF7oW6fv06EV25coUfKpVKCwsLITsg5+TkNG/ePOFw2LBhPXr
0YIytX7/e2Nj45cuXvJ2vsrhw4UKBl+CLMe7cufNGl1m8hIQEvl721dLOoC07O5svSnttzexKpPJvg6ycXF1dJRLJgwcPhH28MpnM0dFRO12vVCrlKxkFNjY2hW7EVSgUtWrVKtD5XYBMDeVDqVT6+/tPmDAhLy/P39//2LFjJdkQ7uHhERwcnJCQEBwc3KJFi/T09JCQEHd395YtW65cubKoFPxQVc2YQW5u/37x9OkbPz0mJoaIGjRowA8VCkXdunV5I6fRaOLi4oQOROTm5sY7xMTEODk5CbG+q6srz9Vc4CXu3bunp6ennfim9Ozs7Hbt2qVQKL7//vuNGzeKeOYqZufOnenp6e3bt2/YsKGuxyIaBFi6YWxs7ODgwGeedD2WSqz4EOq15XSgJBISErp167Zu3ToDA4Nff/01ODhYT0+v5E83Nzf39/ePiIiIjIwMCAiwtraOiIj49NNPHRwcfH19ebLmshs8VBwKBa1aRUSUmkqzZ7/x09PS0vT09AwMDIQWMzOztLQ04TAzM1OlUpmamgot5ubmvENaWpp2u0wmMzIyKvC2oFQqg4KC/P39tV9CFO3bt1++fDkRTZw4UUg0CgXw9Fdjx47V9UDEhABLZzC/Unp8BuvOnTuFPiqRSFCRsJQiIiLatWt3/vx5R0fHU6dOjR49+q1P1ahRo8DAwPj4+NDQUC8vL74tw9vb283NLSgoKDk5WbxRQwXl7U2+vkREGzbQ5ctv9tzq1avn5+dnZWUJLampqXxFIGdmZmZgYKAdcgkdqlevrt3Oz6P9XLVaPWbMmPz8/EWLFr3pRZXEpEmTxo8fn5ubO2jQoKdvMX1X1d2/f//s2bMmJiY+Pj66HouYEGDpDAKs0rO3tzc3N3/+/Ll2MjBtCLBKY+vWrZ06dXry5EmnTp3Cw8NbtWpV+nPq6+v7+PiEhYXFxMQEBgbWrl37/v37s2bNcnBw8Pb23rVrl0qlKv2rQIW1ciWZm5NGQ59+Sm80d8mTQl39L0l8VlZWVFSUdqYonm/zqlYW+WvXrvEOzs7Ojx49EmIsvhBQeG5aWlq/fv2uX78eFhZWdqVm1qxZw3+bBg4ciOIHBfAKjEOHDtWeaKwCEGDpDP72i6J+/fpU9LcR3+S3o1KpZs2a5efnl5OT4+/vf/z48Ro1aoj7EjVr1gwICIiOjubJHaRS6bFjx3x9fWvVqjVr1qyoqChxXw4qCFtbmjuXiOjcOdq9+w2eWL9+/RYtWnz33Xc8Olm6dKlMJuvTpw8RxcXF8eKtQ4cO3bx586NHj4jo4sWLR44cGTZsGBG9//77hoaGgYGBRKRWq7/77rsmTZo0adKEiK5fv96qVSuJRHL27FntOS3R6enphYaGOjg4nDlzZubMmWX3QpWOSqXaunUrVbn7g0RINKo7R48eJaIuXbroeiCVG6/VpV2CXlvx5XSgUE+fPuVlVvX19devX18+L5qYmLhixYrGjRsLb018jbyIe+ahnBXYRSjIz/9386CzM3NzK+kuQsbY9evX7e3t7ezs3NzcDA0NhRTKNWrU+P777xlj2dnZXl5eRkZG7u7uenp648aNE0rN7Nmzx8jIqH79+vb29ra2tjz557NnzwwNDYlIu9TMnj17RPoGFOL8+fN8N1K5/WZVfHv27CEiNzc3XQ9EfAiwdObJkydEVKNGDV0PpHLjayaK2gJ97do1ImrQoEE5j6ryunLlCi+14eDg8Oo+9nLAkzsY/1dRRVgjX/4jgVIqKsBi7P9zu/N/JQywGGMvX748cuTIvn37UlJShMaLFy/GxcXxrzUazeXLl3///fdXd/s/ffp03759hw8fFqL23NzcsFckJCS84YW+Gb6X0MDA4OLFi2X6QpXFBx98QETLli3T9UDEhwBLZzQaDU/KUCVzrJebXbt2EVGfPn0KffS15XRA27Zt2/gH+g4dOiQmJupwJBkZGcHBwR06dBAmtBo2bBgYGPjs2TMdjgreSDEBFmNs1Ki3CbCqho8//piI7Ozs4l8tBvSOiYuLk8lkCoVCO2KuMrAGS2ckEknx64egJJo3b/7hhx/26tWr0EcNDQ1r1aqVn5/Pl2VAUfiiq+HDh/NFVydOnCjT9SivZWZm5u/vf+bMmdu3bwcEBFSrVu327dt8LTySO1QNP/xA1ta6HoSOrFq1qkuXLomJiT4+Pnl5eboeji5t3LhRrVb369evWrVquh6L+BBg6RKWYJeei4vLtm3b+CfCQmG35mulpqb27NkzKChIX19/3bp1Qsb2iqBBgwaBgYGxsbGhoaG9e/dWqVQ8uUPt2rVnzZqFNHKVl7U1LVyo60HoiJ6e3s6dOx0dHc+dO/f555/rejiiUalU0dHRx44dW7ly5YQJE7y9vWvWrJmbm1tUf/bfDdMquLydiIjkuh7AOw0BVjlwdXU9fPjw3bt3+/btq+uxVETXrl0bOHDgo0eP7O3tf//993bt2ul6RIXgyR18fHzi4uK2bdv2888/P378OCgo6Pvvv+/evbu/v3///v3fKP0plIPly+nFC6pZs8gOEyZQ9eqk0ZDYW1QrgRo1auzbt69Dhw5r1qxp1qzZ+PHjdT2iN5aenv7w4cPo6Ohbt27dvn07Ojr69u3bOTk5BbpFRUVpb17RdvLkyaioKEdHRy8vr7Ifrw4gwNIlTK6UAx7F4ptcqN27k0eO7JCdnd2uXbvff/+dF02ryBwdHQMCAmbMmHHixImQkJC9e/ceO3bs2LFjtra2vr6+48aN43vvoSJ4773XdJBKadCgchlKheTu7h4cHDxy5MjJkye7ubl16tRJ1yMqTkJCAo+ihHCKV9ou0M3Ozq5Ro0bOzs4NGzbkX9SpU6eocwrZ22UyWdmOXkckWMqgQ9euXXN3d2/YsCEvPgplISAgYO3atSqVys/Pb8KECS1atND1iCoEtZq++oqCgqhLl721au0ODg7my9srl7S0tF27dv3000+8EjAReXh4+Pv7f/jhh9plPaHi2LCBMjLok08IE47c1KlTV69ebWtrGx4e7uDgoOvhEBEplcqoqCjtcOrOnTvZ2dkFuunr69etW1c7nHJ1dS35711GRoa9vX1OTk6BhLFVCQIsXcrJyTExMZHL5S9fvpTLK+VsYl5e3saNG69evWpmZjZ8+PCmTZu+2uf48eP79u1Tq9Wenp4DBgwQ2p8/f75u3bro6GhHR8exY8cK0yc5OTmbN2++ceOGhYXFoEGD3jokys/P/+yzz9asWSOR/P/PuYeHx9ixY4cNG1Z2KZsrvtRUGjaMwsJIoaCVK9nHH0t0PaLSioiICAkJ2bFjR2ZmJhGZmZn169dv5MiRVfXWQ+VlZUVpaZSaSlZWuh5KxaBSqXr06PH333+3bdv25MmTPEtWeSrh1JSlpaUwKcW/cHJykkrffhn3mjVrPvnkE29vb54SsmrS2f5FYIwxxnMO3b9/X9cDeRsajcbb29vOzm7GjBl9+/bV09M7ceJEgT6rVq2SyWRjx46dMmWKoaHh9OnTefvz58+dnZ0bN2785ZdftmnTpnr16jExMYyxrKys5s
2bd+3adeHChX5+flKp9O3y/j19+rR79+5EpK+v/8svv9y6dSsgIMDGxob/2AsFW4Q8hO+O69eZszMjYjY27JX/rrKiVqtzcnKK7/Py5ctC25VKZX5+fkleJTs7mxc6lEj+DRn5GvkquQO8kqpVixGxx491PY6K5NmzZ/w+2qhRo8r0hXJzcyMjI0NDQwMDA/38/Dw8PISEc9oUCoWzs3Pv3r0DAgKCg4NPnz794sUL0Qfj7u5ORL/99pvoZ644EGDp2HvvvUdE+/bt0/VA3sa+ffskEsn169f54eDBgz08PLQ7ZGZmmpqaLly4kB9u3rxZJpM9fvyYMTZ37twaNWqkpaUxxnJycho0aODv788Yi4mJ+frrr4W4Z/Dgwb169XrTgQnZMu3t7bWzZebm5vLNaMItf16w5fE7836/cyczNmZEzN29nP7IZWVljRw5kn8ud3d3v3z58qt9li9fzmNfW1tb7aT8UVFRXbt2lUgkMpmsb9++ycnJvD07Ozs4ONjNza1bt26FvuidO3cCAgKqV6+uHU/v27dPpVKVxTVCyTVsyIhYZKSux1HBXL161cjIiIjWrl0r1jmfP39++vTp4ODggICA3r17Ozs7FzrhZGlp2aFDB39//8DAwNDQ0MjISLVaLdYYinL48GEisra2zs3NLevX0iEEWDo2depUIuJ1HiqdKVOmtGrVSjj866+/iEg7FeSxY8eISMiMnJuba2JismHDBsZYu3btJk6cKPRcsGCBk5PTqy/RrVu3Dz/88I1GtX37dv5W1aFDh6KSMsfGxgYGBjo5OfG3GKlU6uXlFRoaWoXzkWo0LDDw3/TZw4ez7Oxyel1/f387O7vTp0/HxMT4+PhUr149IyNDu8OePXtkMllwcHBiYuIPP/wglUr//vtvxphKpWrcuHHHjh3v379//fr1hg0bent7M8by8/Pr16/fs2fPnj17NmvWrJiXViqV+/bt8/HxEe6/Ozg48AKIZXjBUKw2bRgRO39e1+OoeLZt20ZEenp6J0+efNPnKpXKhw8f7tu3LzAw0N/fv0OHDoVWTdbT0yswNVXgl7Hc8H2Fnp6eOnn1coMAS8fWrFlDROPGjdP1QN5G//79h2rlab5z5w4RXbt2TWj59ddfDQ0NtZ/i6uo6f/58xpijo6N2WLl161Y9PT3hk1NUVNSqVasGDBjQpEkTviCgJFQqVUBAAH8r8ff3VyqVxfdXq9W80rCQ9snW1nbq1Kk3btwo4StWFhkZrF8/RsTkchYYWH6vm5mZqa+v//PPP/PDtLQ0fsdWu4+np6d2scguXboMHjyY/RedCwVPjhw5ItxM5/+zQUFBxQdYgvj4+MDAQGEhrVQqdXFxmTJlSnp6uggXCW/C05MRsbAwXY+jQuI5sWrUqBEbG/vazhcvXpw6dWqPHj1q164t3BPXVqNGja5du/r7+//www8HDx58+PBhBZnBff78OR+wUE2yqkKiUR2r1JkaXr58qb31jN/O56uMuaysLD6ZJDAyMuIdXn1ufn6+Uqnkh8+fP7927VpiYqJMJtM+YTGEbJkKhSIkJKQk2TKFiaukpKTg4OBmzZolJSWtWrWqadOmLVu2DAkJycrKKslLV3D37lGbNrR3L1lb0+HD9F8IWh6ioqKUSqWnpyc/tLCw8PDwiIyM1O5z69YtoQMRde/enXe4deuWnZ1dw4YNeXu3bt2kUil/6E3zoNrb2wcEBDx48ODo0aNDhgyRy+VRUVGrV6+2srIaPnx4aS4Q3hSfWCnZ7/Q7Z8mSJT179kxOTu7bt++rCaUKuH///qpVq44ePRoTEyOXy52dnb28vKZOnRocHBwWFpaUlJSUlPT3338HBwd//vnn77//vrOzsw6zIaSkpPz9998jR45s1apV7dq1GWPGxsa+vr66Gk/5qJQ716qSSh1g2drapqamCofPnj0jIjs7O6HFzs4uLS1NrVYLv9jPnj3jHezs7Ao819LSUgi5WrVq1apVKyIaNWrU2LFjL126VPxIrl+/PmDAgEePHlWrVm3Xrl1dunR5owuxtLT09/fnRYX5ZrSIiIgJEybMmDGjsm9GO3CARoygjAxq3pz++IP+uylaThISEohIuwhG9erVeSOnUqlSUlK0O9SoUYN3SEhI0G7X09OztLSMj49/68FIpVJvb29vb++4uLgvvvhi//79OTk527dv7969e1VNJF0B8V38VeKTi/hkMtm2bdtat2599erVCRMmbN68uZjOHTp0+P77711dXd3c3OrUqVNx9qHn5+fHxsZqb0uMjIxMSkrS7iOVSr/66itdjbDcVJT/kneWvb29mZnZs2fPUlNTrStbaa7GjRsvW7ZMpVLx3+2zZ89aWFjU1Mrc3LhxY41Gc/78+Y4dOxLRkydP4uLieCqHxo0bnzt3Tuh59uzZQlNEtm7des+ePcUPY+fOnWPGjMnOzm7RosUff/xRq1att74iDw+P4ODgFStWHDhwICQk5Pjx41u2bNmyZUuDBg1GjRo1ZsyYSlQwizFasoRmzyaNhoYOpfXr6X8nE8uDgYEBEeXn5wstSqXS3NxcOJTJZLwUt9CSm5vL42wDA4MCZdqUSqUoybocHR137txJRM7Ozo8ePSowowZligdYmMEqipWV1Z49e9q3b79ly5ZWrVpNmTKlqJ516tSZPn16eY6tUKmpqXfv3r179+69e/fu3bt39+7d6OholUpVoJuFhYWrq2t+fn61atWaN2/et2/f9u3b62TA5UrX9yiBtWzZkojOnDmj64G8seTkZH19/W+++YaXoHJycvr8888ZY6mpqWFhYXzBeNeuXTt16pSenp6dne3r61uvXj2+0Or48eMymWz37t2MsZMnTxoZGW3fvp0xduXKlY4dO/JlyGlpaR06dOjSpUtRA+CLrvjt/BEjRmSLvXK78m5Ge/GCDRjAiJhMVq6LrgrgU7MRERFCS+PGjWfPnq3dx8nJ6dtvvxUOp0+f3qJFC8ZYSEiIubm58K1+/vw5ER0+fFjoWfI1WEUJCgoiok8//bQ0J4E3MmMGI2JBQboeR8W2e/duiUQil8tfTXyjQ/n5+Q8fPgwLC1uxYoW/v7+Xl1dRCULt7Oy8vLz8/f1XrFgRFhb28OHDdzAhDsMi94pgxIgRRLR+/fpi+vzxxx+iRw+iCA0NtbS0NDY2lkgkvXr14ulSeOI4vqk+KiqqadOmcrncwMCgTp064eHhwnPnz5+vUChMTExkMtm0adP4b2BOTs6YMWNkMlmNGjX09fXd3d2joqIKfenU1NQePXoQkVwuDyzLIEKpVO7atatnz57CJucmTZrPn6958qTsXrNU7t//dzO8lRU7elSXI1Gr1S4uLgEBAfzw9u3bRHT27FntPpMnT27RogUPu/Py8lxcXObMmcMYu3PnjkQi+euvv3i34OBgc3PzzMxM4YmlD7D27t1LRD179izNSeCNzJ/PiNicOboeR4XH9+tYW1vratNrWlpaeHh4aGjovHnzfHx8PDw8Cp0/1tfXb9iwoY+PT0BAwKZNm8LDw4tKaPcOQoClewsXLiSim
TNnFtUhPDyciMzMzPz9/U+fPl2eYyuJnJycmzdvxsfHCy1KpTIxMVE7mcqDBw/u3r37anqV9PT0GzdupKamFmh//vx5RETEw4cPi3rRGzdu8A9PNjY25fYhT9iM1rnzJCImlTIvLxYayl63W7Fc/fUXs7BgRKxpU1b096/8bNiwQSaTzZo166effnJxcenevTtvnzBhwrZt2xhj9+/fNzExGTBgwPr163v27GljYyMk1xg5cmS1atWWL1/+3XffGRkZffPNN7z93LlzwcHBAwcOdHR0DA4OPnTo0NuNjU+w1alTp9RXCSX1ww+MiGHS8LXUanWvXr2IqHnz5uUQssTHx4eFhQUHB0+dOpVPTRW6M1GYmgoMDNy3b987OzVVQiiVo3u///67j49P3759+efpV505c+aLL74QFnq7u7uPGTNm+PDhlpaW5TjMCmT//v0jRox48eKFu7v7H3/8wXOKlhuNRnPy5MuQENM//yS+67F6dfLzozFj6L8db7qhvejK15c2bKDCsjTrwJ49e7Zu3Zqdnd22bdvp06fzamWfffZZ+/btfXx8iOjmzZvLly+PjY11dnaeMWOGi4sLf2J+fv7q1auPHTsmk8n69+8/ZswY/qa/cePGgwcPCudv0aLFrFmz3mJgKpXKyMhIrVZnZmYalf8KtXfSunXk70/jxtG6dboeSoWXlpbWunXrqKio4cOHb926VazTlk+pQSDCGqwK4MaNG0Tk6upafLfbt28HBAQIi6zfzWIvGo0mMDCQ36r78MMPdTsXnZbGgoOZuzsj+vefhwcLDmZlUFXi9TIz2aBBjIhJJGzePPYu/VCUCt/GK1QjgLK2fTsjYkOG6HoclcSdO3fMzMyI6Icffni7M5Rwako7nzufmiqHfO5VHmawdE+pVPI1TC9fvnxtgp+8vLwjR45s2bJlz549arWaiBwdHYcPH/7xxx87lfMW/HL34sWLkSNH7t27Vy6XL1q0KKA8EzoVKyKCNm+mrVvp+XMiIkND6t2b/P3J05MKeysTX1QUDRhAkZFkZkZbtlDfvuXxolXDgAED/vzzz507d1b5lDwVxIED1KcPffABHTig66FUEnv37h0wYIBUKj1w4EDPnj2L6fnq1NS9e/deTeanUCgcHR21Kzc3a9as0MzvUFq6jvCAMcb4vNTx48dL/pS4uLjAwEBeIpTegWIv9+7da9CgARFZW1sfO3ZM18MpRE4OCw1lXl7/lqMhYq6uLDCQ/VdA7/Wyslh4OAsPZ8XMp7x8+W8foSLRoUPM0vLfl7tzp8gn7t+/39PTs0GDBv369bty5cqrHR4+fDhs2LAGDRp06tRp06ZNQrtGo/nxxx/btWvXqFGjMWPGaC+22759u5eXV+PGjQcNGlToOSs+HqYvWLBA1wN5V/z9NyNinTvrehyVytdff01EVlZW2jt+3rrUYMXfBF1lIMCqEHiANWvWrDd9Ii/24ufnJ+zv4Dkzq9gtjwMHDvDkSc2aNav4heTu3WPz5rGaNf8NsxQK1rs3Cw1l+fmveeLFi/9/t/G/zXMFXb36b4fNmxljbPFiJpMxIjZwYHG3Jo8fPy6VSj/77LMDBw4MGjTI3Nw8JiZGu8OLFy9q1qzZtWvXffv2LVq0SCaTCTHWDz/8oFAoli1b9scff7Rs2bJBgwa8Us3y5cutra1DQkKOHj06bNgwc3Nz7dirstiwYQMRDR8+XNcDeVeEhzMi1qLF/zQ+ffo0PDy8mPowubm5V69evXXr1qsrIh4/fhweHv78+fMC7fyciYmJIg1cl9Rqde/evYmoVq1avr6+LVq0MC5sfaW+vn6TJk0GDx781Vdfbdmy5dKlS7oqNQgcAqwKoUWLFkTUo0ePtz5DWlpacHBw8+bNhV82njNTe1t7ZaS96Gro0KGVaAOwSsXCwpiPD9PT+zcksrdnAQHF7ezTDrBcXFhOTiF9CgRYkyYxiYQFBLDi10t4enp+8MEH/Ov8/PzatWtPnz5du8OqVauMjY2Ft+NJkybVq1ePd7axsZk7dy5vj4uLk8lkO3bsYIxduXLlwoULvD03N9fY2HjdunWv/75UMDzbrYeHh64H8q64e5cRsfr1/79l9uzZMpnMxsZGIpEMHz48/5UPIkeOHLGxsTEzM1MoFI0aNRI2F+fm5g4YMEAikdjY2Ojp6X333Xe8PS8vb/LkyYaGhrVr19bT0+vfv39Oob9LlUp6erqpqal2FmVLS0sPDw8/Pz9MTVVYCLAqhFGjRhGRRCJp2rTpH3/8UZpThYeH+/v7CzfUTU1N/fz8wipnbdUXL14MGDCAiGQyWWBgYCVdzp+QwAIDmYvLv4GRVMo6dGDBwezVWFE7wCJi/0U1/6NAgJWXx06efP0YzMzMtKOfSZMmdf7fmzR+fn69e/cWDnll5efPn9+7d4+ItG//tWnTptDMnNWrV1+zZs3rh1LB8HpNJiYmlfSnq9KJj2dEzM7u38N9+/bJZLI///yTMRYeHm5mZlZgNXdOTk61atUmTpyoUqkyMjLatGkj/OjOmzfP2to6MjKSMbZ9+3aJRHLy5EnG2O+//+7h4ZGSksIYi46OtrS0XLZsWTleYplYtWoVEdnY2Pz0008XLlxIS0vT9Yjg9RBgVQgZGRn29vbCR5M2bdqsX7++NJNPOTk5oaGhXl5ewoYRNze3wMBA/qZTKdy/f58X+rWysjpy5IiuhyOC8HDm78+Mjf+NkCwsmL8/0165JARYrVszIqavX8iaqgIBVkm8ePGCiP7Suum4aNEiZ2dn7T7dunWbMGGCcHjz5k0iunnz5okTJ4goKSlJeGjAgAE+Pj4FXiIiIkIikVy7dq2kY6pIeKb+Yu5PgYgyMhgRMzX993Dw4MGenp7Co5MmTWratKl2/+3bt+vr6wt3AA8fPkxEfJ2Ak5OTkMOWMda6deuPPvqIf52bmyu0N2/e/LPPPiuTiykvjx8/5vkR9uzZo+uxwBsoZE0clD8zM7P4+PiNGzc2btxYX1//4sWLY8eOrV69uq+vL1/Q/aYnNDAw4EkceLGXGjVq3L17d9asWY6Ojn369Nm1a9ertaIqlEOHDrVu3fr27dtNmjS5fPkyz9he1rKzs1++fFlMh7y8vPT09EIfevHihZInxSqahwcFB1NsLP34I7m7U3o6hYRQixbUpg0VKOo6fTrVqEFKJU2YQKXf5qvRaIhIe2+2VCot8ENV4JDfk+WTOq8+l59QkJub+/HHHw8bNqxZs2alHasuuLq6EhGfq4OyZmJCEgm9fEn8h+j+/fvaNenatm374MED7f63b992c3MTcv61a9eOiB48eJCXlxcTE1Pguffv3+df6+vrE1FmZmZwcPCDBw+GDx9etldVlhhj/v7+WVlZH374IZ/Rf2svX74s6h2My8/Pf/78eaF/HTIyMl7dkEhESqWy+LfNdxkCrApk1KhRN2/eTE9P55NPubm5u3bt8vb2btiwYVBQUEpKyluc09XVNTAwkKdC8fHx0Wg0Bw4c8PX1dXJymjVr1qNHj0S/ilJijAUFBfXu3Ts9Pb1v375nzpwp
qtyViOLi4jw9PU1MTExMTDp27BgVFVWgg0qlmjx5srm5uaWlZa1atfbt2yc8dPr0aVdXV3Nzc1NT0+HDhwv5+lJSUubPn1+jRo2xY8dqn8rSkiZPpitXKDKSAgLIxoYuXaKTJ//n5UxM6JtviIhOnaItW0p7debm5iYmJsnJyUJLUlKSg4ODdh8HB4cCHXgjn1jVfigxMdHR0VE4zM7OHjhwoEwmCw4OLu1AdYSnwuJZ3aGsSaVkZEQaDfFflOTkZCsrK+FRa2vrnJycjIwMoSU5OVk7o7KZmZmenl5SUhKfjNd+yNramv/ccl9//bWZmdnHH3/85Zdfenh4lOlFlan169cfPXrUxsZm+fLlb32S58+f9+3b18zMzNLSsnHjxrw6SAHz58+3srKytrauUaPG2rVrhfbbt2+3atXKwsLCzMzMy8srISGBt0dHR3t7exsaGpqYmDRp0kRIhQ3/T4ezZ1C8e/fu8ckn/j+lUCh69+4dGhr66iLQkuPFXurWrcvPyZM7bNq0qYIUOszMzBw8eDARSSSSgICAclsW06FDhxYtWkRFRcXExHTu3LlJkyYFkuwtXLjQzMwsLCzs+fPnM2fO1NfXf/DgAWPs2bNnlpaWo0ePTklJuXjxop2d3ccff8wYS0pKsre3HzFiRLt27Xx9fYt56exstmUL4/fWhFuEBw8ytZq1bMmImLU1e/r0//u/xS1CxlinTp2EYWg0GldX12nTpml3WLp0qYWFhbAWeMaMGU5OTowxpVJpbm4e9F9t3mfPnikUCmGD4YMHD5o0aeLt7V2pV4QsXbqUiKZMmaLrgbwratRgRIxv76tfv/7ChQuFh3777TeZTKa9WHv69OmtWrUSDnNycoho7969PAjTrpI0Y8aMli1bar+QUqk8efKkpaVlZVwdyMXHx/Mg8rfffivNeQYPHly3bt3IyMiUlJTBgwfb2dllZWVpd9iyZYuent6OHTsyMzPXrFkjlUp5CbK8vLz69evzuCoqKqpZs2bdunVjjGk0miZNmowePfrp06fp6ek+Pj41a9asqkmC3hoCrIpOpVLxySe5XM6jInt7+4CAgGLq9L2WRqM5ffq0v7+/UB7EwsLC39//6tWr4g38jUVFRTVu3JiIzMzM+KLX8nH9+nUi+ueff/ghX3506tQp7T729vazZ8/mX6vV6po1a/LD1atXGxsbC29Va9asMTQ05Fsd+R+JsWPHFh9gadMOsBhj//zzb0qt8eP/v8/bBVh79+6VSqXff//91atXJ02aZGRkxAPEP//8c+PGjYyxZ8+e2djYDBo0KDw8fMOGDfr6+j/99BN/7ty5c01NTX/77bdLly716NGjTp06PBzfu3evubm5t7f3xYsXw8PDw8PDK34GjULt37+fSreHF94I3/Bx/z5jjHXv3n3UqFHCQ99++23NmjW1O69cudLGxkYIuSIjI+m/XRfm5uarV68Weg4cOLB///6vvty4ceM6V9q8W/379yci7Q0obyE5OVkqlfLNv4yxlJQUuVzOK4EKOnToMGzYMOGwc+fO/I2L73e5z/+3GAsLCyOie/fuMcbi4+OV/9VhjYiIIKLbt2+XZpxVDwKsSiMhISEwMFAo0yaVSjt06BAcHFyazAXp6enBwcE8SQTn4eGxYsWKV6svl7XDhw/zD2r169cv59/SHTt26OnpaU9Z2djYrF27VjhMS0sjIu2dmCNGjOjXrx9jbOLEifzzHHfr1i0i4tuauNIEWIyx4cP/3Xh45sy/LW8XYDHGfv3114YNG1paWrZt2/bkfzsPZ82a5efnx7++du1a9+7drays6tat+/333wtPVKlUc+bMqV27trW1da9evXhkxhj74IMPnP/X1KlT32xMFQNf9FOrVi1dD+Rd0bw5I/p3e8eSJUssLS2Tk5MZY9nZ2Q0bNvT399fuHBsbK5fLf//9d3746aefOjg48HjL19e3ZcuWfNYkNjbWyMgoJCSEMRYTE7N//37hDJ6enqUMUHSF1x80NzePi4srzXlOnTpF/7tVpVmzZl999ZV2H0tLy+DgYOFw7ty5jRs3ZowtX77c0dFRaM/Ly5NIJK9udT9//jwRlXKcVQ8CrMqHJ2IQEs3xyadS5tGOjIwMCAiwtrbm5xTWyJfPTboVK1bIZDL+QS09Pb0cXlHbsmXL7IRd44wxxho0aPD1118Lhzxs0t4i99lnn/GbEf3799eOn54+fUpER48eFVpKGWAlJjJzc0bEmjRhfPb9rQMsKIpKpdLX15dIJJU9aVxl0akTI2J8yjgzM7NJkyYuLi6fffaZh4eHvb093875yy+/CHcGP/vsM2Nj4wkTJvj4+MhkMiHYun//vrW1ddu2bT/99FMnJ6c2bdrwCZW9e/caGhoOHjx43rx5H3zwgYGBwenTp3VzqaXw9OlTvr/1119/LeWpfvvtN4lEor22xMvLS9hxyf678bp7926hZfXq1VZWVoyxmTNnuru7a5/Nysrq1VuukyZNat68eSnHWfVgkXvlwzOIxsfHBwcHe3h4pKenh4SEtGjRomXLlitXrnzO6+G9oUaNGgUGBsbFxfH19Uqlkq+vb9CgQVBQkPYaZ3Hl5uaOGjXq008/1Wg0AQEB/K5TGb1WUQwNDXNzcwuMSrh5SkT8a+1NgjyvJn+udjt/n9J+binZ2tKCBUREN2/SmjVinRX+h0wmc3FxYf9NZUFZMzEhIuI70kxMTM6dOzdt2rT8/HxfX9+rV6/yLRSNGjUaNmwY779s2bLt27cbGhq6uLhcunRp0KBBvL1evXrXr1/v27evSqUKCAg4efIkr+Xat2/f69evN2nSJDExsXXr1jdu3OjYsaMuLrRUJk2alJKS4unpybMkloaRkRFjLC8vT2gR3sE4hUIhl8sLvJUJb3EF3h6FhwQHDhxYv3796tWrSznOKkjXER6UVllMPt2/f3/evHlC1mCZTMYLHZZmff2rnjx5wnf3mJqa6jC/y759+yQSiZDEXKlUGhkZbdaaIMrNzZVKpaGhoULLBx988OGHHzLGpk+frv3x7uzZs0T0+PFjoaWUM1iMMZWKubv/mzcrJQUzWGVi4MCBRCQsUoEy5evLiFjpFm1XcXyfsrGxcWnW2gr4Aqk7Wln1atWqtXjxYu0+NWvWDAwMFA4/+eSTtm3bMsZCQkLMzMyEd36+mV17vcTu3buNjY3xu1MoBFhVRG5uboHMovXr1w8MDNS+7/6meKFDHx8fPT09fk47O7uAgADtgqNv7Z9//uET4PXq1bt161bpT/jWMjIyTE1N+eoNxtiuXbv09fULfN/ee++9gQMH8q9TUlIMDQ35G8rJkyclEonwzjVlypQCaRJLH2Axxs6fZ1IpI2Iff4wAq0zMnj2biObNm6frgbwTZs8+1aVL4NatlbI6eDlIT0/nWVTE2vyYl5dnbW09Z84cfnjmzBkiEupccSNGjGjRogVf3JadnS3s47l3755EIhF2Ha1atcrIyOjFixeMMZVKNW/ePHNz81JWH6nCEGBVNU+ePAkMDKxdu7aIk0+JiYkrVqzgW/w4fpv
yrdfXBwcH86CtV69eFWGH/9dff21kZPTNN99899135ubmwmJtLy8vvlf55MmTcrl85MiRq1atatq0aePGjYUNyV26dHF2dl6+fPnUqVOlUumuXbt4e2hoaGBgYKtWrZo2bRoYGHj8+PHXDqOoAIsxNm4cI2IyGdu6FQGW+DZt2kREQ4cO1fVA3gnTpk0jouXLl+t6IMXJysoqPnmNUqksqpRyenp6oQkLsrOzS5LIgN8TbN++vbr4CqNvYu3atTKZ7IsvvggMDLSzsxs0aBBvHzp06Pbt2xljd+/eNTMz69Wr18qVKzt27Ghvb893HjDGxo0bZ2lpuWjRoq+++kpfX5/XfMzNzfX09CSijz76KPA/169fF2vAVQPWYFU1NWvWDAgIiI6O5pNPUqn02LFjvr6+tWrVmjVr1qspNEvC1tZ22rRpN27cOHnypJ+fn6GhYURExIQJExwdHadOnRodHV3yU+Xm5n700UcTJkzgyyb2799vYWHxFkMS14IFC9auXXvjxo1Lly4tWbJESOjn4OBgaGhIRF26dDl16pRUKj18+HDv3r1PnTolzOr99ddfY8aMOX78eGpq6qFDh3geLyJKSkqKjo52d3dv27ZtdHT0262NEwQGkrU1qdU0b15pTgOF48nckWu0fPCqL4WmBa8I7ty506ZNG552uFevXq+uQM3JyRk1apSpqam5uXn9+vVPaqUJPnz4sLOzs4WFhampqb+/v7Ds6fLly61atTIyMjI0NPT09Cwmw/Px48c3b95saGi4ceNGXlBBFB9//PGePXsSExPPnz8fEBCwfft23u7o6MiXvbq6ul66dMnJyenYsWOtWrW6ePEiv8NARD///PPixYuvXLny4MGDTZs2ffnll0SUl5dXt25df39/PT296P/wqlzw/3Qd4UHZ4pNPTZo0Ef7H+eRTgSxzbyQjIyM4OLhDhw78hAWmmosRGxvbqlUrIjIxMRG2AoGgmBksxlhIyP+UgsYMloh4/RAjIyMR5wygKIsXLyYi7TKCFYeQVzM2NvbOnTuNGjV6NUHal19+Wb169bNnz6akpIwfP97MzIxP9jx58sTIyGjKlClPnz79+++/rays+F22Fy9eWFtb//zzz1lZWbGxsR07duzevXuhr56RkcFXvi5durSsrxTKAQKsdwVP7sA/OxKRubm5v79/eHh4ac557dq1BQsWlLDzqVOneFZ6FxeXmzdvluZ1q6riAyy1mrVrhwCrrNja2hJRTEyMrgdS9f34449ENGnSJF0PpBCHDh2SSCTCMtODBw8SkfZKc6VSaW1tvWzZMn6Yk5NjZmbGDxcuXFitWjVhMcbChQttbGx4yK6dIGrp0qXVq1cv9NU//vhjImrdurV2LnuovHCL8F0hJHfYtGmTl5dXRkZGSEhIy5YtGzVqFBQUlJqa+hbnbNas2Zw5c0rSMyQkxNPTMzk5uWfPnpcuXdJezgUlJJVScDD9l88fRIaKhOWmIt8ivHXrVq1atYRiYt26deONQoeYmJjU1FTeTkQGBgbt2rXjHW7fvt2pUyeh5Ea3bt2ePXsmlPXkjSkpKTt27Ojdu/erL/3PP/8EBwcrFIr169fzvIBQ2SHAereYmZmNHDkyLCzs9u3bAQEB1apVu3379qxZsxwcHHx9fY8dO8YYE/cVlUrl2LFjhUVXBw4c0K7PCm+kSROaOFHXg6ii3NzcpFLpw4dxuh5I1WdqakpEmZmZuh5IIRITE6tVqyYcGhgYmJmZCeWNeQci0u5TvXp13iEhIaFAO2/khw8ePHBycnJ0dNTX1381ZVR2dva4ceMYY3PmzMHnzyoDAdY7qkGDBoGBgbGxsaGhob1791apVDyzaO3atWfNmhUTEyPKq8THx3fp0mXDhg0GBgabNm0KDAzEJ7NitG5NjBFj9P77RfZZterfPn5+5Tiyd0DTposUipeRkWN0PZCqryLPYL2aV5MnxtPuQG+Vdrh27drHjh07ePBgdnb26NGjC7zu119/HRUV1bRp04CAAHGvCHQIAdY7TV9f38fHZ//+/Y8fPw4MDHRycoqNjQ0KCnJ2dvb29t61a1d+fv5bn/zs2bMtW7a8ePFizZo1z5w544eIACowJyfr3FyDe/d0PY53QEWewXJwcIiPj1er1fwwOTlZqVTy5PJCByKKjY0VWp48ecIbHRwcnjx5IrTzPsLNQYVC4eLi4uXltXjx4l27dmlvTrx48eKqVavkcvmGDRuE7clQBSDAAiIiR0fHgICAhw8f8uQOcrlcSO4wbdq0mzdvvukJQ0JCunfvnpSU1Llz5/DwcJ6xHd5Ofj717UvNmpHY92/h/7m5ERFhCVY5qMgzWO+///6LFy+OHj3KD3/77TcbG5t27doJHezt7Vu0aLFjxw5+GB0dffny5T59+hBR7969T58+HR8fzx/auXNn586deRIEXm6Ze/nypUQiETJC80UUarV65syZeJ+sanS9yh7egFKpTEhIyM3NLaqDRqNJTEzkaXYLyM7OTkhIeHVzSn5+flJSUoH258+fBwcHN2vWTPg54WvkS1INV6lUjh8/nj+LZ4Ip2cVBcWrUYETsyRNdj6PqUquZoSGTSFgRySNBNDwLVO3atXU9kMJ99NFHNjY2S5YsmTNnjr6+/pIlSxhjqamp3t7eV69eZYz98ccfUql08uTJK1eurFevXseOHXldMpVK1bx584YNG65atcrf318mk/G670lJSTY2NgMGDFi3bt2KFSvs7Oy0qzt89dVXROTq6pqTk6ObC4YygwCr0liyZAmfWjc2Ni40OcKpU6f45hepVNqnTx8hQ3peXt64ceP4zHONGjWEmnpqtXru3LkmJiYSicTAwGDGjBmvli/kyR346xKRmZmZn5+fdiGqAuLj4/mnPQMDg9IXgQdBly6MiB09qutx6MKzZ88+++yzDh06vP/++1u3bn21g0qlWr16taenZ6dOnb755hvtAgORkZF+fn5t27b19fU9d+6c0K5Wq//888/BgwfzjR1ckyaMiJUudQm83tOnT4nI2tpa1wMpXH5+/qpVq/r27Tto0CCe5Zwxlp6ePnLkSKGo19GjR4cPH/7BBx/Mnz9f+2NnWlra119/3atXLz8/v3/++Udoj4uLmz59+nvvvdenT5+VK1cqlUrefu1anqtrU6lUevbs2fK6Pig/CLAqh/3790ul0l9//TUnJ2fHjh16eno7d+7U7pCWlmZtbT1y5Mi0tLRbt27VqVOHVyNmjH3zzTcWFhb//PNPVlbW3Llz9fT0eO28X375pUmTJvfv32eMHT9+XF9fX3g3KSA7O7tAoUO+Rj4lJUW726ZNm/iCA0dHx0uXLpXJN+Jd5e/PiNjq1boeR7nLz8/38PBo2LDhr7/+Om/ePJlMtnbt2gJ9AgICjI2Nly5dGhwcbG9v369fP94eFxdnYWHxwQcfbNu2bfjw4fr6+hEREYyx8+fP16tXz8XFxdTUdNWqVcJ5fHwYESsshAMx8VXkCoVC1wPRsfx85u7O9PVzFy48oOuxQJlAgFU59OnTp2fPnsLhoEGDPD09tT
usW7fOwMBAqI21ceNGPT09PollZ2f31Vdf8XaNRuPk5DRz5kzGmFqtfvbsmXAGFxeXuXPnFj+MO3fuBAQECCUU9PX1a9euvXDhQj5Jxhs7duxYmgrTUKgffmBE7JNPdD2Ocvf7779LpVIh02NAQICtra32He3U1FSFQiFEXSdOnCAiHkh99dVXzs7O/Ca1RqNp37794MGDGWPJycmRkZGMMVdXV+0Aa84cRsT+K4kLZYhPqAsTOe+mRYsYEXNyYiVYeQGVEha5Vw537tzp0qWLcNi5c+c7d+5od7h79667u7uZmRk/7Nq1a35+/oMHD9LT0xMTE7t27crbJRJJp06dbt++TURSqdTa2pq379q1Ky4urtD0d9rc3Nx4coddu3b17NkzLy8vJiZmzpw5BgYGv/zyCxE1a9YsLCyMZ2wHEb2zS7DPnTvXokULZ2dnfjho0KCkpCTtUm4RERF5eXlCCciuXbtWq1bt3LlzRHT06NH+/fvzv+USiWTQoEFnz54lourVqzdq1OjV13J1JXonv8nlj69zr5gbCcvH3bu0aBFJJBQSQv/V14CqBgFW5VAg/V21atWSk5OFvcRElJCQYGNjo92BN/K0eEIgxR/ijdz8+fPNzMyGDBmybNkyXijwtRQKxeDBgw8dOhQeHu7t7S2XyzUajUQiGTt27LVr1wwMDEpxoVC4dzbAio+P194kz78WNmoRUVxcnIGBgfDDL5FI+E57/lCB5yYnJ6tUqqJeiwdYyNRQDviyzoq5kbAcaDQ0bhzl5pK/P3l763o0UGZQd6NyMDIy4mnruJycHAMDA+2kncbGxnzpqNCBiExMTHgGPO3UeTk5OSZan5hmzJgxcuTIEydOfPLJJzVr1nztJJa2Fi1aHD16VKVS/fXXX82aNXNycnqba4MScHIiAwOKj6esrHfr865KpdL+Oed1SLTTs6lUKvn/1g+Sy+W8Q4Hn6unpMcZe7S9wdSWJhO7fJ7WakBC3TL3jM1grVtDZs2RvT4GBuh4KlCXMYFUODg4O2tnVY2JitD+aF9qBiBwdHW1tbaVSqXb6u5iYGCH3HREZGRnVqVNn7NixvXr1Wrdu3VuMTS6X9+vXD9FVmZJKycWFGKP793U9lPJlb2+fkpIiHPL0jNo/wA4ODllZWdnZ2dp9eIcCz+W75YuZYTU1JXt7ys0lrV8XKBPv8gzWo0c0bx4R0dq1ZGGh48FAmUKAVTl069Zt9+7dfCIqPz//999/7969e4EODx48uHz5Mj/ctm1bgwYN6tevr1AoOnTosG3bNt6enJx84sSJ/v37E1FSUtK1a9eEM6SlpSGJcEX2bt4lbNKkyeXLlzMyMvjh8ePHTU1NhSVZRNS4cWOJRBIWFsYP7969Gxsby1O4tW7d+tixY0LPY8eOaad2K9S7+U0uf+/sDBZj5O9PWVk0ciT17avr0UBZ0/UqeyiR+Pj4GjVqdOzY8YcffujevbuNjU1MTAxjbP369cOGDeN9evfubW9v/+23306aNEkqlf7222+8/fTp0wqFYsiQId9//33Dhg2bN2/O91X9/PPPxsbG06dPDwkJ8fPzk8vlf//9t46uD17v668ZEXvdRs+q5sWLF7a2tgMHDrx///6RI0esra35Hti4uLjly5fznLqDBg1ydnY+f/58ZGRkhw4dmjdvrlarGWPXr1+XyWTz589//PjxmjVrZDLZ/v37+WkfPXr08OFDZ2fnefPmPXz4MCsri7dPmsSI2PLlurnYdwf/jLdnz56iOqSmpt69e7c8h1Q+fvqJEbFq1dj/priBqgkzWJWDvb395cuX27Zte/78+ebNm1++fLlWrVpEVK1aNRcXF95n9+7dM2fOvH79enZ29tGjR4cMGcLbO3bseP78eQsLi4sXLw4dOvSff/7hM1UTJkzYv39/Wlran3/+aWxsfOHCBWGzIVRA7+YeN1NT08OHDycmJrq5ufn4+AwfPnzRokVE9OTJk0WLFr148YKIfvnll7Zt23bv3t3d3d3U1HTv3r1SqZSImjZtumPHjs2bNzs5OS1evHjNmjXCEsNOnTq1bNkyLS1t1apVLVu2/Oeff3g71rmXj9fOYO3atcvNzY0nPQ8JCdHel1N5xcfT7NlERGvXktaeJaiyJAzlzQAqg8uXqXVrataMtO7rgsiOHqX33qOuXenvv3U9lCpt0qRJa9euXbNmzaRJkwrtsHLlyu+++05YQieVSj08PLy9vb29vdu3b69QKMpxsKJ5/306fJh8fCg0VNdDgXKBGSyAysHN7d89bhpNwYeUSmXxz83LyyvqoaKeq71T792BGazy8doZrGnTpiUnJz98+DA4OLh3794KheLy5cvfffddt27dLC0tvb29g4KCeDrZchx1qWzcSIcPk7U1rV6t66FAeUGABVA5mJqSnR3l5PzPHreQkBA7OzsDAwNbW9uffvrp1WdduXKlZcuW+vr6hoaGH330kbDbTqPRzJgxw8zMzMDAwNXV9ejRo0L7d999Z29vr1AoHB0d16xZU/ZXVoHUqkVGRpSYSOnpuh5KlcYDrNfuInR2dvb399+/f//z58/DwsICAgI8PDxycnKOHTs2a9asli1bCvcQtVOjVUBJSfTFF0REK1cS0jC/Q3S9CAwASqp7d0bEDh369zAsLEwqlS5fvjw+Pv7HH3+USqV//fWXdn9hhXh0dPTp06ft7Oz8/f35QytXrjQ2Nv7zzz+fPHkyZcoUQ0PDR48eMcaWLVvm6Oh44sSJZ8+erVu3TiKRHD9+vFwvUteaN2dE7OJFXY+jSlu2bBkRffrpp2/x3OTk5NDQUH9//wKpapydnadOnRoWFpaTkyP6gEtp4EBGxHr10vU4oHwhwAKoNCZOZERsxYp/D/v16+fl5SU8+sEHH/T637fwDRs2KBSK1NRUfhgcHGxgYJCZmckYc3FxCQgI4O0qlcre3n7OnDmMsZcvX0ZFRQlnaNSo0axZs8rwkiqeoUMZEdu0SdfjqNJCQkKIaOzYsaU8D7+H6OPjwxNrcYaGhl5eXoGBgeHh4RqNRpQBl8ZvvzEiZmbGnjzR9VCgfOEWIUClUWCF0K1btzw9PYVHu3fvfvPmTe3+kZGRLVq0sLKy4oeenp65ubm8QmVUVJTwXJlM1rVrV/5cIyOjunXrCmdgjBkZGZXdFVVAWIZVDsRKNMrvIYaGhqakpISFhc2cObN58+a5ubnCPUQHB4cpUzK3baPkZDHG/eZSU2naNCKipUupZk3djAF0BQEWQKVRIA1mQkJC9erVhUerV6+emJio0VoD/2oHIoqPj09ISBAOhYd4o7Zr167dvXv3vffeE/s6KjQEWOXA0NCQiHisL8oJDQwMvLy8goKCrl69KtxDrFWrllpdbc0a0xEjyNaW6talCRNo1y4qz/ymU6dScjJ160bjxpXfi0IFgVqEAJVGgb/9+vr62tsDlUqlvr4+TwHFGRgYaP8B4xsGDQ0NebkY7efm5eXxv3mCzMzMUaNGjRw5snXr1uJfSQWGZO5lLTU1dcmSJba2tleuXLGxsWnevLmXl5eXl1fnz
p1Fyb9QrVo1Hx8fHx8fIrpzJ/nwYQoLo1OnKDqaQkIoJIQMDaljR+rRg7y9qWlTkkhK/5qF++sv2r6djIxo3boyfBWosDCDBVBp8D1uCQnEK8c4ODjExsYKj8bFxWkX6Su0A2+0t7eXSCT8kIuNjdV+7tOnT9977z1bW9uff/65zK6mgqpfnyQSevCAVCpdD6Uqunr1qoeHx7lz5/Lz89u0aSOTySIiIoKCgry9vatVq9a/f/81a9bcF6/iZoMGNT77jA4epPR0Cg+nwEDq0IGUSgoLoxkzqHlzqlGDfH0pJIS0flHEkZFBH39MRLR4MWnddYd3ia4XgQHAG2jWjBGxS5cYY2zmzJlubm75+fmMMZVK1bRp0wLbss6cOUNEkZGR/HD27Nm2trYqlYox1r1796FDh/L21NRUIyOjrVu38sMLFy7UrFnTz88vOzu7vC6rYqlVixGxBw90PY4qZ8eOHXxJn4eHBy/29fLlSyH/gkRrksfW1tbHx2fTpk3CFg0RPX3KQkOZvz+rXZsR/f8/Z2fm789CQ1lGhgivMnYsI2Jt2zKVSoSzQWWEAAugMhkyhBGxzZsZYywmJsbCwuKDDz749ddf+/XrZ2ZmFh0dzRjbtm3bxIkTef+uXbvWrVt37dq1X331lVwuX7duHW/nKR4mTZr0yy+/tGzZ0tXVValUMsY2bdqkr6/fokWL4P/s3btXN5eqO97ejIgdOKDrcVQhKpUqICCAh1AjRowoNHZPSkria6fs7e2FSIvncA8ICAgLC8vNzRV9YA8fsuBg5uPDzM3/P9KSy5mHB5s3j4WHM7X6bU57/DiTSJi+Prt1S+wRQ+WBUjkAlcm8ebRgAX31FS1aRER0586d77//PiYmplatWtOnT2/UqBERhYaGnjt3bsWKFUSUmZn5/fffnzt3zsTExM/Pb9CgQcKpTpw48fPPP6empjZr1mzWrFl8zfvixYuvXr2q/YoNGjSYP39+OV6i7k2dSqtX09Kl/yaHhFJ6/vz50KFDw8LC5HL5okWLAgICXvuU6Ojo/fv3Hzhw4MyZM7m5ubzRyMioffv2fMFWixYtJKIua8rLo/PnKSyMjh6liIj/r5dQvTp5elKPHvTee2RnV6JTvXxJzZrRw4cUGEgluFaoshBgAVQm27fT8OE0eDDt2qXroVRda9bQJ5/Q+PEUEqLroVR+N27cGDBgQHR0tI2NTWhoaLdu3d7o6Tk5OWfPnj127NixY8euXLki/MGytbXt1KmTl5dX7969tWe8RJGVRX//TQcO0NGj9Pjxv40LFtCcOSV6+qef0sqV5OFBFy6QHBvJ3mEIsAAqkytXyMODGjem/814BWK6eJG+/5569sTW+tLatWvXRx999PLlS3d39z/++KN27dr/196dx1VV5g8c/957kUVEUXADFMQlwbAMbFQ008DMnNJmQF+YtmhoNa1TQZs5WYZbo+nPhLQUNRU0R2vUQms0NUVxy9DUVFRQcWcREC7P74+jd264pPgAwnzef3kOzzn3uTYTn+495zm3crbs7Ow1a9asWrVq+fLl9rdoBAYG/vnPfw4LC+vWrZuTk9Mtz/p39u6V776TlBR5910JCbmhQ9LTZcQImTpV2rfXOxdUMwQWUJ3k54ubmzg6Sn6+WCxVPRvgGpRS48aNe/PNN5VSUVFRM2bMKLMOyC2efMeOHSkpKSkpKT/++KPtO8QWLUIDAtaFh0t4uLRrp+vVgHIisIBqJjAwR2TvN9+08fevW9VzqTlOnJD8fBGRhg3F7rErv5OdLXl54uAgzZtX5tSqn5ycnCFDhixduvTGL7oqN/vvEF1cRqxbd+lTx8aN5b77JCxMHn5Yfr96SXmUlMiSJZf+/MgjctWPyUpLZfFiEZG2bSUo6FZfETUAgQVUM+Hh4atWrfr3v//dp0+fqp5LzfHYY5d+g3bvLj/8cPVlIQcOlIULxcdH/5pJNcmvv/7av3//3bt3e3h4LFy40P5pThXtxAlrSoolJUVSUuTYsUs7TSa5+24xPtbq2lWcnctz5txcqXv5P2dGj5Z33rnKmIsXL4XXm2/KmDHleRXUMCw0ClQzbdu2FZE9rDVeMdaskXnzqnoS1dY333zzpz/9affu3XfdddfmzZsrs65EpHFjy+OPy+zZkpUlv/0m8fESESFubrJtm4wbJ+Hh0qCBhIfL2LGSlibl/mzhgw94khJuCIEFVDN33HGHiPzKv+MrzCuvyKlTVT2J6kYpNXbs2EcfffT8+fMDBw7csGFDixYtqnA+/v4SHS1JSXL6tGzZIu+9J8HBUlQkq1ZJbKyEhEiTJpfWcLe7XP6GFBXJiy9WzKRRsxBYQDVDYFWcZs2kbl05dUrefruqp1Kt5Obm/uUvf4mNjTWZTHFxcbYV228HDg4SHCyjRsmWLXLihCQlSXS0NG8u2dmSnCzDh0uzZjfxHOj77xcR+e47WbCg4qeOao7AAqoZviKsOB4e8tprIiIzZsiGDVU9m2pi3759nTp1WrJkSYMGDVasWFGhl7TfIk9PiYiQ+HjJyPjvd4j16l16DnRkpDRoIF27XvoO0bbWqL2+fcVYyeuVV8TuQerAVRBYQDXj4+NTp06dEydOnD17tqrnUgO9+qr4+EhpqTz7LM97/mPLly+/995709PT27dvv3nz5vDw8Kqe0Y2yfYeYnS0//CBvvikhIVJaKuvXX/oOsWlTGTRI5s4te+CECWI2y/HjN7ruKP5nEVhANWMymdq0aSN8S1gxXF1l3DgRkZ07ZdKkKp7M7cy46OrPf/7zuXPnIiMjN2zY4O/vX9WTKg9HR7n/fhkzRjZvluzsS98h+vlJdrZ8+aVMn152/D33yJNPiohMmyYbN1b6dFF9EFhA9cNlWBVq4EDp2lVEZNQoOXy4qmdzW8rLy4uIiIiNjVVKxcTELFiwwNXVtaonpYGHx6XvEA8elD17ZMoU+dvfrjJszBipV09KS+X558VqrfRZoprgOUlA9UNgVSiTSaZNk3vukfx8GTFCli+v6gndZvbv39+/f/9du3bVrVs3MTHx0UcfrdCXy8nJ+fHHH0tKSjp37mw8krwMq9X6008/HT9+PCAgoN3vV3DPyspKTU11cnK677777BMwJydn/fr1paWlwcHBTZo0uerr3nGH3HHH1afUuLH84x/y8suydatMnSovvVT+d4cajE+wgOqH69wrWlCQPPeciMiKFbJ0aVXP5naycuXKe++9d9euXXfcccemTZsquq5++uknf3//Z599NjY21s/PLykpqcyA8+fPd+nSpU+fPnFxcR06dHjmmWdsq2fPmjXL39//nXfeGTZsWMuWLbdt22bs//rrr/38/N59992RI0f6+fkllOuZ3n/7m9x9t4jIu+9KZma53x9qNAWgutm+fbuIBAQEVPVEym/RokWBgYEuLi6tW7eeNWvWlQN+/fXXXr161a5du1GjRi+88EJBQYHtR3Fxcb6+vi4uLiEhIWvWrLHtj4+PDwwMdHV1DQgIuOo5r6N/fyWi7r77v3vOn1dNmyoR5eurLlxQSqkBA5SI8vG5
ybdaU5SWlsbFxVksFhHp27fvuXPnKvoVrVZrmzZt+vfvb7ValVIxMTF169Y9c+aM/ZhXX33V29s7MzNTKbV27VqLxfLVV18ppY4dO+bi4jJ69GilVHFx8YMPPtihQwfjXXTv3v27774zDv/oo49cXFwuXrx4/Znk5CgRJaImTPjvztRUZTYrERUVpZRSRUWXxrz5pra/AVRrBBZQ/Vy4cMFsNjs6OhYXF1f1XMpj48aNFovlrbfe+uWXX+Li4sxm88qVK+0HFBQU+Pv79+jRIy0t7d///renp+dzzz1n/Gj69OmOjo4zZ878+eefn376aVdX14yMDKXUZ5995uHhsWjRov3790+aNMlkMv3nP/+58SldGVhKqXnzLv3KfP99pf63A6ugoGDw4MEiYjKZYmJijOKpaFu2bBGRrVu3GptnzpxxdHT88ssv7cf4+Pi89957ts2wsLABAwYopRISEurUqZOfn2/sX7NmjYj8+uuvZV4iOTnZbDbn5ORcfyZXDSyl1NChSkSZTGrtWgILZRFYQLXk6+srInv37q3qiZRHVFTUn/70J9vmQw891Lt3b/sBCxcutFgsxscSSqnp06c7OTkZvwXbtGnz/PPPG/tLSkq8vLzefvttpdTZs2d37NhhO0NQUNDrr79+41O6amAppXr2VCLK1VUdOVI2sCZOVLNnq8u/wWuyw4cPBwcHi4ibm5vx+VDlSEpKMpvNJSUltj2tW7f+4IMPbJtFRUUmk+lf//qXbc9rr73WsWNHpVRsbGxwcLBt//nz50XE1vGlpaX79+9fsWJF27ZtX3rppT+cybUC6+RJ5eGhRFTHjqqggMDC73ANFlAtVevLsHbs2PHggw/aNnv37r1jx44yA4KCgry8vIzNBx98sKioaPfu3QUFBXv37rUda7FYwsLCjGPd3d3bt29vO4Ojo2OtWrVufaqffipOTpKfX3bRo7w8GTVKnnhCmjaV4cNl3bpbf6nb1Jo1a0JCQtLS0lq3br1x48b+/ftX2kufOnXKzc3N+FLSUL9+/ezsbPsBSil3d3f7ASdPnhSRkydP2u+vW7eug4OD7djS0tJevXpFRESUlJQMHTq03DP09JSPPhIR2bxZ5s8v92lQMxFYQLVUrQMrKyvL/tatxo0bZ2dnl9gt65mVldW4cWPbpjE4KysrKyvLGG9/bOYV1xj/9ttv27dvDwsLu/Wptmkjr7wiIpKYKLt2/Xe/xSIffyydOklOjiQkSLducvfdMmWKnDlz6695G0lISAgPD8/Ozu7Tp09qampgYGBlvrqHh0deXp7VbiGEc+fOeXp62jYbNGhgMpmMT6fKDPDw8LDfn5eXV1JSYjvWYrH89ttvZ8+eHThw4P3333/uFhZlHzpUOnUSERk5stznQM1EYAHVUrVeqcHBwcE+p4qLi81ms9lsvs4AEalVq5bxoZT9j0pKShwdHe1PXlRU9MQTT/Tt27eH8UyTW/buu+LnJ6Wl8ssv/93p4iLDhslPP8nu3RITI40ayY4d8uKL4uUlkZHy9dfVfnmkwsLCp556avjw4SUlJTExMV9//bX9B0KVo3nz5larde/evcZmfn7+kSNH/Pz8bAOcnZ0bNWq0e/du257du3cb3577+vru37//4sWLxv709HRjp/35HRwcnn/++TNnzmzevLnckzSbJT5eHBxu+qHRqPEILKBaqtaBZdz2ZdvMysry8vKyD6wrBxg7mzZtajabjU1DZmamt7e3bTM3N7dfv34lJSVz5sz5w2nk5Uli4h/PtnZt+fjja/60bVuJi5MjR2TZMomIEKtVkpPlkUfEz09iY+XQoT8+/23o6NGj991336xZs+rUqZOcnGzciFD50wgJCfHx8Zl0eUH9+Ph4EbH/cllE+vXr9/nnnxsfVqWnp6ekpBhfYvbp0yc/P3/WrFkiopSaPHly27ZtjRtvP/zww6OXaygtLU1EbF9Gl0/79jJixK2cADVUFV8DBqBcjP7w9PSs6omUx4gRIwICAoyLl0tLSzt37jxw4ED7AStWrBCR9PR0YzMuLs7d3b2wsFAp1bFjxyjjtnil8vPz3d3dJ1y+8Dg9PT0gIKBPnz43soLAvn3qzjuViJo7V6lrX+Ru07fvpUuYr38X4dGjKi5O+ftfGmw2q7AwlZSkior+cEa3i7Vr1xpfwrZq1ernn3+u2sksX77cxcWlS5cuDz74YK1atT799FNjf7169Yw/Hz9+vHXr1n5+fo899lj9+vUfeeQR20Xx48aNq1Wr1kMPPdSxY8c6deoYK3oUFxdHRUV5eno+/vjjkZGRzs7OI0aM+MNpXOsid5vz55WXFxe543cILKC6qlu3rly+zrd62bdvn5ubW2Rk5JIlS5566ilnZ+ft27crpZYuXfr3v/9dKWW1Wjt37hwQEDB//vyPP/7Y2dl57NixxrFff/212Wx+4403Fi9e3LNnTx8fHyOn/vWvf7m5ud11110LFixISkpKSkpatWrVtSawcqWqX1+JqDZtlFFxfxhYhw6p2rVvdJkGq1WlpKjBg5WLy6VfuvXrq+hoZXeb420qPj7e+B62d+/eZVacqiqHDh369NNPJ0+evGvXLtvO+fPn29ZcyMvLW7BgwYQJE1asWFFaWmp/7LZt2yZNmhQfH3/06FH7/evXr584ceKECRPWrVt3I3PIzVX166v69dXUqdccM2/epTH/+MeNvznUZCZ1edFbANXLvffeu3nz5nXr1oWGhlb1XG7a1q1bP/zww4MHDzZr1iwmJqZLly4iMm/evG+//TYxMVFEzp49O2rUqHXr1rm6ukZFRQ0fPtxkMhnHLlu2bOrUqadOnQoKCho1alSLFi1E5L333tuwYYP9S7Rt23bKlClXvvTkyfL3v4vVKn37yty5Uq+eiEhenhQXi8Uidetec865uVJSImbzpUNuxLlzkpQkn34q27df2hMcLNHREhUlderc6EkqR1FR0XPPPff555+bTKY33njjww8/tL99D8DNIrCA6mrw4MFz586dOXPm008/XdVzqR4KCyU6WubMEZNJ3nhDxoyRSruyKC1NEhNl7txLtxm6uEjfvhIdLQ88IJe7sSplZmb+5S9/2bRpk7Ozc0JCgrGmKIBbwUXuQHVVra9zr3xHjkjXrjJnjri5yeLFEhdXeXUlIsHBMnmyZGZKUpKEhUlhoSQnS3i4BAbK2LFit7RTFVi/fn1ISMimTZuaNWu2bt066grQgsACqqtbXwpLKfXZZ5/17t27e/fuI0eOzMvLu3LMTz/9NGDAgK5duz755JP2r1VQUDB69Oj777+/V69e06ZNKy0tNfaXlJR88sknffr0CQ8Pf++99656zsq3dq2EhEhamrRuLRs3SiUulvk7zs4SESEpKZKeLq+/Lo0by549EhsrzZpJZKR8//0B219jpUlISOjZs+fx48e7d+++ZcsWY8V2ABpU8TVgAMpr586dInLHHXeU+wyjR492cnIaM2ZMfHy8r69vr169ygzYvHmzk5PToEGDEhMTe/fu3aBBgyNHjhg/6tevn5eX17Rp08aPH1+7du3
Y2Fhj/1//+te2bdvOmDHjiy++aNmyZZln4FSJ+HhVq5YSUX36qLNnq3o2dkpKVEqKiohQDg7KyUk1aNDG29s7Jibmt99+q4RXLywsHDZsmPGLIDo6+loPPC4qKtqxY0d6enqZ68ft7du3b+vWrflXPDYoNzd369atBw4cuPKQ/fv3X/UQoMYgsIDqqrCw0GKxODg4FJVrDYD8/HxXV9dx48YZm5s2bRKRH3/80X5Mv379unbtavxmLSoq8vX1fe2115RSxupB33//vTFs2rRpTk5OZ8+eLS0tjY+Ptz1D8JtvvhGRrKyscr/HW1RQoJ588tLjeGNiVKU8obg8jh5VkycfbtmypZE7ZrO5V69eCxYsMFamqAiZmZmdO3cWEWdn51mzZl1r2KpVqxo1auTm5ubo6NiuXbv9+/eXGXDy5MnQ0FCLxVKvXj03N7d58+bZfjRz5szatWu7u7ubzeaePXva1s44deqUcYi7u3uZQ4CahMACqjF/f38Rsb99/cYZ99wdPHjQtqd58+ZxcXH2Yxo1ajRp0iTb5gsvvNC5c2el1JQpUxo2bGj7SMN4xFtKSkqZl9i6datU3ROpjxxRHTsqEVWnjlq0qEqmcNO2bNkSHR1du3Zto7Tc3d2jo6O3bdum/VWaN28uIj4+Pqmpqdcadv78eU9Pz6FDhxYXF58/f75z585dunQpMyYqKqpVq1aZmZlWq/X99993dHQ0/heVnp7u4OAwfvz40tLSjIwMX1/fZ555xjhk0KBBtkNGjx5tOwSoYQgsoBozHv3Rv3//chybnJxsMpmKi4tte0JDQ1944QXbZlFRkclk+uqrr2x7xo4d27x5c6VUbGzs3b9fM8rFxeXKD0ImTpzYqFEja1V8cLR2rWrcWImoVq1UVS+WedPOnTsXHx9/zz332K7lCA4OnjRp0unTp2/95ImJiS4uLiLSrVu348ePX2fkl19+WatWrZMnTxqbKSkpIrJv3z7bgLy8PEdHx88++8zYLC4ubtiw4UcffaSUevvtt40H3Rg/mjx5cp06dS5evJifn+/k5JSQkGA7pFGjRmPGjLn19wXcbrjIHajG7r33XhFZsmSJs7Pzo48+mn0zd6Nd9QmAtme3iYixIrb9Yki1atUyBhQXFzs4ONifzWKx2B8rIhkZGR9++OH7779f+U9ZSUiQBx6QEyekd29JTZU776zk179V9erVi46OTktL27VrV0xMjIeHR1pa2ssvv+zt7R0ZGWksoFqO05aUlMTGxg4ZMqSgoCA6Onr16tX2j82+0t69e9u0aWN7QLLxlaLtyYAikpGRcfHiRdsybA4ODiEhIcaAvXv3durUyfaPvnPnznl5eVlZWRkZGUVFRVc9BKhhCCygGktMTOzWrZvZbC4qKlq2bFnz5s0jIyO//fbbG7kZzcvLy2q1njp1yrbnxIkT9s/1My6gsY822wAvLy/7/fn5+Xl5eT4+PrY9R48e7dWr12OPPRYdHX2L7/GmFBXJ0KEyfLiUlEhMjHzzjdSvX5mvr1m7du3i4uIyMzOTkpLCwsKKioqSk5PDw8Pbtm07duzYEydO3PipTp061atXr7Fjxzo5Oc2cOdO2Yvt1nDhxor7dX5+rq6uzs/Px48ftB4iI/RhPT09jQJljPTw8ROT48ePXOQSoYQgsoBpzdnZeu3Ztfn7+qFGjHnjgAavVmpyc3Lt37+bNm8fGxh44cOA6xwYGBlosllWrVhmbR44c2bt371133WU/JigoyPhiyLB69WpjQPv27Q8fPmxbgmvVqlUmkykoKMjYXLNmTUhISHh4eHx8vKkSl9HMzJTu3eXzz8XZWWbPlrg4qYSlyNPS0qZMmTJr1qxrfXyYmZk5c+bM//u//9u1a5f9/tLS0hUrVnzyySeLFi0qKCiw/1FBQcHixYsPXX5StJOTU0REREpKSkZGRlxcnK+v7969e2NjY729vcPDw5OTk0tKSq4/yW3btoWEhPzwww/e3t5r1qy5wZVp69Wrl5+fb9ssLi4uKipyd3e37TEe1mQ/Jjc31xhQ5tjc3FwRcXd3v84hQE1T1d9RAtAmKysrLi7O/ma0sLCw2bNnX7hw4arjBw8e3KxZs/Xr1+/Zsyc8PLxNmzbGvfozZ85MS0tTSiUnJ1sslvj4+MOHD48aNcrBwcHYX1JSEhQU1LVr1/T09NTU1JYtW0ZERCilSktLJ0yYUKtWrbfeeuu3y3Jycirhva9bp5o0USKqWTO1ZUslvKBSSo0cOdLR0TE8PDwoKKhevXobN24sM2DVqlW1a9cODg7u2bOng4PDxx9/bOy/ePFieHh4/fr1+/bt6+3tHRAQYDxTMjs7+913323YsKGIjB8//qovarVaU1JSIiIibB9BNW3aNCYmxv7qKHtz5841LroKDQ09duzYjb+7qVOnNmjQwHaV3u7du0XE/qJ445OnlStX2vYEBQW98cYbSqnnnnvOuB/C8NVXX5nN5tzcXOMTrBUrVth+1L59+9dff/3GZwVUFwQWUAPd4M1oubm5TzzxRO3atS0WS48ePWy/oQMDA6dMmWL8+ZNPPmnatKmItGrVavHixbZjDx482KtXLwcHB2dn56ioKOMm/GPHjtW/wvz58yv6/cbHK0dHJaK6d1cnTlT0q12SkZHh4OCQmJiolLJarX379r3nnnvKjGnfvv2gQYOM2y0nT57s6OhoLCQWHx/v4uKye/dupdSpU6d8fX1ffvllpdSWLVveeuut1NTUoKCgawWWzbFjxyZNmmT74FBEgoOD4+PjbYtLFRQUdOzY0fhRdHT0zS7n8euvv5rN5oULFxqbr7/+esuWLcvcstChQwejrdXllT62bNmilPrhhx/MZvPPP/+slCotLX344Yf79u1rDLvnnnv++te/Gn9OTU0Vkc2bN9/UxIBqgcACaqwbvxntOmtIXn/AHx5Y0QoL1TPPKBEloqKj1TUWy6wQcXFxxnVsxqbxZWtGRoZtwPbt20XE1rUFBQVubm7x8fFKqT59+tgiQyk1cuRIPz8/+5O3b9/+DwPLxuhpV1dX459yvXr1hgwZ8s9//tO41MlkMk2fPr187/HVV191dXUdMWLEgAEDLBbLggULlFJZWVn+/v7GR1k//PCDk5PTQw899OKLL3p6ekZGRtqOfeihh5o0afLSSy+Fh4fXrl3bVlH/+c9/7A+x9RlQw3ANFlBj3fjNaH94pdS1BlTmJVZXOn48v0cP+ewzcXGRuXMlPl7+6LptnQ4ePNi2bVvbjXLt2rUTkYyMDPsBIhIYGGhsOjs7+/v7GwMOHTp0p93Nje3atTty5IjVai3fTIwPrjIzMz/99NOQkJDz588nJia+8sorZ8+etVgsCQkJw4cPL9+ZJ06cOH/+fCcnJz8/v40bNw4YMEBEXF1dhwwZYtyBeP/99xsXeCmlJk2aNH/+fNuxy5Yt++ijj6xWa2ho6M6dO0NCQoz93bt33759+1UPAWqUKg48AJWlsLDQuBnNVkVt2rSJi4u7/mJIty1jtcwuXTJ8fNS1F8usQBEREf369bNtXrhwQUSWLFli2z
Nz5kxnZ2f7Q+67774RI0YopZo2bTpx4kTb/pUrV4qI/SeLN/UJVhnbt28PDQ11dXX18/PTvkgpgBvEJ1jA/4pbvxnt9jFjxozQ0NDDhw/Xrfvq1q1y+UKjStWoUaOzZ8/aNk+fPi0i9itLNWzYsLCw0P4OwTNnzhgDGjZsWOZYR0dHXTfT3XXXXevWrcvLyzt48ODdd9+t5ZwAbhaBBfzPadasWUxMzIEDB4yb0cxm86pVqyIjI43FHfbv31/VE7weY7XMZ555pqioKDo6etmy+Q0bVs1MWrVq9csvv9iqdOfOnSaTqUWLFvYDRGTHjh3GZl5e3oEDB4wBLVq0sO03jvXz86v8FVkBVBz+/wz8jzIWcUhKSjp8+PCkSZPuvPPOY8eOjR07tnXr1iEhIQkJCcZ3XreVcqyWWXGioqJyc3Pj4+NF5OLFi+PHj+/evXuTJk2Ki4uPHz9utVoDAgI6deoUFxdnXFz1z3/+02w2P/zwwyJirAe7ZcsWETl69Ojs2bONy5sA1BxV/R0lgNvFlTejGdfIV/W8Ltm6davx7EVvb+8rV5yqElOnTnV0dOzQoYO3t7ePj8+ePXvU5dUKjEdcp6amNmzY0M/Pr3379s7OzvPmzTMOtFqtjz/+uLOzc6dOnerWrdutW7e8vDzjRz179gwODnZxcfHx8QkODv7uu++q6t0BuBUmVa5nWgGoqXJychYsWJCYmLh+/XpjT2Bg4JAhQ4YNG2Y88KRKfPnll8OGDSsoKAgNDV20aFGTJk2qaiZlHDp0aNOmTXXq1OnRo4ex8FhOTk5qampoaKixvGdOTs6aNWsKCwtDQ0O9vLzsj01LS9u3b5+Xl1fXrl1t3w+uWbOmuLjYNiYoKOj6TwwEcHsisABc3e7du2fPnv3555+fPHlSRJycnB555JHo6OgHHnigMldnKCkpeeedd8aOHSsi0dHRU6ZMcXR0rLRXB4DyIbAAXI/xGOnExMQVK1YY1xI1a9YsKirq2WefNb6wq1CnT58eMGDA6tWrnZycpk6dOmzYsIp+RQDQgsACcEOOHj06b9686dOnGw8hNpvNPXv2jI6O7tevXwVdab5jx47+/fsfPHjQy8tr0aJFnTt3rohXAYCKQGABuAmlpaXff/99QkLC0qVLL168KCJNmjSJjIwcNmyY/UPxbt2CBQuGDh164cKF4ODgr776qnnz5hpPDgAVjcACUB5nz55NTk6eNm2abT2n4ODg6OjoqKioOnXq3MqZrVbr22+/bVx0NXjwYOO5yBpmDACViMACcEvS0tISEhLmz5+fm5srInXr1n300UeHDBkSFhZWjrOdOXNm4MCBKSkpDg4OH3zwQUxMjO75AkBlILAAaFBQUPDNN98kJCSsXr3a+LdKQEDAE0888fTTTze84aXWd+7c2b9//wMHDnh6eiYlJfXo0aMipwwAFYjAAqDTnj17Zs2a9cUXX2RnZ8vlxR0GDx7cp08fi8VynQOTk5Ofeuqp/Pz8Dh06LFmypBJuUQSAikNgAdDv4sWL33777Zw5c5YsWWI8rc/Hx2fQoEEjRozw8/MrM1gpNW7cuDfffFMpFRUVNWPGDC66AlDdEVgAKlBWVtacOXMSEhIOHDgglxd3GDp0aN++fY1r4XNycoYMGbJ06VIuugJQkxBYACpcaWnphg0b5syZM3fuXOMZ0iaTKSgoqF+/frNmzTp8+LCHh8fChQsfeOCBqp4pAOhBYAGoPKdPn547d+77779/5swZ287AwMDly5dz0RWAmoTAAlAFvvjii/Hjxx87dszX13f16tVV+BhpAKgIBBYAAIBm5qqeAAAAQE1DYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAA
GhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGhGYAEAAGj2/wFk5f+x2ImxAAAAAElFTkSuQmCC", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Draw molecule with atom SHAP values\n", + "atom_labels = {i: f'{atom_shap_values[i]:.3f}' for i in range(n_atoms) if atom_shap_values[i] != 0}\n", + "mol_with_atom_shap = Chem.Mol(mol)\n", + "for atom in mol_with_atom_shap.GetAtoms():\n", + " atom_idx = atom.GetIdx()\n", + " if atom_idx in atom_labels:\n", + " atom.SetProp('atomNote', atom_labels[atom_idx])\n", + "img_atom_shap = Draw.MolToImage(mol_with_atom_shap, size=(800, 800), kekulize=True)\n", + "img_atom_shap.save('atom_shap_values.png')\n", + "\n", + "# Draw molecule with bond SHAP values\n", + "bond_labels = {bond.GetIdx(): f'{bond_shap_values[bond.GetIdx()]:.3f}' for bond in mol.GetBonds() if bond_shap_values[bond.GetIdx()] != 0}\n", + "mol_with_bond_shap = Chem.Mol(mol)\n", + "for bond in mol_with_bond_shap.GetBonds():\n", + " bond_idx = bond.GetIdx()\n", + " if bond_idx in bond_labels:\n", + " bond.SetProp('bondNote', bond_labels[bond_idx])\n", + "img_bond_shap = Draw.MolToImage(mol_with_bond_shap, size=(800, 800), kekulize=True)\n", + "img_bond_shap.save('bond_shap_values.png')\n", + "\n", + "# Display the images if running in a Jupyter notebook\n", + "try:\n", + " from IPython.display import Image, display\n", + " display(Image(filename='atom_shap_values.png'))\n", + " display(Image(filename='bond_shap_values.png'))\n", + "except ImportError:\n", + " print(\"IPython is not installed. 
Images are saved as 'atom_shap_values.png' and 'bond_shap_values.png'.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop_delete", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/examples/training.ipynb b/chemprop-updated/examples/training.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a4cb71fefea1882de1d6bf664a83ec1b557e4cf8 --- /dev/null +++ b/chemprop-updated/examples/training.ipynb @@ -0,0 +1,887 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/training.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "from lightning import pytorch as pl\n", + "from lightning.pytorch.callbacks import ModelCheckpoint\n", + "import pandas as pd\n", + "\n", + "from chemprop import data, featurizers, models, nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change data inputs here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\" # path to your data .csv file\n", + "num_workers = 0 # number of workers for dataloader. 0 means using main process for data loading\n", + "smiles_column = 'smiles' # name of the column containing SMILES strings\n", + "target_columns = ['lipo'] # list of names of the columns containing targets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smileslipo
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc143.54
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...-1.18
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl3.69
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...3.37
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...3.10
.........
95CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...2.20
96CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...2.04
97CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...4.49
98COc1ccc(Cc2c(N)n[nH]c2N)cc10.20
99CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...2.00
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get SMILES and targets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14',\n", + " 'COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23',\n", + " 'COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl',\n", + " 'OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3',\n", + " 'Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1'],\n", + " dtype=object)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smis[:5] # show first 5 SMILES strings" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 3.54],\n", + " [-1.18],\n", + " [ 3.69],\n", + " [ 3.37],\n", + " [ 3.1 ]])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ys[:5] # show first 5 targets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Perform data splitting for training, validation, and testing" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['SCAFFOLD_BALANCED',\n", + " 'RANDOM_WITH_REPEATED_SMILES',\n", + " 'RANDOM',\n", + " 'KENNARD_STONE',\n", + " 'KMEANS']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# available split types\n", + "list(data.SplitType.keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ChemProp's `make_split_indices` function will always return a two- (if no validation) or three-length tuple.\n", + "Each member is a list of length `num_replicates`.\n", + "The inner lists then contain the actual indices for splitting.\n", + "\n", + "The type signature for this return type is `tuple[list[list[int]], ...]`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.mol for d in all_data] # RDKit Mol objects are used for structure-based splits\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1)) # unpack the tuple into three separate lists\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ChemProp's splitting function implements our preferred method of data splitting, which is random replication.\n", + "It's also possible to add your own custom cross-validation splitter, such as one of those implemented in scikit-learn, as long as you get the data into the same `tuple[list[list[int]], ...]` data format with something like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import KFold\n", + "\n", + "k_splits = KFold(n_splits=5)\n", + "k_train_indices, k_val_indices, k_test_indices = [], [], []\n", + "for fold in k_splits.split(mols):\n", + " k_train_indices.append(fold[0])\n", + " k_val_indices.append([])\n", + " k_test_indices.append(fold[1])\n", + "k_train_data, _, k_test_data = data.split_data_by_indices(\n", + " all_data, k_train_indices, None, k_test_indices\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get MoleculeDataset\n", + "Recall that the data is in a list equal in length to the number of replicates, so we select the zero index of the list to get the first replicate." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0], featurizer)\n", + "scaler = train_dset.normalize_targets()\n", + "\n", + "val_dset = data.MoleculeDataset(val_data[0], featurizer)\n", + "val_dset.normalize_targets(scaler)\n", + "\n", + "test_dset = data.MoleculeDataset(test_data[0], featurizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get DataLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = data.build_dataloader(train_dset, num_workers=num_workers)\n", + "val_loader = data.build_dataloader(val_dset, num_workers=num_workers, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, num_workers=num_workers, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change Message-Passing Neural Network (MPNN) inputs here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Message Passing\n", + "A message passing module encodes molecular graphs, using message passing to learn node-level hidden representations.\n", + "\n", + "Options are `mp = nn.BondMessagePassing()` or `mp = nn.AtomMessagePassing()`" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "mp = nn.BondMessagePassing()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aggregation\n", + "An `Aggregation` is responsible for constructing a graph-level representation from the set of node-level representations after message passing.\n", + "\n", + "Available options can be found in ` 
nn.agg.AggregationRegistry`, including\n", + "- `agg = nn.MeanAggregation()`\n", + "- `agg = nn.SumAggregation()`\n", + "- `agg = nn.NormAggregation()`" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'mean': ,\n", + " 'sum': ,\n", + " 'norm': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.agg.AggregationRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "agg = nn.MeanAggregation()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feed-Forward Network (FFN)\n", + "\n", + "A `FFN` takes the aggregated representations and make target predictions.\n", + "\n", + "Available options can be found in `nn.PredictorRegistry`.\n", + "\n", + "For regression:\n", + "- `ffn = nn.RegressionFFN()`\n", + "- `ffn = nn.MveFFN()`\n", + "- `ffn = nn.EvidentialFFN()`\n", + "\n", + "For classification:\n", + "- `ffn = nn.BinaryClassificationFFN()`\n", + "- `ffn = nn.BinaryDirichletFFN()`\n", + "- `ffn = nn.MulticlassClassificationFFN()`\n", + "- `ffn = nn.MulticlassDirichletFFN()`\n", + "\n", + "For spectral:\n", + "- `ffn = nn.SpectralFFN()` # will be available in future version" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'regression': ,\n", + " 'regression-mve': ,\n", + " 'regression-evidential': ,\n", + " 'regression-quantile': ,\n", + " 'classification': ,\n", + " 'classification-dirichlet': ,\n", + " 'multiclass': ,\n", + " 'multiclass-dirichlet': ,\n", + " 'spectral': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.PredictorRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "ffn = nn.RegressionFFN(output_transform=output_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Batch Norm\n", + "A `Batch Norm` normalizes the outputs of the aggregation by re-centering and re-scaling.\n", + "\n", + "Whether to use batch norm" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "batch_norm = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Metrics\n", + "`Metrics` are the ways to evaluate the performance of model predictions.\n", + "\n", + "Available options can be found in `metrics.MetricRegistry`, including" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'mse': ,\n", + " 'mae': ,\n", + " 'rmse': ,\n", + " 'bounded-mse': ,\n", + " 'bounded-mae': ,\n", + " 'bounded-rmse': ,\n", + " 'r2': ,\n", + " 'binary-mcc': ,\n", + " 'multiclass-mcc': ,\n", + " 'roc': ,\n", + " 'prc': ,\n", + " 'accuracy': ,\n", + " 'f1': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.metrics.MetricRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metric_list = [nn.metrics.RMSE(), nn.metrics.MAE()] # Only the first metric is used for training and early stopping" + ] + }, 
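+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As a side note on the batch norm option above, here is a minimal standalone sketch (plain PyTorch only, not Chemprop's internal wiring; the tensor shapes are illustrative assumptions) of the re-centering and re-scaling it applies to the aggregated fingerprints:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Illustrative sketch only: what BatchNorm1d does to a batch of aggregated graph-level vectors\n", + "import torch\n", + "\n", + "fake_fingerprints = torch.randn(16, 300) * 5.0 + 3.0 # assumed batch of 16 fingerprints with shifted mean and inflated spread\n", + "bn = torch.nn.BatchNorm1d(300)\n", + "normalized = bn(fake_fingerprints)\n", + "print(normalized.mean().item(), normalized.std().item()) # roughly 0 and 1 after re-centering and re-scaling" + ] + },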
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructs MPNN" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0): RMSE(task_weights=[[1.0]])\n", + " (1): MAE(task_weights=[[1.0]])\n", + " (2): MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn = models.MPNN(mp, agg, ffn, batch_norm, metric_list)\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "# Configure model checkpointing\n", + "checkpointing = ModelCheckpoint(\n", + " \"checkpoints\", # Directory where model checkpoints will be saved\n", + " \"best-{epoch}-{val_loss:.2f}\", # Filename format for checkpoints, including epoch and validation loss\n", + " \"val_loss\", # Metric used to select the best checkpoint (based on validation loss)\n", + " mode=\"min\", # Save the checkpoint with the lowest validation loss (minimization objective)\n", + " save_last=True, # Always save the most recent checkpoint, even if it's not the best\n", + ")\n", + "\n", + "\n", + "trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_checkpointing=True, # Use `True` if you want to save model checkpoints. 
The checkpoints will be saved in the `checkpoints` folder.\n", + " enable_progress_bar=True,\n", + " accelerator=\"auto\",\n", + " devices=1,\n", + " max_epochs=20, # number of epochs to train for\n", + " callbacks=[checkpointing], # Use the configured checkpoint callback\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start training" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/knathan/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | BatchNorm1d | 600 | train\n", + "3 | predictor | RegressionFFN | 90.6 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.276 Total estimated model params size (MB)\n", + "25 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1: 0%| | 0/2 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/mae 0.643399715423584 │\n", + "│ test/rmse 0.9120855927467346 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mae \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.643399715423584 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/rmse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9120855927467346 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(dataloaders=test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/examples/training_classification.ipynb b/chemprop-updated/examples/training_classification.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..ed28f59641fff508c906b6c912b47df2aee5eee8 --- /dev/null +++ b/chemprop-updated/examples/training_classification.ipynb @@ -0,0 +1,848 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training Classification" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/training_classification.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from pathlib import Path\n", + "\n", + "from lightning import pytorch as pl\n", + "\n", + "from chemprop import data, featurizers, models, nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change data inputs here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = 
Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"classification\" / \"mol.csv\" # path to your data .csv file\n", + "num_workers = 0 # number of workers for dataloader. 0 means using main process for data loading\n", + "smiles_column = 'smiles' # name of the column containing SMILES strings\n", + "target_columns = ['NR-AhR', 'NR-ER', 'SR-ARE', 'SR-MMP'] # classification of activity (either 0 or 1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilesNR-AhRNR-ERSR-ARESR-MMP
0CCOc1ccc2nc(S(N)(=O)=O)sc2c11.0NaN1.00.0
1CCN1C(=O)NC(c2ccccc2)C1=O0.00.0NaN0.0
2CC[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]...NaNNaN0.0NaN
3CCCN(CC)C(CC)C(=O)Nc1c(C)cccc1C0.00.0NaN0.0
4CC(O)(P(=O)(O)O)P(=O)(O)O0.00.00.00.0
..................
495Cc1ccccc1CO[C@H]1C[C@]2(C(C)C)CC[C@@]1(C)O2NaN0.00.00.0
496NNc1ccc(C(=O)O)cc1NaNNaN0.00.0
497CCCCCCOc1ccccc1C(=O)O0.0NaN0.00.0
498O=C(OCc1ccccc1)C(=O)OCc1ccccc10.00.00.00.0
499CCCSc1ccc2[nH]c(NC(=O)OC)nc2c11.01.00.01.0
\n", + "

500 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " smiles NR-AhR NR-ER SR-ARE \\\n", + "0 CCOc1ccc2nc(S(N)(=O)=O)sc2c1 1.0 NaN 1.0 \n", + "1 CCN1C(=O)NC(c2ccccc2)C1=O 0.0 0.0 NaN \n", + "2 CC[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]... NaN NaN 0.0 \n", + "3 CCCN(CC)C(CC)C(=O)Nc1c(C)cccc1C 0.0 0.0 NaN \n", + "4 CC(O)(P(=O)(O)O)P(=O)(O)O 0.0 0.0 0.0 \n", + ".. ... ... ... ... \n", + "495 Cc1ccccc1CO[C@H]1C[C@]2(C(C)C)CC[C@@]1(C)O2 NaN 0.0 0.0 \n", + "496 NNc1ccc(C(=O)O)cc1 NaN NaN 0.0 \n", + "497 CCCCCCOc1ccccc1C(=O)O 0.0 NaN 0.0 \n", + "498 O=C(OCc1ccccc1)C(=O)OCc1ccccc1 0.0 0.0 0.0 \n", + "499 CCCSc1ccc2[nH]c(NC(=O)OC)nc2c1 1.0 1.0 0.0 \n", + "\n", + " SR-MMP \n", + "0 0.0 \n", + "1 0.0 \n", + "2 NaN \n", + "3 0.0 \n", + "4 0.0 \n", + ".. ... \n", + "495 0.0 \n", + "496 0.0 \n", + "497 0.0 \n", + "498 0.0 \n", + "499 1.0 \n", + "\n", + "[500 rows x 5 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get SMILES and targets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array(['CCOc1ccc2nc(S(N)(=O)=O)sc2c1', 'CCN1C(=O)NC(c2ccccc2)C1=O',\n", + " 'CC[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]3CC[C@@]21C',\n", + " 'CCCN(CC)C(CC)C(=O)Nc1c(C)cccc1C', 'CC(O)(P(=O)(O)O)P(=O)(O)O'],\n", + " dtype=object),\n", + " array([[ 1., nan, 1., 0.],\n", + " [ 0., 0., nan, 0.],\n", + " [nan, nan, 0., nan],\n", + " [ 0., 0., nan, 0.],\n", + " [ 0., 0., 0., 0.]]))" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Take a look at the first 5 SMILES strings and target columns\n", + "smis[:5], ys[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[09:05:03] WARNING: not removing hydrogen atom without neighbors\n" + ] + } + ], + "source": [ + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Perform data splitting for training, validation, and testing" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['SCAFFOLD_BALANCED',\n", + " 'RANDOM_WITH_REPEATED_SMILES',\n", + " 'RANDOM',\n", + " 'KENNARD_STONE',\n", + " 'KMEANS']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# available split types\n", + "list(data.SplitType.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.mol for d in all_data] # RDkit Mol objects are use for structure based splits\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "## Get MoleculeDataset" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0], featurizer)\n", + "val_dset = data.MoleculeDataset(val_data[0], featurizer)\n", + "test_dset = data.MoleculeDataset(test_data[0], featurizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get DataLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = data.build_dataloader(train_dset, num_workers=num_workers)\n", + "val_loader = data.build_dataloader(val_dset, num_workers=num_workers, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, num_workers=num_workers, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change Message-Passing Neural Network (MPNN) inputs here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Message Passing\n", + "A `Message passing` constructs molecular graphs using message passing to learn node-level hidden representations.\n", + "\n", + "Options are `mp = nn.BondMessagePassing()` or `mp = nn.AtomMessagePassing()`" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "mp = nn.BondMessagePassing()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aggregation\n", + "An `Aggregation` is responsible for constructing a graph-level representation from the set of node-level representations after message passing.\n", + "\n", + "Available options can be found in ` nn.agg.AggregationRegistry`, including\n", + "- `agg = nn.MeanAggregation()`\n", + "- `agg = nn.SumAggregation()`\n", + "- `agg = nn.NormAggregation()`" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'mean': ,\n", + " 'sum': ,\n", + " 'norm': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.agg.AggregationRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "agg = nn.MeanAggregation()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feed-Forward Network (FFN)\n", + "\n", + "A `FFN` takes the aggregated representations and make target predictions.\n", + "\n", + "Available options can be found in `nn.PredictorRegistry`.\n", + "\n", + "For regression:\n", + "- `ffn = nn.RegressionFFN()`\n", + "- `ffn = nn.MveFFN()`\n", + "- `ffn = nn.EvidentialFFN()`\n", + "\n", + "For classification:\n", + "- `ffn = nn.BinaryClassificationFFN()`\n", + "- `ffn = nn.BinaryDirichletFFN()`\n", + "- `ffn = nn.MulticlassClassificationFFN()`\n", + "- `ffn = nn.MulticlassDirichletFFN()`\n", + "\n", + "For spectral:\n", + "- `ffn = nn.SpectralFFN()` # will be available in future version" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'regression': ,\n", + " 'regression-mve': ,\n", + " 'regression-evidential': ,\n", + " 'regression-quantile': ,\n", + " 'classification': ,\n", + " 'classification-dirichlet': ,\n", + " 'multiclass': ,\n", + " 'multiclass-dirichlet': ,\n", + " 'spectral': \n", + "}\n" + ] + } + ], + 
"source": [ + "print(nn.PredictorRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "ffn = nn.BinaryClassificationFFN(n_tasks = len(target_columns))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Batch Norm\n", + "A `Batch Norm` normalizes the outputs of the aggregation by re-centering and re-scaling.\n", + "\n", + "Whether to use batch norm" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "batch_norm = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Metrics\n", + "`Metrics` are the ways to evaluate the performance of model predictions.\n", + "\n", + "Available options can be found in `metrics.MetricRegistry`, including" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'mse': ,\n", + " 'mae': ,\n", + " 'rmse': ,\n", + " 'bounded-mse': ,\n", + " 'bounded-mae': ,\n", + " 'bounded-rmse': ,\n", + " 'r2': ,\n", + " 'binary-mcc': ,\n", + " 'multiclass-mcc': ,\n", + " 'roc': ,\n", + " 'prc': ,\n", + " 'accuracy': ,\n", + " 'f1': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.metrics.MetricRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# AUROC used by default\n", + "metric_list = None " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructs MPNN" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): Identity()\n", + " (predictor): BinaryClassificationFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=4, bias=True)\n", + " )\n", + " )\n", + " (criterion): BCELoss(task_weights=[[1.0, 1.0, 1.0, 1.0]])\n", + " (output_transform): Identity()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0): BinaryAUROC()\n", + " (1): BCELoss(task_weights=[[1.0, 1.0, 1.0, 1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn = models.MPNN(mp, agg, ffn, batch_norm, metric_list)\n", + "\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_checkpointing=True, # Use `True` if you want to save 
model checkpoints. The checkpoints will be saved in the `checkpoints` folder.\n", + " enable_progress_bar=True,\n", + " accelerator=\"cpu\",\n", + " devices=1,\n", + " max_epochs=20, # number of epochs to train for\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start training" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/knathan/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "--------------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | BinaryClassificationFFN | 91.5 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "--------------------------------------------------------------------\n", + "319 K Trainable params\n", + "0 Non-trainable params\n", + "319 K Total params\n", + "1.277 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/roc 0.6421189308166504 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/roc \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.6421189308166504 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(mpnn, test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/examples/training_regression_multicomponent.ipynb b/chemprop-updated/examples/training_regression_multicomponent.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0e76efaef4e5d7b32e27c4e77687c516000df720 --- /dev/null 
+++ b/chemprop-updated/examples/training_regression_multicomponent.ipynb @@ -0,0 +1,713 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training Regression - Multicomponent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/training_regression_multicomponent.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from lightning import pytorch as pl\n", + "from pathlib import Path\n", + "\n", + "from chemprop import data, featurizers, models, nn\n", + "from chemprop.nn import metrics\n", + "from chemprop.models import multi\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Change your data inputs here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol+mol\" / \"mol+mol.csv\" # path to your data .csv file containing SMILES strings and target values\n", + "smiles_columns = ['smiles', 'solvent'] # name of the column containing SMILES strings\n", + "target_columns = ['peakwavs_max'] # list of names of the columns containing targets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilessolventpeakwavs_max
0CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2C...ClCCl642.0
1C(=C/c1cnccn1)\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3c...ClCCl420.0
2CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+]...O544.0
3c1ccc2[nH]ccc2c1O290.0
4CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5c...ClC(Cl)Cl736.0
............
95COc1ccc(C2CC(c3ccc(O)cc3)=NN2c2ccc(S(N)(=O)=O)...C1CCOC1359.0
96COc1ccc2c3c(c4ccc(OC)cc4c2c1)C1(c2ccccc2-c2ccc...C1CCCCC1386.0
97CCCCOc1c(C=C2N(C)c3ccccc3C2(C)C)c(=O)c1=OCCO425.0
98Cc1cc2ccc(-c3cccc4cccc(-c5ccc6cc(C)c(=O)oc6c5)...c1ccccc1324.0
99Cc1ccc(C(=O)c2c(C)c3ccc4cccc5c6cccc7ccc2c(c76)...ClCCl391.0
\n", + "

100 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " smiles solvent peakwavs_max\n", + "0 CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2C... ClCCl 642.0\n", + "1 C(=C/c1cnccn1)\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3c... ClCCl 420.0\n", + "2 CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+]... O 544.0\n", + "3 c1ccc2[nH]ccc2c1 O 290.0\n", + "4 CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5c... ClC(Cl)Cl 736.0\n", + ".. ... ... ...\n", + "95 COc1ccc(C2CC(c3ccc(O)cc3)=NN2c2ccc(S(N)(=O)=O)... C1CCOC1 359.0\n", + "96 COc1ccc2c3c(c4ccc(OC)cc4c2c1)C1(c2ccccc2-c2ccc... C1CCCCC1 386.0\n", + "97 CCCCOc1c(C=C2N(C)c3ccccc3C2(C)C)c(=O)c1=O CCO 425.0\n", + "98 Cc1cc2ccc(-c3cccc4cccc(-c5ccc6cc(C)c(=O)oc6c5)... c1ccccc1 324.0\n", + "99 Cc1ccc(C(=O)c2c(C)c3ccc4cccc5c6cccc7ccc2c(c76)... ClCCl 391.0\n", + "\n", + "[100 rows x 3 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get SMILES and targets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "smiss = df_input.loc[:, smiles_columns].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([['CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2CCCC)C(=O)N(CCCC)C1=S',\n", + " 'ClCCl'],\n", + " ['C(=C/c1cnccn1)\\\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3cnccn3)cc2)cc1',\n", + " 'ClCCl'],\n", + " ['CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+](C)C)cc-3oc2c1',\n", + " 'O'],\n", + " ['c1ccc2[nH]ccc2c1', 'O'],\n", + " ['CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5ccccc5c4C3(C)C)CCCC1=C2c1ccccc1C(=O)O',\n", + " 'ClC(Cl)Cl']], dtype=object),\n", + " array([[642.],\n", + " [420.],\n", + " [544.],\n", + " [290.],\n", + " [736.]]))" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Take a look at the first 5 SMILES strings and targets\n", + "smiss[:5], ys[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Make molecule datapoints\n", + "Create a list of lists containing the molecule datapoints for each components. The target is stored in the 0th component." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "all_data = [[data.MoleculeDatapoint.from_smi(smis[0], y) for smis, y in zip(smiss, ys)]]\n", + "all_data += [[data.MoleculeDatapoint.from_smi(smis[i]) for smis in smiss] for i in range(1, len(smiles_columns))]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Split data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Perform data splitting for training, validation, and testing" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "component_to_split_by = 0 # index of the component to use for structure based splits\n", + "mols = [d.mol for d in all_data[component_to_split_by]]\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Get MoleculeDataset for each components" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "\n", + "train_datasets = [data.MoleculeDataset(train_data[0][i], featurizer) for i in range(len(smiles_columns))]\n", + "val_datasets = [data.MoleculeDataset(val_data[0][i], featurizer) for i in range(len(smiles_columns))]\n", + "test_datasets = [data.MoleculeDataset(test_data[0][i], featurizer) for i in range(len(smiles_columns))]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Construct multicomponent dataset and scale the targets" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "train_mcdset = data.MulticomponentDataset(train_datasets)\n", + "scaler = train_mcdset.normalize_targets()\n", + "val_mcdset = data.MulticomponentDataset(val_datasets)\n", + "val_mcdset.normalize_targets(scaler)\n", + "test_mcdset = data.MulticomponentDataset(test_datasets)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Construct data loader" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = data.build_dataloader(train_mcdset)\n", + "val_loader = data.build_dataloader(val_mcdset, shuffle=False)\n", + "test_loader = data.build_dataloader(test_mcdset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Construct multicomponent MPNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MulticomponentMessagePassing\n", + "- `blocks`: a list of message passing block used for each components\n", + "- `n_components`: number of components" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "mcmp = nn.MulticomponentMessagePassing(\n", + " blocks=[nn.BondMessagePassing() for _ in range(len(smiles_columns))],\n", + " n_components=len(smiles_columns),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aggregation" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "agg = nn.MeanAggregation()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 
RegressionFFN" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "ffn = nn.RegressionFFN(\n", + " input_dim=mcmp.output_dim,\n", + " output_transform=output_transform,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "metric_list = [metrics.RMSE(), metrics.MAE()] # Only the first metric is used for training and early stopping" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MulticomponentMPNN" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MulticomponentMPNN(\n", + " (message_passing): MulticomponentMessagePassing(\n", + " (blocks): ModuleList(\n", + " (0-1): 2 x BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): Identity()\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=600, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0): RMSE(task_weights=[[1.0]])\n", + " (1): MAE(task_weights=[[1.0]])\n", + " (2): MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mcmpnn = multi.MulticomponentMPNN(\n", + " mcmp,\n", + " agg,\n", + " ffn,\n", + " metrics=metric_list,\n", + ")\n", + "\n", + "mcmpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_checkpointing=True,\n", + " enable_progress_bar=True,\n", + " accelerator=\"auto\",\n", + " devices=1,\n", + " max_epochs=20, # number of epochs to train for\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start training" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/knathan/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading 
`train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "-------------------------------------------------------------------------\n", + "0 | message_passing | MulticomponentMessagePassing | 455 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | RegressionFFN | 180 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "-------------------------------------------------------------------------\n", + "636 K Trainable params\n", + "0 Non-trainable params\n", + "636 K Total params\n", + "2.544 Total estimated model params size (MB)\n", + "35 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking: | | 0/? [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/mae 87.1765365600586 │\n", + "│ test/rmse 105.41293334960938 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mae \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 87.1765365600586 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/rmse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 105.41293334960938 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(mcmpnn, test_loader)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop-updated/examples/training_regression_reaction.ipynb b/chemprop-updated/examples/training_regression_reaction.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..5e5439d54f3aefe1b94d73656fb3b65e557def7f --- /dev/null +++ b/chemprop-updated/examples/training_regression_reaction.ipynb @@ -0,0 +1,804 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training Regression - Reaction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/training_regression_reaction.ipynb)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from lightning import pytorch as pl\n", + "from pathlib import Path\n", + "\n", + "from chemprop import data, featurizers, models, nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change data inputs here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"rxn\" / \"rxn.csv\"\n", + "num_workers = 0 # number of workers for dataloader. 0 means using main process for data loading\n", + "smiles_column = 'smiles'\n", + "target_columns = ['ea']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilesea
0[O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:1...8.898934
1[C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:...5.464328
2[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H...5.270552
3[C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])...8.473006
4[C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H...5.579037
.........
95[C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([C:4]...9.295665
96[O:1]=[C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11...7.753442
97[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H...10.650215
98[C:1]1([H:8])([H:9])[C@@:2]2([H:10])[N:3]1[C:4...10.138945
99[C:1]([C@@:2]1([C:3]([C:4]([O:5][H:15])([H:13]...6.979934
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " smiles ea\n", + "0 [O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:1... 8.898934\n", + "1 [C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:... 5.464328\n", + "2 [C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H... 5.270552\n", + "3 [C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])... 8.473006\n", + "4 [C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H... 5.579037\n", + ".. ... ...\n", + "95 [C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([C:4]... 9.295665\n", + "96 [O:1]=[C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11... 7.753442\n", + "97 [C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H... 10.650215\n", + "98 [C:1]1([H:8])([H:9])[C@@:2]2([H:10])[N:3]1[C:4... 10.138945\n", + "99 [C:1]([C@@:2]1([C:3]([C:4]([O:5][H:15])([H:13]... 6.979934\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load smiles and targets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array(['[O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:15])([H:13])[H:14])([H:11])[H:12])([H:9])[H:10])[H:8]>>[C:3](=[C:4]=[O:5])([H:11])[H:12].[C:6]([O:7][H:15])([H:8])([H:13])[H:14].[O:1]=[C:2]([H:9])[H:10]',\n", + " '[C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:4]3([H:11])[O:5][C@:6]1([H:12])[C@@:7]23[H:13]>>[C:1]1([H:8])([H:9])[O:2][C:3]([H:10])=[C:7]([H:13])[C@:6]1([O+:5]=[C-:4][H:11])[H:12]',\n", + " '[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:16])=[C:7]1[H:17])([H:8])([H:9])[H:10]',\n", + " '[C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C-:1]([O+:2]=[C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])[H:12])([H:8])[H:10].[H:9][H:11]',\n", + " '[C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]=[C:4]([H:10])[H:11])[C:5](=[O:6])[H:12])([H:7])([H:8])[H:9]'],\n", + " dtype=object),\n", + " array([[8.8989335 ],\n", + " [5.46432769],\n", + " [5.27055228],\n", + " [8.47300569],\n", + " [5.57903696]]))" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values\n", + "\n", + "smis[:5], ys[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "all_data = [data.ReactionDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Perform data splitting for training, validation, and testing" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.rct for d in all_data] # Can either split by reactants (.rct) or products (.pdt)\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "# Defining the featurizer\n", + "\n", + "Reactions can be featurized using the ```CondensedGraphOfReactionFeaturizer``` (also labeled ```CGRFeaturizer```).\n", + "\n", + "\n", + "Use ```_mode``` keyword to set the mode by which a reaction should be featurized into a ```MolGraph```.\n", + "\n", + "Options are can be found with ```featurizers.RxnMode.keys```" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "REAC_PROD\n", + "REAC_PROD_BALANCE\n", + "REAC_DIFF\n", + "REAC_DIFF_BALANCE\n", + "PROD_DIFF\n", + "PROD_DIFF_BALANCE\n" + ] + } + ], + "source": [ + "for key in featurizers.RxnMode.keys():\n", + " print(key)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.CondensedGraphOfReactionFeaturizer(mode_=\"PROD_DIFF\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get ReactionDatasets" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "train_dset = data.ReactionDataset(train_data[0], featurizer)\n", + "scaler = train_dset.normalize_targets()\n", + "\n", + "val_dset = data.ReactionDataset(val_data[0], featurizer)\n", + "val_dset.normalize_targets(scaler)\n", + "test_dset = data.ReactionDataset(test_data[0], featurizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get dataloaders" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = data.build_dataloader(train_dset, num_workers=num_workers)\n", + "val_loader = data.build_dataloader(val_dset, num_workers=num_workers, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, num_workers=num_workers, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change Message-Passing Neural Network (MPNN) inputs here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Message passing\n", + "\n", + "Message passing blocks must be given the shape of the featurizer's outputs.\n", + "\n", + "Options are `mp = nn.BondMessagePassing()` or `mp = nn.AtomMessagePassing()`" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "fdims = featurizer.shape # the dimensions of the featurizer, given as (atom_dims, bond_dims).\n", + "mp = nn.BondMessagePassing(*fdims)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aggregation" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'mean': ,\n", + " 'sum': ,\n", + " 'norm': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.agg.AggregationRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "agg = nn.MeanAggregation()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feed-Forward Network (FFN)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'regression': ,\n", + " 'regression-mve': ,\n", + " 'regression-evidential': ,\n", + " 'regression-quantile': 
,\n", + " 'classification': ,\n", + " 'classification-dirichlet': ,\n", + " 'multiclass': ,\n", + " 'multiclass-dirichlet': ,\n", + " 'spectral': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.PredictorRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "ffn = nn.RegressionFFN(output_transform=output_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Batch norm" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "batch_norm = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'mse': ,\n", + " 'mae': ,\n", + " 'rmse': ,\n", + " 'bounded-mse': ,\n", + " 'bounded-mae': ,\n", + " 'bounded-rmse': ,\n", + " 'r2': ,\n", + " 'binary-mcc': ,\n", + " 'multiclass-mcc': ,\n", + " 'roc': ,\n", + " 'prc': ,\n", + " 'accuracy': ,\n", + " 'f1': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.metrics.MetricRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metric_list = [nn.metrics.RMSE(), nn.metrics.MAE()] \n", + "# Only the first metric is used for training and early stopping" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Construct MPNN" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=134, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=406, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0): RMSE(task_weights=[[1.0]])\n", + " (1): MAE(task_weights=[[1.0]])\n", + " (2): MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn = models.MPNN(mp, agg, ffn, batch_norm, metric_list)\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training and testing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_checkpointing=True, # Use `True` if you want to save model checkpoints. The checkpoints will be saved in the `checkpoints` folder.\n", + " enable_progress_bar=True,\n", + " accelerator=\"auto\",\n", + " devices=1,\n", + " max_epochs=20, # number of epochs to train for\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start training" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/knathan/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 252 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | BatchNorm1d | 600 | train\n", + "3 | predictor | RegressionFFN | 90.6 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "343 K Trainable params\n", + "0 Non-trainable params\n", + "343 K Total params\n", + "1.374 Total estimated model params size (MB)\n", + "25 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/mae 1.111189842224121 │\n", + "│ test/rmse 1.4387098550796509 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mae \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.111189842224121 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/rmse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.4387098550796509 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(mpnn, test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + 
}, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop-updated/examples/transfer_learning.ipynb b/chemprop-updated/examples/transfer_learning.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..328e2197e98593be679a016c7893ce746899d05b --- /dev/null +++ b/chemprop-updated/examples/transfer_learning.ipynb @@ -0,0 +1,953 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Transfer Learning / Pretraining\n", + "Transfer learning (or pretraining) leverages knowledge from a pre-trained model on a related task to enhance performance on a new task. In Chemprop, we can use pre-trained model checkpoints to initialize a new model and freeze components of the new model during training, as demonstrated in this notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/transfer_learning.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from pathlib import Path\n", + "\n", + "from lightning import pytorch as pl\n", + "from sklearn.preprocessing import StandardScaler\n", + "import torch\n", + "\n", + "from chemprop import data, featurizers, models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change data inputs here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\" # path to your data .csv file\n", + "num_workers = 0 # number of workers for dataloader. 0 means using main process for data loading\n", + "smiles_column = 'smiles' # name of the column containing SMILES strings\n", + "target_columns = ['lipo'] # list of names of the columns containing targets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smileslipo
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc143.54
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...-1.18
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl3.69
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...3.37
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...3.10
.........
95CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...2.20
96CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...2.04
97CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...4.49
98COc1ccc(Cc2c(N)n[nH]c2N)cc10.20
99CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...2.00
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get SMILES and targets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14',\n", + " 'COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23',\n", + " 'COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl',\n", + " 'OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3',\n", + " 'Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1'],\n", + " dtype=object)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smis[:5] # show first 5 SMILES strings" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 3.54],\n", + " [-1.18],\n", + " [ 3.69],\n", + " [ 3.37],\n", + " [ 3.1 ]])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ys[:5] # show first 5 targets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Perform data splitting for training, validation, and testing" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['SCAFFOLD_BALANCED',\n", + " 'RANDOM_WITH_REPEATED_SMILES',\n", + " 'RANDOM',\n", + " 'KENNARD_STONE',\n", + " 'KMEANS']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# available split types\n", + "list(data.SplitType.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.mol for d in all_data] # RDkit Mol objects are use for structure based splits\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change checkpoint model 
inputs here\n",
    "Both message-passing neural networks (MPNNs) and multi-component MPNNs can have their weights initialized from a checkpoint file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "chemprop_dir = Path.cwd().parent\n",
    "checkpoint_path = chemprop_dir / \"tests\" / \"data\" / \"example_model_v2_regression_mol.ckpt\"  # path to the checkpoint file.\n",
    "# If the checkpoint file was generated using the training notebook, it will be in the `checkpoints` folder with a name similar to `checkpoints/epoch=19-step=180.ckpt`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "mpnn_cls = models.MPNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MPNN(\n",
       "  (message_passing): BondMessagePassing(\n",
       "    (W_i): Linear(in_features=86, out_features=300, bias=False)\n",
       "    (W_h): Linear(in_features=300, out_features=300, bias=False)\n",
       "    (W_o): Linear(in_features=372, out_features=300, bias=True)\n",
       "    (dropout): Dropout(p=0.0, inplace=False)\n",
       "    (tau): ReLU()\n",
       "    (V_d_transform): Identity()\n",
       "    (graph_transform): GraphTransform(\n",
       "      (V_transform): Identity()\n",
       "      (E_transform): Identity()\n",
       "    )\n",
       "  )\n",
       "  (agg): MeanAggregation()\n",
       "  (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "  (predictor): RegressionFFN(\n",
       "    (ffn): MLP(\n",
       "      (0): Sequential(\n",
       "        (0): Linear(in_features=300, out_features=300, bias=True)\n",
       "      )\n",
       "      (1): Sequential(\n",
       "        (0): ReLU()\n",
       "        (1): Dropout(p=0.0, inplace=False)\n",
       "        (2): Linear(in_features=300, out_features=1, bias=True)\n",
       "      )\n",
       "    )\n",
       "    (criterion): MSE(task_weights=[[1.0]])\n",
       "    (output_transform): UnscaleTransform()\n",
       "  )\n",
       "  (X_d_transform): Identity()\n",
       "  (metrics): ModuleList(\n",
       "    (0-1): 2 x MSE(task_weights=[[1.0]])\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mpnn = mpnn_cls.load_from_file(checkpoint_path)\n",
    "mpnn"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Scale fine-tuning data with the model's target scaler\n",
    "\n",
    "If the pre-trained model is a regression model, it was probably trained on scaled targets. The scaler is saved as part of the model and applied during prediction. For further training, we therefore need to scale the fine-tuning data with the same target scaler."
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "pretraining_scaler = StandardScaler()\n", + "pretraining_scaler.mean_ = mpnn.predictor.output_transform.mean.numpy()\n", + "pretraining_scaler.scale_ = mpnn.predictor.output_transform.scale.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get MoleculeDataset" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0], featurizer)\n", + "train_dset.normalize_targets(pretraining_scaler)\n", + "\n", + "val_dset = data.MoleculeDataset(val_data[0], featurizer)\n", + "val_dset.normalize_targets(pretraining_scaler)\n", + "\n", + "test_dset = data.MoleculeDataset(test_data[0], featurizer)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get DataLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = data.build_dataloader(train_dset, num_workers=num_workers)\n", + "val_loader = data.build_dataloader(val_dset, num_workers=num_workers, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, num_workers=num_workers, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Freezing MPNN and FFN layers\n", + "Certain layers of a pre-trained model can be kept unchanged during further training on a new task." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Freezing the MPNN" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn.message_passing.apply(lambda module: module.requires_grad_(False))\n", + "mpnn.message_passing.eval()\n", + "mpnn.bn.apply(lambda module: module.requires_grad_(False))\n", + "mpnn.bn.eval() # Set batch norm layers to eval mode to freeze running mean and running var." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Freezing FFN layers" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "frzn_ffn_layers = 1 # the number of consecutive FFN layers to freeze." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "for idx in range(frzn_ffn_layers):\n", + " mpnn.predictor.ffn[idx].requires_grad_(False)\n", + " mpnn.predictor.ffn[idx + 1].eval()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_checkpointing=True, # Use `True` if you want to save model checkpoints. 
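(Note added for clarity, not part of the original notebook cells: the sketch below shows, in plain Python, how the pretraining scaler reconstruction above could be verified. The attribute names `mean_` and `scale_` follow scikit-learn's `StandardScaler` convention; treating the model's `output_transform.mean` and `output_transform.scale` tensors as 1-D NumPy arrays is an assumption based on the cells shown here.)

```python
# Hedged sketch: sanity-check that the reconstructed scaler round-trips a target value.
import numpy as np
from sklearn.preprocessing import StandardScaler

pretraining_scaler = StandardScaler()
# Assumed shapes: both tensors are 1-D with one entry per target column.
pretraining_scaler.mean_ = mpnn.predictor.output_transform.mean.numpy()
pretraining_scaler.scale_ = mpnn.predictor.output_transform.scale.numpy()

y_raw = np.array([[3.54]])                       # an example lipophilicity value
y_scaled = (y_raw - pretraining_scaler.mean_) / pretraining_scaler.scale_
y_back = y_scaled * pretraining_scaler.scale_ + pretraining_scaler.mean_
assert np.allclose(y_raw, y_back)                # scaling is invertible
```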
The checkpoints will be saved in the `checkpoints` folder.\n",
    "    enable_progress_bar=True,\n",
    "    accelerator=\"auto\",\n",
    "    devices=1,\n",
    "    max_epochs=20,  # number of epochs to train for\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Start training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n",
      "Loading `train_dataloader` to estimate number of stepping batches.\n",
      "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
      "\n",
      "  | Name            | Type               | Params | Mode \n",
      "---------------------------------------------------------------\n",
      "0 | message_passing | BondMessagePassing | 227 K  | eval \n",
      "1 | agg             | MeanAggregation    | 0      | train\n",
      "2 | bn              | BatchNorm1d        | 600    | eval \n",
      "3 | predictor       | RegressionFFN      | 90.6 K | train\n",
      "4 | X_d_transform   | Identity           | 0      | train\n",
      "5 | metrics         | ModuleList         | 0      | train\n",
      "---------------------------------------------------------------\n",
      "301       Trainable params\n",
      "318 K     Non-trainable params\n",
      "318 K     Total params\n",
      "1.276     Total estimated model params size (MB)\n",
      "11        Modules in train mode\n",
      "15        Modules in eval mode\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]"
     ]
    },
    {
     "data": {
      "text/html": [
       "<pre>┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
       "┃        Test metric        ┃       DataLoader 0        ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
       "│         test/mse          │    0.9625480771064758     │\n",
       "└───────────────────────────┴───────────────────────────┘\n",
       "</pre>\n"
      ],
      "text/plain": [
       "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
       "┃        Test metric        ┃       DataLoader 0        ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
       "│         test/mse          │    0.9625480771064758     │\n",
       "└───────────────────────────┴───────────────────────────┘\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "results = trainer.test(mpnn, test_loader)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Transfer learning with multicomponent models\n",
    "Multi-component MPNN models have an individual MPNN block for each molecule they parse in one input. These MPNN blocks can be frozen independently for transfer learning."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Change data inputs here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "chemprop_dir = Path.cwd().parent\n",
    "checkpoint_path = chemprop_dir / \"tests\" / \"data\" / \"example_model_v2_regression_mol+mol.ckpt\"  # path to the checkpoint file.
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Change checkpoint model inputs here" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MulticomponentMPNN(\n", + " (message_passing): MulticomponentMessagePassing(\n", + " (blocks): ModuleList(\n", + " (0-1): 2 x BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): GraphTransform(\n", + " (V_transform): Identity()\n", + " (E_transform): Identity()\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(600, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=600, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn_cls = models.MulticomponentMPNN\n", + "mcmpnn = mpnn_cls.load_from_checkpoint(checkpoint_path)\n", + "mcmpnn" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "blocks_to_freeze = [0, 1] # a list of indices of the individual MPNN blocks to freeze before training." 
 + ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "BatchNorm1d(600, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mcmpnn = mpnn_cls.load_from_checkpoint(checkpoint_path)\n",
    "for i in blocks_to_freeze:\n",
    "    mp_block = mcmpnn.message_passing.blocks[i]\n",
    "    mp_block.apply(lambda module: module.requires_grad_(False))\n",
    "    mp_block.eval()\n",
    "mcmpnn.bn.apply(lambda module: module.requires_grad_(False))\n",
    "mcmpnn.bn.eval()  # Freeze batch norm statistics as well."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "chemprop",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
diff --git a/chemprop-updated/examples/uncertainty.ipynb b/chemprop-updated/examples/uncertainty.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..aff3b9b190fb2f9d7a50bbecb6e56701a33fa566
--- /dev/null
+++ b/chemprop-updated/examples/uncertainty.ipynb
@@ -0,0 +1,1152 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Uncertainty Quantification"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/uncertainty.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install chemprop from GitHub if running in Google Colab\n",
+    "import os\n",
+    "\n",
+    "if os.getenv(\"COLAB_RELEASE_TAG\"):\n",
+    "    try:\n",
+    "        import chemprop\n",
+    "    except ImportError:\n",
+    "        !git clone https://github.com/chemprop/chemprop.git\n",
+    "        %cd chemprop\n",
+    "        !pip install .\n",
+    "        %cd examples"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Import packages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "from pathlib import Path\n",
+    "\n",
+    "from lightning import pytorch as pl\n",
+    "from lightning.pytorch.callbacks import ModelCheckpoint\n",
+    "\n",
+    "from chemprop import data, models, nn, uncertainty\n",
+    "from chemprop.models import save_model, load_model\n",
+    "from chemprop.cli.conf import NOW\n",
+    "from chemprop.cli.predict import find_models\n",
+    "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# Training"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chemprop_dir = Path.cwd().parent\n",
+    "input_path = (\n",
+    "    chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n",
+    ")  # path to your data .csv file\n",
+    "df_input = pd.read_csv(input_path)\n",
+    "smis = df_input.loc[:, \"smiles\"].values\n",
+    "ys = df_input.loc[:, [\"lipo\"]].values\n",
+    "all_data =
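(Note added for clarity, not part of the original notebook cells: the short sketch below summarizes, in plain Python, the freezing pattern used for the multicomponent model above. The helper name `count_trainable` is hypothetical; the module attributes `message_passing.blocks` and `bn` are taken from the cells shown here.)

```python
# Hedged sketch: verify that the selected MPNN blocks were actually frozen.
def count_trainable(module):
    # Count parameters that will still receive gradients during fine-tuning.
    return sum(p.numel() for p in module.parameters() if p.requires_grad)

for i, block in enumerate(mcmpnn.message_passing.blocks):
    print(f"block {i}: {count_trainable(block)} trainable params")  # expect 0 for frozen blocks
print(f"batch norm: {count_trainable(mcmpnn.bn)} trainable params")  # expect 0
print(f"predictor:  {count_trainable(mcmpnn.predictor)} trainable params")  # stays trainable
```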
[data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "mols = [d.mol for d in all_data]  # RDKit Mol objects are used for structure-based splits\n",
    "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n",
    "train_data, val_data, test_data = data.split_data_by_indices(\n",
    "    all_data, train_indices, val_indices, test_indices\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_dset = data.MoleculeDataset(train_data[0])\n",
    "scaler = train_dset.normalize_targets()\n",
    "\n",
    "val_dset = data.MoleculeDataset(val_data[0])\n",
    "val_dset.normalize_targets(scaler)\n",
    "\n",
    "test_dset = data.MoleculeDataset(test_data[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_loader = data.build_dataloader(train_dset)\n",
    "val_loader = data.build_dataloader(val_dset, shuffle=False)\n",
    "test_loader = data.build_dataloader(test_dset, shuffle=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Construct MPNN\n",
    "\n",
    "- A `Message passing` module builds molecular graphs and uses message passing to learn node-level hidden representations.\n",
    "\n",
    "- An `Aggregation` constructs a graph-level representation from the set of node-level representations after message passing.\n",
    "\n",
    "- An `FFN` takes the aggregated representations and makes target predictions. To obtain uncertainty predictions, the `FFN` must be chosen accordingly.\n",
    "\n",
    "  For regression:\n",
    "  - `ffn = nn.RegressionFFN()`\n",
    "  - `ffn = nn.MveFFN()`\n",
    "  - `ffn = nn.EvidentialFFN()`\n",
    "\n",
    "  For classification:\n",
    "  - `ffn = nn.BinaryClassificationFFN()`\n",
    "  - `ffn = nn.BinaryDirichletFFN()`\n",
    "  - `ffn = nn.MulticlassClassificationFFN()`\n",
    "  - `ffn = nn.MulticlassDirichletFFN()`\n",
    "\n",
    "  For spectral:\n",
    "  - `ffn = nn.SpectralFFN()`  # will be available in a future version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MPNN(\n",
       "  (message_passing): BondMessagePassing(\n",
       "    (W_i): Linear(in_features=86, out_features=300, bias=False)\n",
       "    (W_h): Linear(in_features=300, out_features=300, bias=False)\n",
       "    (W_o): Linear(in_features=372, out_features=300, bias=True)\n",
       "    (dropout): Dropout(p=0.0, inplace=False)\n",
       "    (tau): ReLU()\n",
       "    (V_d_transform): Identity()\n",
       "    (graph_transform): Identity()\n",
       "  )\n",
       "  (agg): MeanAggregation()\n",
       "  (bn): Identity()\n",
       "  (predictor): MveFFN(\n",
       "    (ffn): MLP(\n",
       "      (0): Sequential(\n",
       "        (0): Linear(in_features=300, out_features=300, bias=True)\n",
       "      )\n",
       "      (1): Sequential(\n",
       "        (0): ReLU()\n",
       "        (1): Dropout(p=0.0, inplace=False)\n",
       "        (2): Linear(in_features=300, out_features=2, bias=True)\n",
       "      )\n",
       "    )\n",
       "    (criterion): MVELoss(task_weights=[[1.0]])\n",
       "    (output_transform): UnscaleTransform()\n",
       "  )\n",
       "  (X_d_transform): Identity()\n",
       "  (metrics): ModuleList(\n",
       "    (0): MSE(task_weights=[[1.0]])\n",
       "    (1): MVELoss(task_weights=[[1.0]])\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mp = nn.BondMessagePassing()\n",
    "agg
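(Note added for clarity, not part of the original notebook cells: the list above enumerates the FFN variants for uncertainty-aware prediction. As a minimal sketch, and assuming only the constructors named in that list, the choice could be made configurable like this; the dictionary name `FFN_BY_TASK` is hypothetical.)

```python
# Hedged sketch: pick an uncertainty-aware FFN by task type, using the constructors listed above.
from chemprop import nn

FFN_BY_TASK = {
    "regression-mve": nn.MveFFN,
    "regression-evidential": nn.EvidentialFFN,
    "classification-dirichlet": nn.BinaryDirichletFFN,
}

task = "regression-mve"          # assumed example setting
ffn = FFN_BY_TASK[task]()        # pass output_transform=... for scaled regression targets
```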
= nn.MeanAggregation()\n",
    "output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)\n",
    "# Change to another predictor if needed.\n",
    "ffn = nn.MveFFN(output_transform=output_transform)\n",
    "mpnn = models.MPNN(mp, agg, ffn, batch_norm=False)\n",
    "mpnn"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set up trainer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_output_dir = Path(f\"chemprop_training/{NOW}\")\n",
    "monitor_mode = \"min\" if mpnn.metrics[0].higher_is_better else \"max\"\n",
    "checkpointing = ModelCheckpoint(\n",
    "    model_output_dir / \"checkpoints\",\n",
    "    \"best-{epoch}-{val_loss:.2f}\",\n",
    "    \"val_loss\",\n",
    "    mode=monitor_mode,\n",
    "    save_last=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "GPU available: True (mps), used: False\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n",
      "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.\n"
     ]
    }
   ],
   "source": [
    "trainer = pl.Trainer(\n",
    "    logger=False,\n",
    "    enable_checkpointing=True,\n",
    "    enable_progress_bar=False,\n",
    "    accelerator=\"cpu\",\n",
    "    callbacks=[checkpointing],\n",
    "    devices=1,\n",
    "    max_epochs=20,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Start training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading `train_dataloader` to estimate number of stepping batches.\n",
      "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
      "\n",
      "  | Name            | Type               | Params | Mode \n",
      "---------------------------------------------------------------\n",
      "0 | message_passing | BondMessagePassing | 227 K  | train\n",
      "1 | agg             | MeanAggregation    | 0      | train\n",
      "2 | bn              | Identity           | 0      | train\n",
      "3 | predictor       | MveFFN             | 90.9 K | train\n",
      "4 | X_d_transform   | Identity           | 0      | train\n",
      "5 | metrics         | ModuleList         | 0      | train\n",
      "---------------------------------------------------------------\n",
      "318 K     Trainable params\n",
      "0         Non-trainable params\n",
      "318 K     Total params\n",
      "1.274     Total estimated model params size (MB)\n",
      "24        Modules in train mode\n",
      "0         Modules in eval mode\n",
      "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck.
Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n" + ] + } + ], + "source": [ + "trainer.fit(mpnn, train_loader, val_loader)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save the best model" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "best_model_path = checkpointing.best_model_path\n", + "model = mpnn.__class__.load_from_checkpoint(best_model_path)\n", + "p_model = model_output_dir / \"best.pt\"\n", + "save_model(p_model, model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predicting" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Change model input here" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smileslipo
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc143.54
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...-1.18
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl3.69
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...3.37
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...3.10
.........
95CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...2.20
96CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...2.04
97CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...4.49
98COc1ccc(Cc2c(N)n[nH]c2N)cc10.20
99CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...2.00
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "test_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n", + "df_test = pd.read_csv(test_path)\n", + "test_dset = data.MoleculeDataset(test_data[0])\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)\n", + "df_test" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# use the validation set from the training as the calibration set as an example\n", + "cal_dset = data.MoleculeDataset(val_data[0])\n", + "cal_loader = data.build_dataloader(cal_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructs uncertainty estimator\n", + "An uncertianty estimator can make model predictions and associated uncertainty predictions.\n", + "\n", + "Available options can be found in `uncertainty.UncertaintyEstimatorRegistry`." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'none': ,\n", + " 'mve': ,\n", + " 'ensemble': ,\n", + " 'classification': ,\n", + " 'evidential-total': ,\n", + " 'evidential-epistemic': ,\n", + " 'evidential-aleatoric': ,\n", + " 'dropout': ,\n", + " 'classification-dirichlet': ,\n", + " 'multiclass-dirichlet': ,\n", + " 'quantile-regression': \n", + "}\n" + ] + } + ], + "source": [ + "print(uncertainty.UncertaintyEstimatorRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "unc_estimator = uncertainty.MVEEstimator()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructs uncertainty calibrator\n", + "An uncertianty calibrator can calibrate the predicted uncertainties.\n", + "\n", + "Available options can be found in `uncertainty.UncertaintyCalibratorRegistry`.\n", + "\n", + "For regression:\n", + "\n", + "- ZScalingCalibrator\n", + "\n", + "- ZelikmanCalibrator\n", + "\n", + "- MVEWeightingCalibrator\n", + "\n", + "- RegressionConformalCalibrator\n", + "\n", + "For binary classification:\n", + "\n", + "- PlattCalibrator\n", + "\n", + "- IsotonicCalibrator\n", + "\n", + "- MultilabelConformalCalibrator\n", + "\n", + "For multiclass classification:\n", + "\n", + "- MulticlassConformalCalibrator\n", + "\n", + "- AdaptiveMulticlassConformalCalibrator\n", + "\n", + "- IsotonicMulticlassCalibrator" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'zscaling': ,\n", + " 'zelikman-interval': ,\n", + " 'mve-weighting': ,\n", 
+ " 'conformal-regression': ,\n", + " 'platt': ,\n", + " 'isotonic': ,\n", + " 'conformal-multilabel': ,\n", + " 'conformal-multiclass': ,\n", + " 'conformal-adaptive': ,\n", + " 'isotonic-multiclass': \n", + "}\n" + ] + } + ], + "source": [ + "print(uncertainty.UncertaintyCalibratorRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "unc_calibrator = uncertainty.ZScalingCalibrator()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructs uncertainty evaluator\n", + "An uncertianty evaluator can evaluates the quality of uncertainty estimates.\n", + "\n", + "Available options can be found in `uncertainty.UncertaintyEvaluatorRegistry`.\n", + "\n", + "For regression:\n", + "\n", + "- NLLRegressionEvaluator\n", + "\n", + "- CalibrationAreaEvaluator\n", + "\n", + "- ExpectedNormalizedErrorEvaluator\n", + "\n", + "- SpearmanEvaluator\n", + "\n", + "- RegressionConformalEvaluator\n", + "\n", + "For binary classification:\n", + "\n", + "- NLLClassEvaluator\n", + "\n", + "- MultilabelConformalEvaluator\n", + "\n", + "\n", + "For multiclass classification:\n", + "\n", + "- NLLMulticlassEvaluator\n", + "\n", + "- MulticlassConformalEvaluator" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'nll-regression': ,\n", + " 'miscalibration_area': ,\n", + " 'ence': ,\n", + " 'spearman': ,\n", + " 'conformal-coverage-regression': ,\n", + " 'nll-classification': ,\n", + " 'conformal-coverage-classification': ,\n", + " 'nll-multiclass': ,\n", + " 'conformal-coverage-multiclass': \n", + "}\n" + ] + } + ], + "source": [ + "print(uncertainty.UncertaintyEvaluatorRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "unc_evaluators = [\n", + " uncertainty.NLLRegressionEvaluator(),\n", + " uncertainty.CalibrationAreaEvaluator(),\n", + " uncertainty.ExpectedNormalizedErrorEvaluator(),\n", + " uncertainty.SpearmanEvaluator(),\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load model" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "model_paths = find_models([model_output_dir])\n", + "models = [load_model(model_path, multicomponent=False) for model_path in model_paths]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. 
You can set it by doing `Trainer(accelerator='gpu')`.\n"
     ]
    }
   ],
   "source": [
    "trainer = pl.Trainer(logger=False, enable_progress_bar=True, accelerator=\"cpu\", devices=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Make uncertainty estimates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predicting DataLoader 0: 100%|███████████████████| 1/1 [00:00<00:00, 126.93it/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilestargetpredunc
0Cc1ccc(NC(=O)c2cscn2)cc1-n1cnc2ccc(N3CCN(C)CC3...2.062.0474741.543233
1O=C(Nc1nnc(C(=O)Nc2ccc(N3CCOCC3)cc2)o1)c1ccc(C...1.922.0475611.534631
2CNCCCC12CCC(c3ccccc31)c1ccccc120.892.0620571.548673
3Oc1ncnc2scc(-c3ccsc3)c122.252.0618131.555989
4C=CC(=O)Nc1cccc(CN2C(=O)N(c3c(Cl)c(OC)cc(OC)c3...2.042.0382381.532385
5COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCCC13.132.0488351.535416
6O=C(COc1ccccc1)c1ccccc12.872.0668441.534430
7CC(C)c1ccc2oc3nc(N)c(C(=O)O)cc3c(=O)c2c11.102.0537711.550390
8N#Cc1ccc(F)c(-c2cc(C(F)(F)F)ccc2OCC(=O)O)c1-0.162.0475541.535353
9COc1cnc(-c2ccccn2)nc1N(C)C1.902.0505011.537318
\n", + "
" + ], + "text/plain": [ + " smiles target pred \\\n", + "0 Cc1ccc(NC(=O)c2cscn2)cc1-n1cnc2ccc(N3CCN(C)CC3... 2.06 2.047474 \n", + "1 O=C(Nc1nnc(C(=O)Nc2ccc(N3CCOCC3)cc2)o1)c1ccc(C... 1.92 2.047561 \n", + "2 CNCCCC12CCC(c3ccccc31)c1ccccc12 0.89 2.062057 \n", + "3 Oc1ncnc2scc(-c3ccsc3)c12 2.25 2.061813 \n", + "4 C=CC(=O)Nc1cccc(CN2C(=O)N(c3c(Cl)c(OC)cc(OC)c3... 2.04 2.038238 \n", + "5 COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCCC1 3.13 2.048835 \n", + "6 O=C(COc1ccccc1)c1ccccc1 2.87 2.066844 \n", + "7 CC(C)c1ccc2oc3nc(N)c(C(=O)O)cc3c(=O)c2c1 1.10 2.053771 \n", + "8 N#Cc1ccc(F)c(-c2cc(C(F)(F)F)ccc2OCC(=O)O)c1 -0.16 2.047554 \n", + "9 COc1cnc(-c2ccccn2)nc1N(C)C 1.90 2.050501 \n", + "\n", + " unc \n", + "0 1.543233 \n", + "1 1.534631 \n", + "2 1.548673 \n", + "3 1.555989 \n", + "4 1.532385 \n", + "5 1.535416 \n", + "6 1.534430 \n", + "7 1.550390 \n", + "8 1.535353 \n", + "9 1.537318 " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_predss, test_uncss = unc_estimator(test_loader, models, trainer)\n", + "test_preds = test_predss.mean(0)\n", + "test_uncs = test_uncss.mean(0)\n", + "\n", + "df_test = pd.DataFrame(\n", + " {\n", + " \"smiles\": test_dset.smiles,\n", + " \"target\": test_dset.Y.reshape(-1),\n", + " \"pred\": test_preds.reshape(-1),\n", + " \"unc\": test_uncs.reshape(-1),\n", + " }\n", + ")\n", + "\n", + "df_test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Apply uncertainty calibration" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|███████████████████| 1/1 [00:00<00:00, 228.26it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilestargetpredunccal_unc
0Cc1ccc(NC(=O)c2cscn2)cc1-n1cnc2ccc(N3CCN(C)CC3...2.062.0474741.5432331.691122
1O=C(Nc1nnc(C(=O)Nc2ccc(N3CCOCC3)cc2)o1)c1ccc(C...1.922.0475611.5346311.681696
2CNCCCC12CCC(c3ccccc31)c1ccccc120.892.0620571.5486731.697084
3Oc1ncnc2scc(-c3ccsc3)c122.252.0618131.5559891.705101
4C=CC(=O)Nc1cccc(CN2C(=O)N(c3c(Cl)c(OC)cc(OC)c3...2.042.0382381.5323851.679235
5COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCCC13.132.0488351.5354161.682556
6O=C(COc1ccccc1)c1ccccc12.872.0668441.5344301.681475
7CC(C)c1ccc2oc3nc(N)c(C(=O)O)cc3c(=O)c2c11.102.0537711.5503901.698965
8N#Cc1ccc(F)c(-c2cc(C(F)(F)F)ccc2OCC(=O)O)c1-0.162.0475541.5353531.682488
9COc1cnc(-c2ccccn2)nc1N(C)C1.902.0505011.5373181.684641
\n", + "
" + ], + "text/plain": [ + " smiles target pred \\\n", + "0 Cc1ccc(NC(=O)c2cscn2)cc1-n1cnc2ccc(N3CCN(C)CC3... 2.06 2.047474 \n", + "1 O=C(Nc1nnc(C(=O)Nc2ccc(N3CCOCC3)cc2)o1)c1ccc(C... 1.92 2.047561 \n", + "2 CNCCCC12CCC(c3ccccc31)c1ccccc12 0.89 2.062057 \n", + "3 Oc1ncnc2scc(-c3ccsc3)c12 2.25 2.061813 \n", + "4 C=CC(=O)Nc1cccc(CN2C(=O)N(c3c(Cl)c(OC)cc(OC)c3... 2.04 2.038238 \n", + "5 COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCCC1 3.13 2.048835 \n", + "6 O=C(COc1ccccc1)c1ccccc1 2.87 2.066844 \n", + "7 CC(C)c1ccc2oc3nc(N)c(C(=O)O)cc3c(=O)c2c1 1.10 2.053771 \n", + "8 N#Cc1ccc(F)c(-c2cc(C(F)(F)F)ccc2OCC(=O)O)c1 -0.16 2.047554 \n", + "9 COc1cnc(-c2ccccn2)nc1N(C)C 1.90 2.050501 \n", + "\n", + " unc cal_unc \n", + "0 1.543233 1.691122 \n", + "1 1.534631 1.681696 \n", + "2 1.548673 1.697084 \n", + "3 1.555989 1.705101 \n", + "4 1.532385 1.679235 \n", + "5 1.535416 1.682556 \n", + "6 1.534430 1.681475 \n", + "7 1.550390 1.698965 \n", + "8 1.535353 1.682488 \n", + "9 1.537318 1.684641 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cal_predss, cal_uncss = unc_estimator(cal_loader, models, trainer)\n", + "average_cal_preds = cal_predss.mean(0)\n", + "average_cal_uncs = cal_uncss.mean(0)\n", + "cal_targets = cal_dset.Y\n", + "cal_mask = torch.from_numpy(np.isfinite(cal_targets))\n", + "cal_targets = np.nan_to_num(cal_targets, nan=0.0)\n", + "cal_targets = torch.from_numpy(cal_targets)\n", + "unc_calibrator.fit(average_cal_preds, average_cal_uncs, cal_targets, cal_mask)\n", + "\n", + "cal_test_uncs = unc_calibrator.apply(test_uncs)\n", + "df_test[\"cal_unc\"] = cal_test_uncs\n", + "df_test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate predicted uncertainty" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "nll-regression: [1.4490190356267003]\n", + "miscalibration_area: [0.15619999170303345]\n", + "ence: [0.6248166925739804]\n", + "spearman: [0.27272725105285645]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `SpearmanCorrcoef` will save all targets and predictions in the buffer. 
For large datasets, this may lead to large memory footprint.\n",
      "  warnings.warn(*args, **kwargs)  # noqa: B028\n"
     ]
    }
   ],
   "source": [
    "test_targets = test_dset.Y\n",
    "test_mask = torch.from_numpy(np.isfinite(test_targets))\n",
    "test_targets = np.nan_to_num(test_targets, nan=0.0)\n",
    "test_targets = torch.from_numpy(test_targets)\n",
    "\n",
    "for evaluator in unc_evaluators:\n",
    "    evaluation = evaluator.evaluate(test_preds, cal_test_uncs, test_targets, test_mask)\n",
    "    print(f\"{evaluator.alias}: {evaluation.tolist()}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "chemprop",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
diff --git a/chemprop-updated/examples/use_featurizer_with_other_libraries.ipynb b/chemprop-updated/examples/use_featurizer_with_other_libraries.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..40a6083bb7f570d89c72236b0dd7b2efc74ca435
--- /dev/null
+++ b/chemprop-updated/examples/use_featurizer_with_other_libraries.ipynb
@@ -0,0 +1,513 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "bd06dbac-fe7b-43f1-a9e7-6b6a5b39ad52",
+   "metadata": {},
+   "source": [
+    "# Demonstration of using the Chemprop featurizer with DGL and PyTorch Geometric"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4c55d990",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/use_featurizer_with_other_libraries.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bb316f5c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install chemprop from GitHub if running in Google Colab\n",
+    "import os\n",
+    "\n",
+    "if os.getenv(\"COLAB_RELEASE_TAG\"):\n",
+    "    try:\n",
+    "        import chemprop\n",
+    "    except ImportError:\n",
+    "        !git clone https://github.com/chemprop/chemprop.git\n",
+    "        %cd chemprop\n",
+    "        !pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0\n",
+    "        !pip install .\n",
+    "        !pip install dgl -f https://data.dgl.ai/wheels/torch-2.4/repo.html\n",
+    "        !pip install torch_geometric\n",
+    "        %cd examples"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "40cfeccb-bfec-4aef-a09b-929903455cce",
+   "metadata": {},
+   "source": [
+    "# Import packages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "77d50745-e204-4a53-9e32-5c585caa1b91",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import torch\n",
+    "from sklearn.decomposition import PCA\n",
+    "from pathlib import Path\n",
+    "import numpy as np\n",
+    "\n",
+    "from chemprop import data, featurizers, models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6301b0e9-d2f4-41b5-9e05-726c09ae1565",
+   "metadata": {},
+   "source": [
+    "### Load data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "3139fce1-cd87-4c56-9b65-ad13b6698d21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_path = Path(\"..\") / \"tests\" / \"data\" / \"smis.csv\"\n",
+    "smiles_column = \"smiles\"\n",
+    "df_test = pd.read_csv(test_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id":
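(Note added for clarity, not part of the original notebook cells: the loop above prints one value per evaluator. As a minimal sketch, and assuming the four regression evaluators instantiated earlier in the notebook, the results could be collected into a small summary table; the variable name `df_eval` is hypothetical.)

```python
# Hedged sketch: collect the uncertainty-evaluation metrics into a DataFrame for easier comparison.
import pandas as pd

rows = []
for evaluator in unc_evaluators:
    evaluation = evaluator.evaluate(test_preds, cal_test_uncs, test_targets, test_mask)
    rows.append({"metric": evaluator.alias, "value": evaluation.tolist()})

df_eval = pd.DataFrame(rows)   # one row per evaluator, e.g. nll-regression, spearman, ...
print(df_eval)
```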
"caddf77e-317c-4dc1-9ddc-77a972ca58dd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1', V_f=None, E_f=None, V_d=None)]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_test = pd.read_csv(test_path)\n", + "\n", + "smis = df_test[smiles_column]\n", + "\n", + "test_data = [data.MoleculeDatapoint.from_smi(smi) for smi in smis]\n", + "test_data[:5]" + ] + }, + { + "cell_type": "markdown", + "id": "5c0b1062-674d-41ad-8e06-1c925ca158f6", + "metadata": {}, + "source": [ + "## Featurize molecules" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "77b7159f-ea91-4b0b-8cd1-5c7c64d02fe8", + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "molgraphs = [featurizer(data.mol) for data in test_data]" + ] + }, + { + "cell_type": "markdown", + "id": "ba9f69aa-e676-463f-bafd-84f4a75a357a", + "metadata": {}, + "source": [ + "# Use Chemprop featurizer with DGL" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b5b24e61-67b9-42b7-a7ee-3bad644f18ec", + "metadata": {}, + "outputs": [], + "source": [ + "# Install DGL separately if not already installed\n", + "# see https://www.dgl.ai/pages/start.html\n", + "import dgl\n", + "import networkx as nx" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8ca20930-94ea-4399-b262-e6b0cc6bff2a", + "metadata": {}, + "outputs": [], + "source": [ + "def convert_molgraph_to_dgl_graph(mg):\n", + " \"\"\"\n", + " Takes a Chemprop molgraph from featurizer and converts it to a DGL graph object.\n", + " Atom features are saved in 'n' and edge features in 'e'\n", + " \"\"\"\n", + " # Instantiate a graph from the edges\n", + " g = dgl.graph((mg.edge_index[0], mg.edge_index[1]), num_nodes=mg.V.shape[0])\n", + "\n", + " # Assign features\n", + " g.ndata[\"n\"] = torch.tensor(mg.V)\n", + " g.edata[\"e\"] = torch.tensor(mg.E)\n", + " return g\n", + "\n", + "\n", + "def visualize_dgl_graph(g):\n", + " \"\"\"\n", + " Visualize a DGL graph object.\n", + " Adapted from https://docs.dgl.ai/en/0.2.x/tutorials/basics/1_first.html\n", + " \"\"\"\n", + " nx_G = g.to_networkx()\n", + " pos = nx.kamada_kawai_layout(nx_G)\n", + " nx.draw(nx_G, pos, with_labels=True, node_color=[[0.5, 0.5, 0.5]])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "954173cb-1913-4790-94ee-8705a1bce998", + "metadata": {}, + "outputs": [], + "source": [ + "# Convert the molgraphs to DGL graphs\n", + "gs = [convert_molgraph_to_dgl_graph(x) for x in molgraphs]" + ] + }, + { + "cell_type": 
"code", + "execution_count": 8, + "id": "183f5c98-7586-4c78-b5cb-0add1b82b220", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO2deVzU1f7/X7OwiAgaKIJSgAqIKRrmhlcTx6VE0wzKEk3L0ZZr1vfaqLcyc2m0foZtNqa5hKlganrJBUwJJRdAUGRRcEEEVARZZBtmzu+PQx8nBATmM/OZmc7z0R90ZuZzXjPCa87nnPciIoSAwWAwGG1FLLQABoPBMG+YjTIYDIZeMBtlMBgMvWA2ymAwGHrBbJTBYDD0gtkog8Fg6AWzUQaDwdALZqMMBoOhF8xGGQyGeVBUVHTixIm5c+cOHjx4+fLllZWVQiuqR8SymBgMhqlRV1eXm5t75cqVixcvpqen0x8KCgp0n+Pv75+SkiKUQl2YjTIYDIEpKSnJysrKyMjIysrKysrKzMzMyclRq9UNnubo6Ojt7S2VSmtqapKTkwGsX79+3rx5Qkj+G8xGGQyGUcnPz+cWmPSHK1euPPw0V1fXPn36eHl5+fn50R88PT1FIhF99KOPPlqxYoWtrW1cXNygQYOM+w4awmyUwWAYitLS0uzsbGqU1DQzMjIe3tO0sbHp0aOHrmn6+vq2b9+++Yu/88473377raura2Jiopubm8HexKNhNspgMPjh4WXm1atXH3aYTp06cQtM+oOHh4dY3OrjbrVaPXbs2OPHjw8dOvTYsWM2NjY8vY9Ww2yUwWC0HbVaPXfu3J07d9bW1mo0mgaP2tnZeXt7+/j4+Pr6+vr6+vj4eHt7P3KZ2XLu3r379NNPX716dd68eevXr+frsq2F2SiDwWg777///pdffkl/5muZ2QwpKSnvvfdeVFSUs7MzHTl37tzw4cMrKytVKpVcLudxrpbDbJTBYLSdbt265efnBwYGRkVFubq6Gnq64cOHnzx5cty4cdHR0RKJhA5GRESEhYVZWVkdPXr0X//6l6E1PAwLv2cwGG0kLy/v1q1bVlZWW7ZsMYKHAtixY0eXLl0OHz783//+lxucPn36ggUL1Gp1aGhoXl6eEWQ0gNloiyCECPLPw2CYMps2bdJoNC+++GLPnj2NM6O7u/uePXusra3XrFmzc+dObvyLL74YN25cYWFhSEhITU2NccRwMBt9NLGxsR06dHB3d7ezs1uwYMHdu3eFVsRgCI9Wq928eTOA119/3ZjzBgYGfvHFF4SQ2bNnJyUl0UGJRPLzzz97eXmdOnVKgB1SwmiW6Ohoa2tr3U/Mzs4uLCwsLi5Oq9UKrY7BEIzDhw8D8PT01Gg0dKSkpGTatGmHDx82wuxvvPEGgCeeeOL27dvcYEpKCg0D+Pbbb42ggYPZaJNotVqlUkm3sXv06JGZmbls2TKZTMblUfTq1Wvp0qXXr18XWimDIQChoaEAli9fzo188803AMaMGWOE2aurqwcPHgxg9OjRarWaG9+9e7dIJLKysjp+/LgRZFCYjTZOeXn5iy++CEAkEikUCu77lhBy48YNpVLp4eFBzVQsFstkssjIyNraWgEFMxjGpKioyMbGRiwW6y4jBgwYAGDnzp3G0ZCfn0+Tl95//33d8YULFwJwcXHJzc01jhJmo42QnZ3dt29fAB06dNi7d2+jz9FoNDExMSEhIdwtf9euXefPn3/hwgUjq2UwjA+NFX3uuee4kdTUVACPPfZYVVWV0WQkJCTQ5KUff/yRG9RoNM8++yyAAQMG3L9/3wgyeLbR4rrinOqcck05NzLq0qivbn/F7ywG5fjx4507dwbg7e2dnp7+yOcXFxerVKp+/fpxm6cBAQEqlaqiosIIahkMQfD39wfwyy+/cCNvv/02gHfffdfISmjykq2t7ZkzZ7jB4uJiGjwwffp0I2jgzUb3luztl94PSUASJMmS8ZfHZ1dnE0JczrssvrmYr1kMjUqlkkqlACZMmFBSUqL70LVr15p/bWJiolwut7e3p2bq4OAgl8vj4+MNqZfBEIBTp04BcHZ2rq6upiNVVVWdOnUCkJKSYnw99Gj+8ccfv3XrFjeYnp7u4OAAIDw83NAC+LHRbXe3iZJEskuy2LLYnOqc6HvRARkBk7InEfOx0aqqqpkzZza6GUoIUalU1tbWu3bteuR1ysrKtm7dKpPJuMWpn5+fUqm8c+eOwbQzGEZlzpw5AP7zn/9wIxEREQCefvppQfTU1taOGDECQGBgYE1NDTe+Z88ekUgklUp///13gwrgwUYrNZVOqU6DMwfXah+csZTVlVVqKomZ2OiNGzcGDhwIwN7eXvc+hRBSXV3NhcV9/PHHLb9mRkaGQqHo0qULfa2NjU1ISMj+/fvr6ur4ls9gGI+Kigq6yrt48SI3OGrUKADff/+9UKoKCwu7d+8O4N///rfu+JIlS+iObU5OjuFm58FGD5YeRBK2393e6KOmb6NxcXEuLi4AevbsmZaWpvvQzZs3hwwZQndetm7d2oaL19TU7N+/PyQkhO4VAOjevbtCoaAFxBgMs+PHH3+k6z5u5MqVKyKRqF27dg32wYzMn3/+SY+bNm7cyA1qNJoJEyYA8Pf3N9xxBQ82uvbWWiQh+X5yo49yNlqnNcVVmEqlsrKyAvDss88WFxfrPnTy5EmaJuzu7n727Fk9J8rLy1MqlV5eXg3CpHTvQRgM0ycwMLDByThd8c2cOVM4UfVs3bqVLnpOnz7NDZaWlvbu3RvA1KlTDZQyw4ONLi9YjiTcqL3R6KPURu+o73RJ7SK/Lk+6n6T/jLxQXV09a9as5jdDAYwYMUJ331pPaJhUWFhYu3btqJ926tRJLpenpqbyNQWDYTiysrJEIpG9vX1ZWRkdqaurc3d3B/DHH38Iq41CAwZcXV1v3rzJDWZmZjo6OgL4/PPPDTEpDzYafiscSTh7v/H1GrXRzUWb6SE+kjAwY+D6O+vv1d3Tf+o2k5eXR/u32NvbR0VF6T6kVqsVCgX1OLlcbqCg+pKSEpVK1b9//wZhUuXl5Y9+MYMhEDSyfc6cOdzIgQMHaHSgieRG19bWjhw5EsDQoUO5QAJCyL59+8RisVgs/u2333iflAcbPVp2FEnYUrSl0Ue5m/qLVRcVeQrnVGdqprbnbEOuhMSUxWiJsT/9+Pj4rl270hTP8+fP6z50+/btZ555hp4I6d62GI5Tp07NmTOH7tkDaN++va+vb9v2YRkMg6JWq+kfzqlTp7jByZMnA1i9erWAwhpQVFTk6ekJYN68ebrjS5cupfd/ly9f5ndGHmy0Vlvret7VP92/StNI9kKDI6ZqbXVkcaTskkyUJKJ+2iut19L8pddrjJSZzt2tP/PMM7pFDQghSUlJjz/+OIBu3brp7q0YgaqqqsjISC5MSiQSbd/e+JEdgyEUe/bsAdCnTx9upLCw0MrKSiqV5ufnCyjsYZKTk+3s7ACoVC
puUKvVTp06FUDv3r1LS0t5nI6fuNG9JXslyZLBmYP3luxNq0qLLYt9/8b7Pxb9SJo+qb9Re0NZqPS44EHNVJwkll2SRRZH6kZN8Ut1dTWtCkPv1nXLGRBCIiIi6H7l8OHDCwoKDKThkRw8eNDJyYne4wulgcFoFHrkvXbtWm5k9erVACZPniygqqb46aefAFhZWelu2paVlfXp0wfAlClTeNyF4C2L6WjZ0eFZwyXJEiTBJtlmUOagfff2EUI8Lnh8nN9kuKWGaGLKYkKuhFgnW1M/7Xq+6/wb8y9U8pyZ3kzoUoPNUMGPzj/66CMatC+sDAZDl7y8PIlEYm1trXsPR0/ADxw4IKCwZliwYAGArl273rjx4AA8KyurY8eOAFatWsXXRDzn1Ndoa+6q77Zhu7OwtvDzws99L/pSMxUliUZmjdx5ZGdlZaX+qpoJXbpz505QUBDdDNUNNxOQixcv0iBWoYUwGA9YsWIFgJCQEG7kjz/+oCbV4MbOdKirqxs3bhyAIUOG6B43HTp0SCKRiMXi//3vf7xMZHIVnhLvJ8qvy+3P2buluEmsJPpnpjcTunTu3Dla787Nze3PP//UW3vbqa2t5X4Xa2pqpFKpRCIxZqUcBqMZtFotrfRx6NAhbpAmTy9ZskRAYY/k7t27NFh7xowZuuPLly+ngTq676jNmJyNUkrrSiMSI2hZVkr//v2//vrrBhHyzdN86NLPP/9MN6GHDRsm7Ab5jBkzrKysDh48yI306tULQIOUKgZDKI4ePUoT8LhU5vLycnt7e5FIxPupN+80WhJfq9XSE11ra+u8vDw9pzBRG+VoNDM9JibmkdvDzYQu1dXVmdRm6Jtvvom/16GZOHEigN27dwuoisHgeOWVVwAsXbqUG/n+++8BjBo1SjhRraDRkviFhYU0Rfu7777T8/qmbqOU1mamNxO6VFRURL+FpFKpUqk0hvpHER4eDuDNN9/kRv7zn/8AWLFihYCqGAxKSUmJnZ2dWCzWrRX59NNPA4iIiBBQWKtYuHBhgzjxqqoqsVgMYP369Xpe3DxslINmptPYWjTRwKOZ0KXU1FT62s6dOx87dszY6pvg4MGDAIKCgriRH374AUBYWJiAqhgMytdffw1g7Nix3MiFCxcAODo6Gqe2PC/U1dU1qJyyd+9eAB07dtQ/8snMbJTSVGZ6cnJyM3frO3fupFskTz31lEn1obt69So95uJG4uPjAQwaNEhAVQwGhXZY0i22++677wJ4++23BVSlP5MmTQJPWfZmaaMcd+7cWbt2LY2n5bCxsdm0aZPu0+hmKO3o+eqrr/ISRMUjGo2GHnZxmRV37twB4ODgIKwwRmtZvHhxQkKC0Cr4JDExEYCTkxMXMFRTU+Ps7AwgObnxom5mQUFBAc2/4iXXxrxtlINr4CGVSmNiYnQfKi0tpSc2prMZ+jC0s41uMxmay2RqOXaMZqCbMxKJ5L333jOju93moeefCxYs4EZ27twJoF+/fgKq0p/PPvsMwAsvvMDL1SzERilPPfUUAN0I0NzcXBrv1qVLF2P2rW4ttOX3tm3buBFa1dHQzQ8YPFJbW6tUKmmQsqenZ2xsrNCK9KWysvLhDktjxowB8M033wgoTH98fX0BREdH83I1MSwI+tFkZmZyI66uru7u7v379z99+jQtn2WaUOVZWVnciI+PT4MRholjZWWlUCjOnj07cODAq1evjhkzZu7cuWVlZULraju7d+8uKSkZPHgwvVsCcO3ataNHj9ra2tIQKDMlLi4uMzOzW7duNMdJfyzKRh+2HqlUGhUVlZCQQLOVTJaHlTMbNVP69ev3559/0mXphg0bfH19f/31V6FFtZFNmzYB4HqRAdi8eTOtk0RXqWYKfV+zZs2SSCT8XJGXNa2JQHdtpkyZIrSQVpOUlATgySef5Ebo39748eMFVMXQh7S0NC4NLyQkxOxaw+bk5IhEovbt23MnnxqNhoZjm/Ve07179+zs7EQiUXZ2Nl/XtKjV6MM39eaCj48PzavTaDTcCMzzvTAoffr0SUhIUKlUtMPCk08+uXv3bqFFtQJaqSc0NJSrKX7kyJHc3FxPT0+aH2im/Pzzz5WVlUFBQT169ODtonz5sSlQWVkpFoutrKwM1PnDoND2sNw3pFqttra2FovFFnPm+48lJyeHVhEDEBwcrNsjyGS5du1a586dAZw4cYIbzM/PX758+YYNGwQUpj8BAQHUTHm8pkXZKCGE7oFmZWUJLaTV0BRV3aNDurhm3e4sAK1Wq1KpOnToAKBjx466JdlNiurqai7r2tXV1d7e/tKlS0KL4pPz58/TfwJ+g8ct6qYe5nwy8/COhPnuUTAaIBKJ5HJ5ZmbmpEmT7t279913F4ODceOG0LJ0SE5Onj9/vpub26RJk6KioiQSiUQiqaioeOGFFyoqKoRWxxtcmjWXAMkLlmaj5ms9D38BmO97YTSKm5vbr7/+unnzZq12dXQ0+vbFxo0gREhJJSUlGzZsCAgICAgIoIUo/fz8lErljRs3MjIy+vTpk5aWRms7CKmSJ2pqanbs2AFg9uzZPF+ax5WtKfDdd98BeP3114UW0mqOHDkCYOTIkdzI5s2bAbzyyivCiWIYhMJCMnUqAQhA/vUvYvz7ZlqVIiQkhCYL4K+qFElJSbpPu3TpEu23YRnFxn7++WcYpsuZpdno77//DiAwMFBoIa0mNzcXgIuLCzeSkJBgoH91hikQGUk6dyYAsbMjSiX5qyCyYcnMzFy6dCmNW4JOjbSmqu4ePnyY9tsw2YZLLWf06NHgo7row1iajd68eROAk5OT0EJajVartbe3B8BV+C8pKQHQvn17HlsYMkyK4mIil9cvS4cNIxkZhpro3j2iUpExY0K529DevXuvWbOmJUUbVq5cCaBDhw4XL140lD7Dc+XKFbFY3K5du1Z10GghlmajhBAa5lZUVCS0kFbzcE0AWvZft68hw/KIjibu7gQgtrZk6VLCY7SeRkPi44lcTtq3JwAZMWKng4NDWFhYS/pHcGi1WlrzwcfH5969e7yJMy4ffvghDFbD1wJtlNbl1o13MxemTZsGYMuWLdzIiBEjADSoWcWwPO7dI3I5EYkIQPz9yd+3KNvC1atk6VLi4VG/1BWLyejRZNeu0rYF+pSXl/ft2xfApEmTNBqNvuKMDpd/ZaD6RJZ2Ug9zPuBmmfX/WBwdoVLh+HH06oXUVAwejEWLUFMDADk5SEqq/5kjIwNXrjRynepqREVh4kT07Illy3DtGrp3h0KBy5cRG4vQUIe2BfrY29vv37/f2dl5//79n376aRuuICyHDh3Kzc318vKi6xLesUAbNV/refgLwHzfC6MNjBiB5GS88w60WqxejaefRmoqPvgAAwdi5cq/PTMsDEuW/G0kKQnvvovu3REaiv/9D1ZWCAnB/v24dg1KJby89NXm4eGxY8cOqVT66aefmldWK/6qRTJnzhxau513mI2aEA/n0ZvvyvphwsPDHRwcrKysRo4ceaXRpRQDsLfH118jPh6+vsjIAC2xYGODNWvQ6G9BY
SHWrUP//hg4EF99hbt3ERCA8HDcvInISEycCL5qGAGQyWSrVq0ihMyaNevixYu8XdfAFBUVRUdHS6XSGTNmGGoOQ+wUCAvtt+Xt7S20kFZDawJYW1ur1Wo6kp2dDcDd3V1YYXqi0Wg+/vhj3YWAWCweM2bMzp07udYUjAZUVpJDhwgh5IUXyIgRJCCAjBpFuGOhgADy0kskPp5IpfW7n66u5IMPDHjWz0F38Hv16tWgQ5zJ8vnnnwOYNGmS4aawQButrq6WSCRSqVTwBvRt4IknngDAJTLX1dXZ2tqKRKLy8nJhhbWZsrKyKVOmAJBIJMuXL//++++nTp1KewsC6NixI+1FKLRM0+WFF8gzz5CTJ4lIRLj2CNRGa2pIt24kOJhERvJ5vt88lZWVtLrH2LFj64wT7Koffn5+AH799VfDTWGBNkoIoSWw0tPThRbSamg57v3793MjtGGfmRrNpUuX6C/xY489duTIEW783r17KpWKNkqhBAQEhIeH3717V0C1pgm1UULItGmkc2dCPyFqo4QQQVbzXP2n//73vwJM3xpOnDgBwMXFxaBV3yxwbxRse9Q0+O233wYNGpSent6vX7+zZ8/SHj4UR0dHuVx+4sSJtLQ0hULh7OyclJS0YMGCbt26hYaG0i5GAio3Tb74AjU1WLTob4M2NgIoeeKJJ/bs2WNlZbVq1arIyEgBFLQYrtC9lZWV4WaxTBs1X+tpqkCJeX0lEEJWr149ceLEe/fuhYaGJiQkeDVxVNynTx+lUpmXlxcZGSmTyWpqaqKiosaMGePr6/vJJ5/cMKkiSELj5oZly7BpExIThZYCDB8+fM2aNYSQ119/nZ5GmCAVFRU0qGDmzJmGnclwC10BUalUAF577TWhhbSao0ePAhg+fDg3sm3bNgAv0Vs4c6C8vHzq1KkARCKRQqFoVSZrbm6uUqmkG8QAJBIJzfg2xzrcfMHd1BNC1Gri709GjnxwUy8stFSSh4eHabZIoWXxRowYYeiJLNNGjx8/DmDo0KFCC2k1tCaAs7MzN3L69GkA/fv3F1BVy7l8+fKTTz4JwMHBoc2b+lz9Ie5GzNXVVaFQXL58mV+1ZoGujRJSf9ZkY2MSNlpVVUWTBmUymQkeNw0ZMgTA1q1bDT2RZdpoYWEhgI4dOwotpC30798/ODiYiwQqLS0F0K5dO9NPwjt48CBtGOnj45PBR+hNQUFBeHg4TUOkBAQEqFSqiooK/S9uLjSwUULI7NkEMAkbJYTcvHnT1dUVwAcffCCIgOrq6vPnz0dFRZWVlemOZ2RkAHB0dDRCGx7LtFFCCP17vnXrltBCeID+ml67dk1oIU2i1WqVSiVtVxscHMx7AYvExES5XE4rYOGvE6r4+Hh+ZzFNPvmE/N///W3kzh0yfjwxnRKgJ0+epHVL+W1w1CjFxcWJiYlbt25VKBQhISF+fn5ck+QGvw/vvfcegHnz5hlaErFgG6Xr+bi4OKGF8AC9TZbL5aa5A1VRQcLC1F5eE8Ri8aeffmq4sn6lpaVbt26lTasotFS7aX4s/yi++eYbes+UmJjI1zVramouXrz4yy+/rFq1aubMmYMHD6Y1pBtgZWXl7e39/PPPnz59Wve1NCTr7NmzfOlpBou1UXo2Z+5dDNVqtUKhAODk5ATA2to6ODg4MjKSS3MSnJwc0q8fAUjfvlVGq+ybnp6uUCjo3wkAGxubkJCQVhV/M0eyskinTsTXV2gdTfDGG28AeOKJJ27fvt2GlzdYZgYEBNg0FszVsWPHgICAkJCQpUuXRkZGJiYmNlqzioZh9e3bV++31SIs1kZXrVoF4P8a3A6ZFXfu3KGNeW1sbN5+++0JEyZw9y+PP/740qVLBb/NP36cdOlCANKrFzF+Sd/q6urIyMjg4GDuY3F3d1coFIJ/LAYiN5cApFs3oXU0QW1t7fDhwwEEBQU1/zWvVqtzcnJiYmLCw8PlcrlMJqPbVg/j6uoqk8nkcnl4eHhMTExOTk4LxQwbNgzAV199xcc7ezQWa6N79uwBMGHCBKGFtJHk5GQa9+Pm5nbq1Ck6mJ+fr1Qqe/bsSX/JxGJxYGCgSqUSpJe9SkWsrAhAnnuOCJtdnZeXp1QqPT09uY/lySefXLhwoSAfi+EoKSEAcXQUWkfTFBQUdOvWDcB777338KNRUVGTJ0/28fFpNBLe0dFx0KBBM2bMWLly5e7du9PS0tqczH3o0CEAIpGosLBQvzfUUizWRtPT0wH06NFDaCFtYfv27XZ2dgACAwMbbfNAj1yEykyvqiKvvUYAIhIRhYKYSAQBDZPS7Z1rLlFiLUStJgCRSIgpb10kJCTQm/FNmzY1eEipVDazzGzzhszNmzdjYmJUKtX8+fNlMhk9W6bbC/q+mRZjsTZaU1MjlUolEklVVZXQWlpBXV0d3QwFIJfLm/9CppnptE4ExQiZ6TdukKefJgCxtye7dxtunrZz48aNSZMmicViANnZ2ULL4RNbWwIQE19kb9myBYCtre2ZM2d0xzMyMiIjI1NSUtr8J1lWVpaYmLh9+/YPP/wwJCTE39/f1ta20d0AT0/PtLQ0Pt5Ni7BYGyWE9OrVC0CDTzMiIuLAgQMmGCpMCCkqKqLH0NbW1q06HEtKSnrrrbe4c8wuXVxmz1YbIhzojz+IiwsBSM+e5MIF/q/PI3R37PfffxdaCJ/QTqKmH8U3d+5culWtT8RhcXFxfHy8SqVSKBTBwcFeXl70q7EBnTp1CgwMlMvlSqVy//79WVlZxv/rtmQbnThxIoDdOksmtVrt5uYGk8yKSUlJobt7nTt3blvHGHrkIpPJRoyQ0xqU3t5k6VKSm8uPQm4zdPx4YoDuijxD8xTXr18vtBA+8fQkADH9FXZtbS1t1zFs2LCWbHHW1NTk5OTs379fqVTK5fLAwMAOHTo87JjW1tZeXl7BwcEKhUKlUsXHx5eWlhrh7TwSS7bRhQsXAlihE6ZcWVm5evVqWv6DbkKPGjUqIiKibX2+eGTHjh10M/Spp566fv26nle7fLlu8WLi5lZf0NfKikyZQg4caHsn9Orq+swZuhlqkkv5hqxevRrAggULhBbCJzS2LCVFaB0toLCwsHv37gDeeeedBg+1fJkZEBAQFhamVCojIyPT0tJM8yaSWLaN0qrXw4YNe/gh08mKoZuhtDL89OnTeTR0jYbExJCQkPolJC2QrlCQRpfg6ekkJoY0+GpPTiZJSSQvjwweXN/+lysbbPr8+uuvAMaPHy+0ED4ZNowAxFyyt5KSkuhx39y5c1esWPHqq68OHDiwqWWmn5/fCy+8sHjx4i1btpw+fdpcSutTLNlGv/vuOwA2NjZNbdAInhVz9+7dsWPHApBKpUql0kCzFBSQ8HDSt2+9mQIkIICoVEQ3MX3OHAIQufxvLxw7lowZQw4dIhIJ8fQkqakGEmgQaJlET09PoYXwybhxBCAHDwqto8V8++23YrG4QTvSBsvMxMREc+8lY8k2mpOTI5VK6c379OnTm+n2LkhWzPnz52kVTmdnZ+OchCQmErmc2NvXm6mjI5HLCU3e
mzOH2NoSsZicPPng+dRGCSE7d5KiIiMI5BO1Wm1tbS0Wiy0penTqVAKQyEihdbSYefPmAfDy8vrggw82bdp08uRJi2xwYMk2SghZv369g4MD9zXYp0+fL7/8sqnFpjGzYn799VcqbMCAAUbOurl3j6xfXx+0RP/bv5/MmUP69SPjx5O+fR909eFs1EyhFa9TzWsV3Sw0XPfHH4XW0TKOHTsmEolsbGyMGXskCBZuoxS62HRxceE2YprPTH84K4bH4sG0GBLdDH3llVcEXCulpxOFgvTsSe7fr7fR9HRiZUXWrKl/grnb6OTJkwHs2rVLaCG88c47BCDr1gmtowXcv3+fptutMJ1SVAbjH2GjlLq6OloMmN7pA+jWrZtCoWgqUffhrJiuXbvOnz///PnzbdZQWlr6/PPPG3oztFXQfQtqo4SQ994j7dsTuj42dxtdtGgRgGXLlgkthDcWLyYAWblSaB0tYMGCBSeh5tAAABDNSURBVAD8/f3/CZ0L/kE2ykEz02n3UG6xuXXr1qZOyYuLi1Uqlb+/P7c5QIsHt7bpcVZWVu/evQE4OTnRrm2mA2ejpaXEzY08/zwh5m+jmzdvpkt+oYXwxsqVBCCLFwut41GcOnWKNjnnsW6eKfNPtFEOGvZEAzbxV2b6uXPnmn8+F7Hh4OAQFhbWzMmVLgcOHHB0dKTfz1euXOHvTfADZ6OEkB076o+Dzd1GExIS6Hee0EJ4Y906ApCHAjFNi+rqatoV3PTbL/PFP9pGKTQz/amnntJdbDaTmV5eXr5x48ahQ4dyz+/bt294eHhxE5k9dDOUBhi//PLLpnlwrGujhBCZjPTpQ4KCzNtGS0pKALRv395i6pBu2bK9QweHmTNnCi2kOZYsWQLA19fXvMpZ6AOz0QfQnum0QDIAW1vb5sOeMjMzFQpFly5d6PMbjd4vKyubMmUKAIlEYiKboY3SwEYzM4m1NbGyMm8bJYTQf50bN24ILYQfaDXiqVOnCi2kSVJSUqysrMRi8YkTJ4TWYjyYjTaEy0ynh+kAvL29lUplU7ULa2pqdu/e/cYbbzzstpcuXfLz8wPw2GOPHTlyxPDa204DGyWELFlCALO3UZrZ3cKNF9Pn4MGDAMaNGye0kMZRq9W03tj7778vtBajwmy0SZrqmd7CBh7R0dG05FK/fv1aXrVbKNaubZjCdP8+ef55IlC3R95YsGDJgAGjf/yxLaVeTJD4+Hg0kd9sCnz66ac0c6y1p6/mDrPRR9CGnum6m6GhoaHm2A1YoyF8N/cUhv/3/8zgTKblpKSk0C9moYU0QkZGhq2trUgkspi1f8thNtpSWtgzvby8fOrUqTQDVaFQmOPhxuHDpF07MnGi0Dr44H//s4StCY7s7GyYZKEAjUYTGBgI4M033xRaiwAwG201jVaH+vPPPwkhsbGxNAHRwcFh3759QittIxcu1NcqtQCyswlA3N2F1sETt27dAtC5c2ehhTRk7dq6kSOVPXv2MpECoEZGRAgBo/WUlpbu2LHjxx9/PHv2LB2xtbWlFWr9/Pz27dtHa++bIzU1aN8eIhHu34e1tdBq9EOjgb09ampQVoa/vvjMmMrKyvbt29va2lZVVQmt5QHZ2fD3R2UlDh5Ujx/fSLs6i6eRaqmMluDo6Dhv3rwzZ87QhH2JREKLfXXt2jU2NtZ8PRSAjQ08PFBXh5wcoaXojUSCHj1ACC5fFloKH9jZ2Uml0urq6rq6OqG11EMI5s1DZSVmzsQ/00PBbFR/evfurVQqi4uLP/74423btt28ebOppttmBO0PkJUltA4+8PUFgMxMoXXwBG0HW1FRIbSQetavx9Gj6NoVa9cKLUU4mI3yg4ODw7Jly8LCwhpth2B2WJL10PdiGV8JAOimfHl5ue7g4cOHCwoKjC8mNxeLFgHAt9/isceMP7+pYAl/8wzesaTVKH0vlvGVAICWdNBdjVZXV0+ZMsXNza1Hjx5z586NiooqKyszjpi5c1FejtBQvPCCcSY0UZiNMhrBkqzHkr4S0NhqtKioaPTo0fb29leuXNmwYUNoaGiXLl1kMtnq1auTk5O1Wq2BlGzZgkOH4OSEr74y0AxmAzupZzTCrVvo2hUdO6KkRGgpelNWBkdHtGuHigpYwI5LUFDQsWPHjh49GhQUpDuu0WhSUlJiY2NjY2Pj4uLUajUdd3Z2HjVqlEwmGz9+/OOPP86XjMJC9OmD4mJERODVV/m6qrnCbJTROI89hpIS3LqFv0qvmDFubigowLVr+Cuz14yZNGnSgQMH9u3bR+t/N0pxcfHRo0djYmKOHDly/fp1btzPz2/y5NOBgfYjR6J9e71kTJ2KPXvw3HOIjtbrOpYBs1FG4wwdilOnEBeHESOElqI3o0bh+HEcPoyxY4WWoh8ajSYgICA1NfWpp56aPXv2mDFjvL29m3/JlStX6BI1JibG2vrxO3dSCYFUCn9/yGSQyfDMM/irHURL2bULL78MBwekpcHdve1vx3IQMvafYcLQ7mkqldA6+GDePLNpYdQMxcXF48aNA/CYzqF4165dQ0JCtm7dWvSo3q1qtTohIeejj8iQIUQiedDQ0NmZvPQS2biRXL/eIhlFRcTFhQDkhx94eFOWAbNRRuN89hkBiGUUPPvySwKQt94SWoceZGZm0jxjZ2fn3bt3R0ZGyuXybt26cX4qFosDAgIUCkVMTMwj276Xl5OYGKJQED+/B34KEC8vIpeTyMjmCtNMm0YAMmoUMcNyEYaC2SijcfbsIQCZMEFoHXxw8CABSFCQ0Drayv79+2kHmv79+1+9elX3oZycnPDwcJlMZmtry1mqnZ2dTCZTKpWJiYmPLI5z+TL57jsyZQpxdHzgp9bWpKSkkSdHRxOAtG9PTK8PjpCwvVFG42RkwM8PPXogO1toKXqTn49lyzBoEF5/XWgprYQQsmbNmiVLlmi12mnTpm3cuJFrHdaAysrK+Pj4I0eOxMTEXLhwgRvv3r379Onr/f2DR49G587NzaXRICUFsbGIjUVZGU6fbuQ55eVQKNC7N/79b73el4XBbJTROLW16NEjr2vXrPj4f9nammuFkooKZGXBxQXduz8YLC7G1avo29fUC6+Ul5fPmDFj3759Eolk5cqVCoWihS+8fft2XFxcbGzsb7/9lpeXN2zY1YQEDwBeXggOxsSJGD4cOovXRsjPR3o6vLzg5fVg8Pp1XL6M0aPxV18Ixl8IvBpmmDD0FPjChQtCC2k7J07UF8rTLce+fTsBiIn3Z9Jtx93mQsharTY1NXXdusqxY0m7dg/u2e3syLPPkrVrSVP/tlFRBCA9exLdrnRffEEAotG0TYslY/7hyAyD4ePjAyDL/BOACgvxySdCi2gN0dHRgwcPzsjI6Nev39mzZ2UyWduuIxKJ+vXrN39+u8OHUVaG+HgoFAgIQHU1Dh7E+++jb1+4uCA0FBs24ObNhi+/eROrVun7Xv4JMBtlNAk9Gs40/5xQuRzr1iE1VWgdLYAQsnr16kmTJt27d++ll15KSEjw9PTk5cpSKYYPh1KJxETk5yMiAjN
nwtUVt28jKgpz58LdHQMG4IMPkJRU/5J338WaNZaTR2s4mI0ymsRiVqMzZ2LAAMjlMFh+OT9UVFS8+OKLixYtEolESqVyx44d7fVMNmoCFxe8+iq2bEF+PnJyoFIhJAQdOiAlBZ9/jmPH6p/25pvo1Qtz54IdoDQPs1FGk1jMalQsxtq1OHsWP/wgtJSmyc7OHjJkyJ49exwcHPbu3atQKERGOcrx8oJcjshI3L6No0ehUGDixPqHpFKsXYu4OEREGEGIGcNslNEk1EazsrKI+a9Ghg/HzJlYvBi3bwstpTEOHTo0aNCgixcv+vj4nD59eiLnZEbExgZBQVAq62tiUcaMQUgI/vMfSyhSYziYjTKaxMnJycnJqaysrLCwUGgtPPDFFxCL8eGHD0YyMhqPjjQmdDM0ODi4pKRk4sSJZ86cod9epsO6daipwaefCq3DhGE2ymgOi7mvB+DkhOXLsWnTg7OmVaswZAh698bq1bhzRwBJFRUVoaGhixYt0mq1CoVi3759Dg4OAuhoFldXfPQRvv0Wly4JLcVUYTbKaA56ymSONtroadLcuQgIwNdf1/+vhwe6dEFmJhYtgrs7XnoJR44Y7xgqJydn2LBhu3fv7tChw549e5RKpcl2oHn3Xfj6YvNmoXWYKib6z8YwEcz0sP7OHchk2LWr4bhYjPXrUVtb/7/LlyM/HzExCAmBRoPISIwbh8cfx6JFuHrVsArj4uKGDh164cIFb2/v06dPT5482bDz6YdUiu++g8l0IzU5mI0ymoM7ZRJaSCs4cwYDBuDYMSxbBjs7BARANw09IACLFiEgoD4TVCKBTIbISFy/DqUSPXrg5k2sXo2ePTFmDLZtgyEawm/YsEEmk925c2fChAlnzpyh2UomhYsLZDLY2DwYGT4cS5ZAJmOZoI0hcBYVw7ShBurh4SG0kJYSEVGf9Th8OCkoaPXLNRoSH0/kcmJnV5832bEjkcvJuXP8yKuqqpoxYwYAkUikUCg0LLPSImA2ymgOtVptbW0tFovv378vtJZHoFYThaLe++RyUlOj19Xu3iXr1hF//wd56MOGaTdt2lZWVtbma+bm5g4cOBCAvb39L7/8opc+hinBbJTxCOh9fWpqqtBCmuPOHRIURABiY8NzVfa0NKJQECcnMnDgbQC2trYhISExMTGPrOPZgLi4uC5dugDo2bNnWloanxIZQsNslPEIJkyYAOCTTz4RWkiTnDtHPDwIQNzcyJ9/GmSK+/fJrl2nRowYwWUW9e7d+4svvrh161ZLXq5SqaysrAA8++yzxcXFBpHIEA5mo4xHEBwcTI3D29s7MjKytrZWaEV/4+ef6/cxhw0j+fkGn+7SpUtLly7lOhVLJBKZTBYZGalWqxt9fnV19axZs9hmqGXDbJTxCMrKynx00gNdXFwWLlyYmZkptC5SV8fnZmgrp66LiYkJCQmha0wAbm5uCoUiOztb92l5eXmDBg2im6FRUVHG08cwLsxGGS0iOztbpVL5+/tzfhoQEKBSqcp16yEbkaIiIpMRgEilRKkURAIhhBQUFCiVyp49e9LPRCwWBwYGqlSq+/fvf/PNN87OzgB69Ohx/vx5wSQyDA+zUUbrSExMlMvlHTp0oMbh4OAQFhbW5vLsbSM1VePpSQDStSs5ccKYMzeOVqs9duxYWFhYu3bt6MfC/RAUFHT37l2hBTIMC+vFxGgL1dXVBw4c2LBhw9GjR+mvUO/evWfOnDl79uzOzTdO05tdu3Z9+OEPJSVHnnhCvHcv/tqlNAnKysp27ty5bdu2hIQEQoi/v//p06dtdKPYGZYIs1GGXmRlZW3evHnz5s23b98GYG1t/fzzz4eFhT333HMSiYTfuTQazZIlSz7//HNCyMKF25YvDzNZgzpy5Iirq2vfvn2FFsIwBsxGGTyg0WiOHTu2YcOGvXv31tXVAejWrdv06dPnzp3LVw+MsrKy6dOnHzhwQCqVrlixouVtMhkMQ8NslMEn+fn5P/300w8//JCTkwNALBYHBQWFhYWFhIRw24VtICsra/LkyZmZmc7Ozrt27QoKCuJPMoOhL8xGGfyj1WoTEhJ++umniIiIyspKAJ06dQoJCXnrrbd0z/pbyIEDB6ZPn15WVta/f/+9e/d6eHjwr5jB0ANmowwDcu/evcjIyO+///7cuXN0JCAgQC6XT5s2jTvrbwZCyJo1a5YsWaLVaqdNm7Zx40Y73WJNDIZpwGyUYQySkpK2bdsWERFRXFwMwNbWduLEiXK5fPTo0U01bisvL58xY8a+ffskEsnKlSvZZijDZGE2yjAeD4dJ+fj4zJo1a9asWbRsB8elS5cmT56ckZHh5OS0c+dOmUwmkGQG49EwG2UIwOXLl7dv37558+bc3FwAEolk1KhRcrl8ypQpUqk0Ojr61VdfLS0t9ff337t3L19n/QyGgWA2yhAMtVr922+/bdy48eDBgxqNBoCVlZWrq+uNGzcIIS+//PKmTZvYZijD9GE2yhCegoKCbdu2rVu3rqCgAIBIJPrss88++OCDprZNGQyTgtkow1TQarVfffXV+fPnR4wY8dprrwkth8FoKcxGGQwGQy9YZ1AGg8HQC2ajDAaDoRfMRhkMBkMvmI0yGAyGXjAbZTAYDL1gNspgMBh6wWyUwWAw9ILZKIPBYOjF/wc5HGaMHRX65gAAAdN6VFh0cmRraXRQS0wgcmRraXQgMjAyNC4wOS4xAAB4nHu/b+09BiAQAGImBgiQAGJpIG5gZGNIANKMzOwOGkCamZkNQrNAxJmY2BkUQHwYFyEMVY4mDtfukAGWZ0RiQGQEwQYyYirAcAEWI3AZys3AyMDIlMDEDGQzsLAysLIxsLEzsHMwcHAysHMpcHFnMHHzJPDwZjDx8jHw8ifwC2QwMQlmMAkKJQgJZzAJiySIiGYwiYoliIlnMIkzJnCyMAhwJYgLJTixAM1nZQQqFGdjZWPn4GRh4+bh5RfgYhMWERUTFxLXYgR6hgEWrFfaDti/2dpmD+KETjNwaKzLsQOx/V1MHQ6aLtoPYqc+vmLv68B7AMRe2ddu58R7FSyurypoO73Cah+IPbFs3X6hiTvAep/yKh1Q9xUBq1maIHwg8RMzWK98c/p+3jVqYPbh3uoDjelie0HsJtnWA5+5doLdUHB34wGvun9g9n636weKjSFuC2dmOnhq2WkwW+7D+QOftO6B7TUwWHxA408l2K6PnzscLk68AmZX2C52eLn+PFgNy9pLDme6TcDsW5xPHBzz5oDNUfl+1GEvp6QDiD2jfZpDbQwXmH1Iq9WhvsgZrEYMAGdzeABl6urhAAACVnpUWHRNT0wgcmRraXQgMjAyNC4wOS4xAAB4nH1VW6obMQz9zyq8gTGSLMn2501yKaXcBNq0eyj0s/unRx6S8QXTmVh47GM9j5xTiuf79dvvv+n1yPV0Son+8+u9p1+FiE4fKSbp/P7l6y1dHm/n58rl/vP2+JFEk1ScwfsZ+/a4fzxXOF0SZyNlNkzIRONQpvEcJyXdkuRqnatjWwp14wWuQJ/kVkS6po2zeB
W3BVCHYffCndImWaiS0QJosAyDrtUpNLqr+spFh0asFtPiaaPMTZhWPlYAoai49F6wX5y0yQLYYBquWRHqLVRa7YVWyB4qJWOzVo0ZObv3BZIpoJQ7adEBhXVZ5pKjOFvJsGkoyhbBIZ2rJLEEVDNbbyZIrKohYytkVGizbNakMZC9NRRrhYwSbZ6r1NI8yKENaldIS5c/aWuZxdrgEXhCSx6x70q1N6QoYtJSuizDr7unoLwXGTmrDqauoFEpzVJ7rW0nCei0TFRUyjKIIabDPJmXVfgShfJsDTyhwacKsq7Cl6hTRfAARu5FhNdAGSq5ucJj0KWz8NJ2FElzJXgnALJz60uNOoDY1xrhwkV1XwDfb9dPfb/fBOf77XrcBPHK0fC4PlI5+pox9OhexrCjRxnDj05kjHr0G2O0o6sYox+twzHmBol95qkNNATLxHYNwWVitYZgndjLQ9jEUh0rPpExPsG0iXMasbaJWRqC+8QgDSEzUzSE8MQIDSEylV5DSJlqrCFEp2LquLVfC5FX5OMVggyH65QcqfuZw7+o9FzX+H7+O2B++gcuuTJBQIIwXQAAAS96VFh0U01JTEVTIHJka2l0IDIwMjQuMDkuMQAAeJwlUDmSwzAM+8qWyYzC4U1xXLrPJ9zu5AV5/IJeN7JACARwfuR6nG89z/fjsuvC5fd5XfY8T31+Lr3mU/n5PoSCXWQJcajbOpQqWvZiUuMOGWSbaq+XkGZprUMo0wSIknJxrAPs9MrhZLrXIGzhFmAxyVZhSGFsqd1Qt2TfCkgpTGVYUW08kBF+wgZjy5LeAJ0kegecukdi5ysoYusG0nsrvL+S4M960vj24WwSBQkIUvE/x3tzjribtcqtxN15L+xKNHL7gpmqCckpiUgzZTf3wWBfscBJq2v/B2eQgpAUk5HnSJg5kmIj9l1EoTg8K9jKcaWqaB8U2em6sLRFdXSL8R6AJJrGHSd6ZYKCZ67n9w9ramN6pq4WpwAAAABJRU5ErkJggg==", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAApQAAAHzCAYAAACe1o1DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACQq0lEQVR4nOzdeVhTZ9oG8DuHsCkqQVFcUEBwXxF3RUFtxUJb7VitVq21i2XG2s7W1i5j922mrbalddRqS7VVuwoU64biLhB3qoKIghqIJoCsITn5/uAjY2SHwCHJ/buuXt/HOck5N8qYh/e87/PKjEajEUREREREjSRIHYCIiIiIrBsLSiIiIiJqEhaURERERNQkLCiJiIiIqElYUBIRERFRk7CgJCIiIqImYUFJRERERE3CgpKIiIiImoQFJRERERE1CQtKIiIiImoSFpRERERE1CQsKImIiIioSVhQEhEREVGTsKAkIiIioiZhQUlERERETcKCkoiIiIiahAUlERERETUJC0oiIiIiahIWlERERETUJCwoiYiIiKhJWFASERERUZOwoCQiIiKiJmFBSURERERNwoKSiIiIiJqEBSURERERNQkLSiIiIiJqEhaURERERNQkLCiJiIiIqElYUBIRERFRk7CgJCIiIqImYUFJRERERE3CgpKIiIiImoQFJRERERE1CQtKIiIiImoSFpRERERE1CQsKImIiIioSVhQEhEREVGTyKUOQNZJp9NBo9FAr9dDLpfDw8MDTk5OUsciIiIiCbCgpHpTq9VITk5GWloatFptlfMKhQIBAQEICgqCp6enBAmJiIhICjKj0WiUOgS1blqtFrGxscjIyIBMJkNtPzKV5/38/BAeHg6FQtGCSYmIiEgKLCipVkqlEvHx8RBFEaIo1vt9giBAEASEhYUhMDCwGRMSERGR1FhQUo0SExORkJDQ5OuEhIQgODjYAomIiIioNeIqb6qWUqm0SDEJAAkJCVAqlRa5FhEREbU+XJRDVWi1WsTHxwMAysrKsH//fqhUKqhUKhQXF2PSpEkICQkxvV4URRw9ehSXLl1Cbm4uSkpK4O7ujr59+2LChAlwdXVFfHw8fH19OaeSiIjIBnGEkqqIjY01zZcsLi5GSkoKDAYD+vXrV+3ry8vLsW/fPri7u2P69OmYP38+AgMDkZKSgq+++grl5eUQRRGxsbEt+W0QERFRC+EIJZlRq9XIyMgwfe3u7o4XX3wRMpkMRUVF1T66dnR0xHPPPYc2bdqYjvn6+qJDhw7Ytm0bUlNTMXToUGRkZECtVrOlEBERkY3hCCWZSU5OhkwmM30tk8nMvq6OIAhmxWSl7t27AwAKCgpMr0tOTrZgWiIiImoNWFCSmbS0tFr7TDbE5cuXAcA0IimKItLT0y1ybSIiImo9WFCSSVlZWbU74DRGQUEBdu/ejW7duqFPnz6m4xqNBjqdziL3ICIiotaBBSWZWKqYLC4uxqZNmwAAf/rTnyAI5j9mGo3GIvchIiKi1oEFJZno9fomX6OkpATR0dEoKCjAggUL4OHh0Sz3ISIiotaDBSWZyOVNW/RfUlKCb775BlqtFgsXLoSXl1ez3IeIiIhaFxaUZFLdaGJ93VlMLliwAF27dm2W+xAREVHrw6EiMnFycoJCoagylzItLQ06nc60mEatVuPcuXMAgICAAMhkMkRHR+PGjRuYPn06RFFEVlaW6f1t27Y1FZEeHh5wcnJqoe+IiIiIWgILSjITEBCApKQks9ZBsbGxyM/PN32dmpqK1NRUAMDy5csBANevXwcA7Nixo8o1hw4dipkzZwIA/P39my07ERERSUNmtFTTQbIJarUaUVFRzXb9//73v+jQoQP69esHR0dHODk5wdHREe3bt8fLL7/MXXSIiIisEEcoyYynpyf8/PyQmZlp2s/bEgRBgLOzM65fv47r16/jjz/+AAA4ODhAFEUYjUY8/vjjLCiJiIisEBflUBXh4eFVekc2lSAIePLJJ7Flyxaz4waDATKZDGFhYRgyZIhF70lEREQtgwUlVaFQKBAWFmbRa4aFhUGhUODhhx/GkiVLzPYHF0URZ8+excGDBy16TyIiImoZnENJNUpMTERCQgKMRqNZAdhQoaGhmDhxounroqIiDBkyBFeuXAEAdO3aFdevX4coihgyZAg2bdqEQYMGNTn/nXQ6HTQaDfR6PeRyOVebExERWRALSqqWRqPBwoULcePGDYSHh0MulzdoTqUgCBAEAWFhYQgMDKxyXqlUYvTo0TAajTh//jzat2+PRx99FLt27QIAjBs3Dps3b0avXr1M7zlz5gxOnjyJBQsW1CuDWq1GcnIy0tLSqt1WUqFQICAgAEFBQZy7SURE1AQsKMlMUVERVq1ahXfeeQdFRUUAgOTkZKSmpiIjIwOCINR
aWFae9/PzQ3h4OBQKRY2v3bJlCzQaDZ555hnTsYyMDMyfPx9Hjx6FTCbDvffei+joaHh4eGDQoEH4448/sH37dkRERNR4Xa1Wi9jYWGRkZEAmk6G2H/HK8/XJS0RERNVjQUkAKh4Jr1u3Dq+99ho0Go2pCHNyckJpaSlkMplpxC89PR0ajabKNTw8PODv72+REb/KkcizZ89CEASMGjUKR48eBQC4u7vj3Llz6NatW5X3KZVKxMfHQxRFi46oEhERUc1YUBIAYOXKlXj99derHB8yZAhOnTpV5XhLzUnct28fHn/8cVy+fNl0zMHBAcHBwdi9e7fZavTKOZ9NFRISguDg4CZfh4iIyF5wlTcBAB5//HGMHDnSbPGNg4MDBg8eXO3rnZyc4OXlhR49esDLy6vZFrhMnjwZL7zwgtkxg8GAhIQEfPTRR6ZjSqXSIsUkACQkJECpVFrkWkRERPaABSUBAHr27InExERTYVg58tevXz8pY8FgMOC1116r9tw//vEPvP/++9BqtYiPj7fofePj46tdyENERERVcaccMvnTn/6EsrIyREZG4tdff8W1a9ckLygBYNq0acjKyoJOpzP9p1KpoNFo8OKLLyI/Px+urq4oKyvD/v37oVKpoFKpUFxcjEmTJiEkJMTseleuXMHJkyehUqmQm5sLg8GA5cuXmy3IEUURsbGx9V5RTkREZM9YUBIAYNOmTYiLi8PIkSPx+eef45133sG3336L8PBwSXM5ODjg22+/rfac0WjEli1bcOHCBYiiiOLiYqSkpMDLywv9+vWr8bH15cuXkZGRga5du8LZ2RmZmZlVXiOKIjIyMqBWq9lSiIiIqA5clEO4efMmunfvDgcHB+Tm5sLNzU3qSPUWHx+PpKQkGI1G08p0mUyGoqIifPjhh9WOUIqiaHqkf+jQIezatavKCCVQ8dg/KCjI4rsGERER2Rq7H6HkDipAcHAwdDod4uLirKqYBIC0tDSzQrI+6rtPuSiKSE9Pb3Q2IiIie2GXBSV3UPmfl156CX/88QfmzZuHGTNmSB2nQcrKypp94YxGo4FOp7O7XzKIiIgawq4KyvruoKLVapGUlITjx4/b9A4qSqUS77//Pry8vBAdHS11nAZrqVXYGo0GXl5eLXIvIiIia2Q3bYOUSiWioqJMCzDqmjpaeT4zMxNRUVE215dQr9dj2rRpkMlk2LdvX70fA7cmer3epu5DRERkrexihLIpO6hUbuEXExODwsJCm9lBZdasWdBoNHj33XfRt29fqeM0ilzeMj++LXUfIiIia2V9w1INxB1Uqvr+++8RExODESNG4MUXX5Q6TqN5eHjY1H2IiIislU0PvVTuoFLfhtd3MhqN2LBhA65evYqRI0fivvvuA1DRpsbX19dq51TevHkTixYtgqurK/bu3St1nCZxcnKCQqFo1rmU9rjqn4iIqKFseoQyNjbWrOG1wWCo984vx48fh0ajqXK8cgcVazVp0iTodDps3boV7du3lzpOkwUEBJi1C0pLS8O5c+dw8eJFABUr+s+dO4dz585Bp9MBAIqKikzHcnNzAQDp6ek4d+6cWZNzQRDg7+/fct8MERGRlbLZEUq1Wo2MjAwAgLu7O1588UVTw+u6HltrtVrs2bMHM2fOxJYtW8zOWfMOKi+//DJSU1PxyCOPSL4DjqUEBQXh+PHjpq9jY2ORn59v+jo1NRWpqakAgOXLl8PJyQm5ubnYtm2b2XXi4uIAAL169cLixYsBVPxdBwUFNfe3QEREZPVstqBMTk42tQaqb8PrSjExMfDz80P//v2rPS8IApKTk61qB5WTJ0/i3XffhZeXV41bGVojT09P+Pn5ITMzE6Io4vnnn6/zPb6+vli5cmWtrxEEAT4+Plb3SwMREZEUbPaR9507qDRESkoKrl27VmuTb2vbQUWv12Pq1KmQyWTYu3evVbYIqk14eLjFvydBEGxmFJeIiKi52VZl8f8au4NKQUEBdu7ciWnTptU5v7ByBxVr8Kc//Qm3bt3CW2+9VeOoqzVTKBQWHy0OCwuz2oVXRERELc0mC8rGrvqNjY2Fl5cXRowYUa/XV7dop7XZsmULfv31VwQGBuKll16SOk6zCQwMrHXVfkOEhoYiMDDQItciIiKyBzZZUDZmZ5Nz584hPT0d06ZNQ2lpKUpKSlBSUgIAMBgMKCkpgcFgaPJ9WtKtW7ewcOFCuLi4WKwXZ2sWHByMiIgIyOXyBj8CFwQBcrkcERERmDhxYjMlJCIisk02uSinMTub5ObmQhRFrFu3rso5pVIJpVKJOXPmmD0ybu07qEyePBk6nQ6//vqrTbQIqo/AwED4+vqa9mwXBAGiKNb4+srzPj4+NrtnOxERUXNr3RVRIzVmZ5Nhw4bBx8enyvGvv/4a/fr1w+jRo9G5c+cm36c5vfTSS1Cr1fjoo4/w73//G2fPnsWcOXNw//33Sx2tRSkUCixYsABqtRrJyclIT0+vMj3BaDSisLAQU6ZMQVBQEFdzExERNYFNFpTV7aCSlpYGnU5nWkhT2fAaqGiOrVAoahydateuHXx9fc2OtcYdVDZs2ICcnBzExsYiNzcXXbp0webNm6WOJRlPT0/TYh2dTgeNRgO9Xg+5XI7XX38dX375JXx9fa2q/RMREVFrZJMFJVBRJCYlJZlaB9Wn4XVDdOvWDSqVCqIomv3Xs2dPSdry3L59Gzk5OQBg+r+zZ8+GKIo21yaoMZycnODl5WX6unK6wvLlyzFq1CiMHDlSqmhERERWT2ZsTLNGK6BWqxEVFdVs1//ss89w8+bNKsdfffVVvPHGG81235qkpKRUu6vL+PHjsX//fjg4OLR4ptYsPDzctDuOQqHA8ePHuc0iERFRI9ns0FXlDirN0fA6Ly+v2mISqFgII4Xz58+bfV35fZeXl7f61ehSSEpKMv3/+fn5mDJlimlkl4iIiBrGZgtKoPl2UHnllVdwzz33VNnS0cvLq1lazuh0OqhUKmRnZ0OlUlXbUD0lJcXsa19fX/zwww84evQonJ2dLZ7JmuXk5CA3N9f0tSiKyMrKwj333IOysjIJkxEREVknm33kXUmpVCImJsZi14uIiEBgYCBu3bqFIUOGICcnx6w/pbOzM5544gl89NFHZvMyjx49iosXL2LhwoX1uk/lCuW0tLRqG7UrFAoEBASYVih7eHhAq9XC3d0d//nPf7Bw4cJW39ZIKr/99hvuu+++Ksc9PT1x5swZdOnSRYJURERE1svmC0oASExMtEhj79DQULMRyOPHj2P8+PHQ6/UYPnw4nnnmGbz88stQq9VwdHTEokWL8Omnn0IulyMgIACZmZn45Zdf8MADD9R4D61Wa+qhKJPJat2PvPJ8+/bt8cYbb2DQoEHYtWsXXFxcmvy92rLVq1dj+fLlcHBwgCAIcHBwQGJiIgIDAznXlIiIqBHsoqAEKkYq4+PjTaux60sQBAiCgLCwsGq34/v000/x7LPPIjY21jTq9c033+CFF16ASqWCXC7HmDFjcPDgQQCAm5sbTp06BT8/P4tlBCoe295///
313jbSnmm1WiQnJ2P06NF4/PHH8eOPPyInJ6dKn1EiIiKqH7spKAHz0b/67qDi5+dX6w4qRqMRWVlZ6NmzZ5VzW7duxfPPP4/r16+bjjk4OGDgwIE4duyY2UiipUZRQ0JCEBwc3OTr2IsdO3YgLCwM//rXv7By5Uqp4xAREVkluyooK9W2gwpQ0bTc39/fIjuobN26FXPmzKlyfNGiRdi4cSOA5pvnSXUTRRFOTk4YPHgwTpw4IXUcIiIiq2SXBeWd7t5BxdI74AwaNMi0I8/dpk+fjnXr1mHjxo0oKirC/v37oVKpoFKpUFxcjEmTJiEkJMTsPUePHsWZM2eg0Wig0+nQtm1beHt7Y9KkSaZHtnK5HJGRkdyXup769++PjIwMrvAmIiJqJJtuG1QflTuo9OjRA15eXhbfTnHkyJEYO3YsJkyYgODgYEyePBmBgYFo27Ytfv/9d7z77rsQRRHFxcVISUmBwWBAv379arxeSUkJAgICcP/992PBggUICQmBSqXC2rVrTb0xRVFEbGysRb8PWzZjxgzodDocOXJE6ihERERWye5HKKV06tQp/PLLLwBgWs0tk8lQVFSEDz/8sNoRyuqo1Wp8/vnnCA4ORmhoqOl4ZGRkkx/Z24OMjAz07t0bCxcuxNdffy11HCIiIqtj9yOUUrp+/bqpObpMJqvSKL2+2rRpAwBmTdwFQUBycnLTQ9oBPz8/uLm5Ye/evVJHISIiskosKCWUlpZWa5/J2oiiCL1eD7Vaje3bt6Nt27YYPny42fn09HRLRbV5w4cPx7Vr16rdhYiIiIhqx61UJFJWVlbtDjj19fbbb5t26OnYsSMee+wxdOjQwew1lQt3LD0v1BY98sgjOHDgADZt2oTFixdLHYeIiMiqcIRSIk0pJgFgyZIlWLJkCWbNmgUnJyds3LjRbH/qStW1RaKqFi1aBADYtGmTxEmIiIisDwtKiej1+ia9v1u3bvD29saQIUPw2GOPAQD27Nlj8fvYizZt2qBr165ISkqSOgoREZHVYUEpEbnccrMNnJ2d0alTJ9y6datZ72PrJk2ahIKCAmRnZ0sdhYiIyKqwoJSIh4eHxa5VVFSEnJycaq9pyfvYuqeffhoAEBUVJXESIiIi68LhK4k4OTlBoVCYzaVMS0uDTqczrTRWq9WmXXYCAgIgiiK++eYbDB48GB07doRcLsetW7dw7NgxGAwGTJo0yewelt71x9ZNnjwZcrkcMTExeOedd6SOQ0REZDVYUEooICAASUlJptZBsbGxyM/PN51PTU1FamoqAGD58uVo164dvLy8kJKSgoKCAuj1eri5ucHHxwcPP/ywaetFoKIPpb+/f8t+Qzagb9++uHDhgtQxiIiIrAp3ypGQWq1u1ser3Cmn4V566SW89957SEhIwOTJk6WOQ0REZBU4h1JCnp6e8PPzM9vhxhIEQYCfnx+LyUaIjIwEAKxdu1biJERERNaDBaXEwsPDm6WgDA8Pt+g17YW3tzfat2+PhIQEqaMQERFZDRaUElMoFAgLC7PoNcPCwqBQKCx6TXsSFBSEGzduoLS0VOooREREVoEFZSsQGBiIkJAQi1wrNDQUgYGBFrmWvZo3bx4AYOPGjdIGISIishJclNOKKJVKxMfHQxRFiKJY7/cJggBBEBAWFsZi0gJ0Oh1cXFwwceJE7N+/X+o4RERErR4LylZGq9UiNjYWGRkZEASh1sKy8ryfnx/Cw8P5mNuCvL29kZeXh9u3b0sdxa7odDpoNBro9XrI5XL2UiUishIsKFsptVqN5ORkpKenQ6PRmJ0zGo3Iy8vDPffcg5EjR3I1dzNYtGgRvvnmG2RkZMDX11fqODat8mc9LS3NrNF/JYVCgYCAAAQFBfFnnYiolWJBaQXuHrVZtGgRdu7ciYULF+Lrr7+WOp5NOnz4MMaPH4+///3v+PDDD6WOY5PuHI2XyWSo7Z+iyvMcjSciap1YUFqhadOmYffu3QAqFo4sWrRI4kS2ydnZGX5+fvjjjz+kjmJzOF+YiMi2sKC0MkajEZ6enrh16xYAQC6XY/fu3VX28aamGzZsGM6ePQudTmfxXqH2LDEx0SJ9PkNCQhAcHGyBRERE1FT8lLQyly9fNhWTACCKIu6//35cvHhRwlS26cEHH4TBYMCuXbukjmIzlEqlxZrGJyQkQKlUWuRaRETUNCworczhw4fNvhZFEQUFBbj//vslSmS7li5dCgBYt26dxElsg1arRXx8vEWvGR8fX+1CHiIiallyqQNQw1QWlHK5HHq9HgDg7++P2bNnw2g0QiaTSRnPpnh5ecHd3R0HDhyQOopNiI2NrXO+5JUrV3DgwAFkZ2dDr9ejffv2GDp0aI1TOkRRRGxsLBYsWNAckYmIqJ5YUFoZT09P9O/fH1OmTMGXX36JXr16IS0tTepYNmv06NH4/fffUVhYCDc3N6njWC21Wo2MjIxaX3P69Gn8/PPPGDhwIGbOnAknJydoNJpae4GKooiMjAyo1Wq2FCIikhAX5VixwYMH48KFC9DpdFJHsVmbNm3Co48+ilWrVuHZZ5+VOo7Vio+PR1JSUo2tgQoKCvDpp59i6NChCA8Pb9C1BUFAUFAQwsLCLBGViIgagXMordjUqVNRXl6OkydPSh3FZs2ZMwcymQxbtmyROopVS0tLq7XPpFKpRHl5OSZMmNDga4uiiPT09KbEIyKiJmJBacUWL14MAPjqq68kTmK75HI5evbsyaK9CcrKyupcOHPlyhW4urri5s2b+OKLL/D666/jgw8+QExMDEpLS+u8h0aj4Ug9EZGEWFBasSFDhsDJyQl79uyROopNmzp1KoqLi3HhwgWpo1il+qzCLigoQHl5ObZu3YpBgwZh4cKFGD9+PE6dOoVNmzbVOrpZ6e4tSomIqOWwoLRyvXv35uO+ZvbMM88AAKKioiROYp0quxHUxmg0Qq/XY+LEiZg4cSJ8fX0xfvx4TJ06FVlZWXUu6KnvfYiIqHmwoLRyU6ZMgU6nw9mzZ6WOYrNGjBgBZ2dni/dQtBdyed3NJNq0aQOgogXWnSq/vnHjhkXuQ0REzYMFpZXjPMqWMWjQIFy6dKlB+05TBQ8Pjzpf06VLl1rP16e/an3uQ0REzYMFpZULDAyEo6Mjdu/eLXUUmzZz5kxTE21qGCcnJygUilpf079/fwCo0lO18usePXrU+n4PDw84OTk1ISURETUFC0ob4Ovry728m1nlNowbNmyQOIl1CggIqHWU0d/fH3369MH+/fuxf/9+XLp0CQcOHMCePXvQp08f9OrVq8b3ymSyKo/KiYioZbGxuQ1YunQp1qxZg/Pnz6Nv375Sx7FZHTt2hIODA3Jzc6WOYnXUanWdi5rKy8uxb98+nDlzBoWFhWjXrh0GDx6MyZMn1zk/ctu2bfDz84OjoyNKS0tRWlqKkpISDB06FF988YUlvxUiIqoGC0obcOTIEYwbNw7//Oc/8f7770sdx2aFh4cjLi4OSqUSR44cgV6v5+45DRAdHY3MzEyLzkOVyWS4ceMGvvzyy2rPjxkzBkeOHLHY/
YiIqHosKG2Eo6MjBg0ahBMnTkgdxeYUFRVh586d+OKLL7Br1y7T8Q4dOiAvL0+6YFZGq9UiKirKou195HI5li5dipkzZ+LAgQNVzv/666+4//77LXY/Imuj0+mg0Wig1+shl8s535iaDfts2AgfHx+cP39e6hg2afny5Vi/fn2Vx67Dhg2TJpCVUigUCAsLQ0xMjMWuGRYWho4dOyImJgbDhw/HlStXzEZAly5dips3b+Lxxx+32D3vxA9rao3UajWSk5ORlpZW7cYCCoUCAQEBCAoKgqenpwQJyRZxhNJGPPHEE1i/fj0yMjLg6+srdRybcuLECUyYMAElJSWmHVscHR3x3HPP4YMPPpA4nXUpKirCE088gX79+jX5WqGhoZg4caLp63PnziEoKMi0VeO4ceOQlJSE8vJyuLu74+9//zteeuklCML/1iLqdDr8/PPPmDlzZr0LQX5YU2ul1WoRGxuLjIwMyGSyWneYqjzv5+eH8PDwOjsxENWFq7xtxMKFCwEA69atkziJ7Rk+fDh+/fVXODg4mI6Vl5djxIgREqayLjqdDl9//TU8PDywdetWDBs2DHK53Ky4qw9BECCXyxEREWFWTALAwIEDER0dDQDw8vJCQkICCgsL8fe//x06nQ6vvPIK3Nzc8Oyzz5qKzrVr12Lu3Ln1mgur1WoRHR2NqKgoJCUl1bilpFarRVJSEqKiohAdHV2vrSeJmkqpVCIqKgqZmZkAUOd2pZXnMzMzERUVBaVS2dwRycZxhNJGiKIIJycnDB06FCkpKVLHsUmbN2/G/PnzTV+npaWxXU0dcnJysGbNGnz66ae4efMmAGD06NE4evSo2WiKIAi1LtapPF+f0ZQNGzbA29sbU6dONR0TRREfffQR3nvvPdy6dQtyuRyzZs3CwYMHcf36dQDA+vXra3w0rlQqER8fD1EUG7SoSBAECIKAsLAwBAYG1vt9RA2RmJiIhISEJl8nJCQEwcHBFkhE9ogFpQ3x8/NDTk4OioqKpI5isz766CP87W9/g0wmg8FgqNcOLvaooKAAf/nLX/Ddd99VKcJ++uknzJw50/R15SPk9PR0aDSaKtfy8PCAv7+/xR4hb9q0CS+99BKysrLMjsvlchw+fBgjR440O84Pa2rNlEqlReclR0RE8JcfahQWlDZk0aJF+Oabb3DlyhX07NlT6jg2q0+fPrh8+TJ0Oh3Ky8u5KKMaSqUSI0eOrHY0Lzc3t8bCsKUWuRiNRvj6+uLKlStmxzt27IjU1FR07tzZ9H3ww5paq7o6J5SVlWHv3r04d+4cSkpK0KlTJ0yYMAGDBw+u8ZpyuRyRkZGcU0kNxoLShuzevRvTpk3Dv/71L6xcuVLqODYrLi4On3/+OaZOnYrbt29XOc9FGRV+//13PPjgg6b5igDQu3dvpKenS5iqQnx8PGbMmFHtubZt2yIlJQWdO3eu8cP68uXL+Prrr6t9/5IlS+Dt7V3tOX5YkyXV1dv1m2++wfXr1zF16lR07NgRZ86cgVKpxKxZszBkyJBq3yMIAnx8fLBgwYLmjE42iG2DbEhoaCgEQcBvv/3GgrIZ3Dnnb8yYMdUWk5WvS0pKwvHjx+16BeWAAQNgMBgAwLSgKTQ0VMpIJhqNBu3atYNcLoeTkxOcnJzg4OCAmzdvori4GP369cMnn3xS53zJKVOmwMfHx+xY5ehmdSr3g+eHNTWVWq1GRkZGjecvXryIjIwMPPTQQ6YRSV9fX+Tl5WHXrl0YNGhQtYviRFFERkYG1Gq1Xf9CTA3HVd42RBAEeHt7IzU1VeooNocrKBumtLQUgYGBKC8vx+bNmzF69GgYDIZWM4dw/vz5KCgogEajgUqlwtWrV3H58mXcvn0bRUVF+O6775CXl1dnQenh4QFvb2+z/5ydnWt8/Z0f1kRNkZycXOsc7vPnz8PJyQkDBgwwOz58+HDcvn0b2dnZNb5XEAQkJydbLCvZBxaUNmb8+PEoKioyrVylpktMTERMTAz0en2Dtw0URRF6vR4xMTFITExspoStz9ixY3Hz5k289957eOSRR7B3715s2rQJDz/8sNTR6uTi4oIOHTo024IrfliTJaSlpdX6i21ubi46depk1u4MALp06WI6XxNRFFvF1BSyLiwobcyjjz4KoKJ1CjWdUqm0yApfAEhISLCLkcp58+bh5MmTmDNnDl544QUAgLOzM+bNm2c1C5bq+rCu9Ntvv+H111/HO++8g+jo6CqLfKrDD2tqqrKysjr7mxYXF8PV1bXK8cpjJSUltb5fo9FAp9M1PiTZHRaUNubee++FIAiIjY2VOorV02q1iI+Pt+g14+PjbbrR9QcffIDvvvsOQ4YMwffffy91nEapz4e1i4sLRo8ejfDwcDz22GMICwtDfn4+Nm7cWK9ikR/W1BT1/TekqaPs1bXxIqoJF+XYGEEQ0L17d5w9e1bqKFYvNjYWoiiirKwM+/fvh0qlgkqlQnFxMSZNmoSQkJAq77l+/Tp27dqF7OxsCIIAX19f3HPPPfDw8ABg24sy4uPj8eKLL6Jjx444duyY1HEarT4f1l27dkXXrl1NX/fq1Qv9+vXDF198gV27dtWr4b1Go4GXl1eTspJ9qqlN0J3atGmD4uLiKscrRyarG71szH2IKnGE0gaNGzcOhYWFtc6RodpVrqAURRHFxcVISUmBwWCodQ9qtVqNjRs3wmAwYPbs2XjggQdw69YtbNiwwdRs3lYXZaSlpeGBBx6Ao6MjlEolXFxcpI7UaI39EHV1dUWfPn2Qk5OD8vLyZrsPkVxe91hQ586dcfPmTVOnhUo5OTmm85a4D1ElFpQ26JFHHgHAeZRNcecKSnd3d7z44otYvHgxpkyZUuN7EhISIJfLMW/ePPTp0wcDBgzA/PnzUVRUhMOHD5teZ2uLMgoLCzFq1Cjo9Xrs2LHD6pvqN+VDtCFtfflhTY1V+cSjNv3794dOp8Mff/xhdvzUqVNo164devToYZH7EFViQWmDIiIiIJPJLLrDh725c1GGTCarcy6SwWDAxYsX0b9/f7PROXd3d/j6+pr9o25LizJEUURQUBDy8vKwevXqaqcBWJvGfoiWlJTg4sWL8PLygqOjY7Pdh8jJyanO3rYBAQHw8/NDbGwsUlJScPnyZWzfvh3p6emYNm1atT0o78Rdv6ih+CuyDRIEAV27dsWZM2ekjmKV6rMo425arRZ6vd7UkuNOXbp0waVLl1BeXm4qNCoXZVj7P9gzZ87EhQsXsGTJEvzlL3+ROo5FVH5Y1/Yz8MMPP6BDhw7o1q0b2rRpA41Gg8OHD6OoqAgPPvhgnffghzU1VUBAAJKSkmodFZ8zZw727t2LhIQE09aLdzY6r4kgCPWaB0x0JxaUNmrs2LH48ccfodFoOBLSQI1ZhV05+b22Nh2lpaVmI1fWvijjtddew/bt2zF69GisW7dO6jgWVdeHdZcuXXDu3DkkJydDp9PB1dUVPXv2xKxZs9C9e/dar80Pa7KEoKAgHD9+vNbXODs7IywsDGFhYQ26duWTB6KGYEFpo+bOnYsff/wR
GzduxF//+lep41iVpiyWaEibDmtelPHDDz/gzTffRNeuXXHw4EGp41hcXR/WEydOxMSJExt1bX5YkyV4enrCz8+v1r28G6NyL29uu0gNxTmUNurBBx+ETCbD9u3bpY5idRqzWKJNmzYAUGubjrtXPlvrooyzZ89i7ty5cHV1hVKptNrvozaVH9Z1zTNrKEEQ4Ofnxw9rsojw8PBm+RkNDw+36DXJPrCgtFFyuRxdunTByZMnpY5idRozRUChUEAul1fbqiknJwceHh5VFmpY41SEvLw8jB07FkajEfv377fqR/Z14Yc1tXYKhaLBj7PrEhYWVueCH6LqsKC0YaNHj0Z+fj7y8vKkjmJV6rOC8m4ODg7o27cv/vjjD5SVlZmO5+XlITMzE/379zd7vTUuyhBFEcOGDUNhYSG++uorjBw5UupIzYof1mQNAgMDLdZdITQ0FIGBgRa5FtkfFpQ2bM6cOQCA6OhoiZNYn4CAALP5kGlpaTh37hwuXrwIoKKJ+blz53Du3DnTFnqTJ09GeXk5Nm/ejLS0NPzxxx/YvHkz2rRpg3HjxpmuZS2LMnJycsx2XJo2bRquXLmC5557DosWLZIwWcvhhzVZg+DgYEREREAulzd4VF0QBMjlckRERDR6XjARAMiMDenES1ZFp9PB2dkZoaGh2LNnj9RxrIparUZUVJTp648//hj5+fnVvnb58uWmUae6tl6slJycjKFDhwKo6GFZ+d/kyZMRERHRTN9Vw8ybNw9btmzBp59+irS0NHzyyScICQnB3r17pY7W4pRKJeLj4yGKYoMWQAiCAEEQEBYWxmKSmp1Wq0VsbCwyMjIgk8lqbSkkCAJEUYSfnx/Cw8M5ck5NxoLSxnXp0gXl5eXQaDRSR7E60dHRFl9BCQCXL1/G119/DeB/BYdMJkN5eTkeeugh/PDDDxa9X2MYjUZ06dLFbIvInj174vLlyxafV2gt7vywrvwwrgk/rElKarUaP/30E86dO4eOHTtWOe/h4QF/f38EBQVxgRhZDAtKGxceHo64uDjcvn0bbm5uUsexKlqtFlFRURZt7yOXyzF79mwEBgaa9ve+08GDBzF+/HiL3a+xMjIy0Lt3b7NjkyZNwq+//ooOHTpIlKp1UKvVSE5ORnp6erW/qN26dQsDBgzAQw89xA9rkkRZWRm6d++OW7du4ccff8S4ceOg1+shl8utcv42WQfb6/dBZmbPno24uDh8++23WLp0qdRxrErlogxLbmEZFhaGPn364Pfff0dwcLDZKJdMJkN0dDRGjBhRpcVQSztw4ECVY/v370dwcDBOnjzZoH6btsbT09O0WEen00Gj0Zg+rL/66it8+umnaNOmDe677z6Jk5I9MhqNWLp0KW7dugUA+OOPPzBr1iyJU5E9sM9nV3akcmHOTz/9JHES6xQYGGix9j53LsoYP348PvzwQ7PzHTp0wJo1a9CuXTssWrQIhYWFFrnvnXQ6HVQqFbKzs6FSqUwLiu62b98+s68FQYCLiwvuvfdei2eyZk5OTvDy8kKPHj3g5eVl2mWpuLgYISEhZlMGiFrC6tWrsXHjRtPXnD9PLYWPvO1A586dIYoi9u7di8TERAQGBpqtOqbqlZWVYdmyZVi7di3++te/wsPDw6KLMoxGIx566CH8/PPPGDlyJI4dO4bvv/8e//jHP3Dt2jU4ODhg1qxZ+O9//wt3d3fT+95880107twZTz/9dL0yVD6iTUtLq3ZbSYVCgYCAALP5VC4uLqb2R926dcNzzz2HJ554gnMB6zB16lTTB7ggCBg8eDASExPRvn17iZORPdi9ezfuvfdes3+jXFxckJ+fz8fc1OxYUNqwixcv4rfffsM777xjNlIyf/58fPvttxIma92Kioqwdu1avPXWW6bHRiUlJSgpKbH4ooyCggI89thj+Mc//oGxY8eajv/6669Yvnw5rly5YmqGvXbtWmg0GgwYMAAA8Pvvv2PatGk1ZmjIis/K835+figtLcVTTz0FDw8PrF27Fvfff79N7obTHDp16mT6mQEq+pOOHTsWu3btknwaA9m227dvo0ePHigoKKhy7tChQxxEoGbHgtJGGY1GeHh4IC8vz6yYkMlk+M9//oPnn39e4oStj1arxeeff45///vfKCgoMP2ZDR8+HEql0vS6uhZlWHIF5a5du/DnP/8ZaWlpkMlkppXXRqMR7du3x5kzZ9CjR48q72tKmxudTof8/HysWrXKrudKNlROTk6VnYMqf7E4cOAAJkyYIFEysgclJSV4/vnn8fvvvyMzMxNAxSJAvV6Pt99+GytWrJA2INk8DjvYKJlMhjfffBPLli0zG5kyGo38TbUGISEhOHXqlNkxQRAwdepUs2O1Lcqw9ArKadOm4eLFizh48CAef/xxpKWlmc7dvn0bDz30EA4ePGi2rWNiYiISEhIadT9RFCGXy9GxY0ccOHAAwcHBTf4e7MWZM2fMvpbJZFi5ciXCw8MxfPhwiVKRvXB1dcWXX34JAKYnIs888wyOHj2KgIAAKaORneCiHBv2l7/8BcuWLTMbZXJ0dOSHWw1WrFgBZ2dnsz8vURQRFBRU43vuXpTRXPOUJkyYgODgYDg4OJiOGQwGHD9+HI8//rjpmFKpbHQxebeEhASzkVmq3YABAxAZGYmvv/4aS5cuhdFoRGhoKP/3Ri1Kp9MhLy8Po0aNwjvvvIO9e/di9uzZUsciO8BH3jbOYDAgIiIC8fHxAICRI0fi+PHjEqdqvdauXYunnnrK7Fh6enqVnowtTaPRoEuXLjAYDJDL5TAajTAYDKbR53vuuQeffvoptm3bVm3fzJ9//rnK6OudlixZAm9v7yrH5XI5IiMjuRinga5evYpevXphzpw5+P7776WOQ3bkhx9+wOzZs/HJJ59g+fLlUschO8KC0g7cvn0bffr0gUqlwowZMxAXFyd1pFbp5s2b8Pb2hl6vR3BwMPbu3Yt27dohPz9f8rmEpaWleOONN5Cfnw8HBwfTf7dv30ZCQgJu3LiBpUuXon379tXOmdRoNNU2Uv/uu+/g4OCA559/vtodcARBgI+PDxYsWNAs35ctUygUcHR0RG5urtRRyI7Mnz8fmzdvRk5ODjp37ix1HLIjnENpB9q1a4e9e/diwIABcHd3b/Z5f9ZIFEWMHj0apaWl+OGHHzBz5kx88sknMBgMkheTQEXrj3feeafG81euXMHGjRtrXIDj4eFRpZ9mZmYmiouLERwcXON2iqIoIiMjA2q1mru+NNCECRMQGxuLW7duVbv9HVFzOHr0KFxdXVlMUotjQWknOnXqhKeeegqdOnXCu+++W+V8db0I7cn8+fORkZGBZ555Bg899BAA4K9//avEqeovNTW1ztZAd6ucH1nXHD9BEJCcnGxaiET1s3TpUsTGxuLTTz/FypUrpY5DdiIrKwv9+vWTOgbZIRaUNu7OXoTdu3evseDQarVISkrC8ePH6+ydaGvWrVuH77//HkOGDEFUVJTUcRolLS2tQcVkaWkpUlNT4efnV+ffsyiKSE9Pb2pEuxMWFga5XI4
ffviBBSW1iAsXLqC8vBwTJ06UOgrZIa7ytmFKpRJRUVGmnmR1FRyV5zMzMxEVFWUXK3zPnTuHpUuXol27djh06JDUcRqlrKys2h1wanPmzBno9fp6r0DWaDQ1btNI1RMEAf3798f58+cb1AuUqLEqN6x45JFHJE5C9ogFpY1KTExETEwM9Hp9gz/MRFGEXq9HTEwMEhMTmymh9EpLSzFhwgTTtpRubm5SR2qUhhaTAHDixAm4urqif//+9X5PdU3cqXYPP/wwDAYDYmJipI5CdmDPnj0QBIG9hkkSLChtEHsR1s/kyZORl5eHjz76qNZek61ddW2CaqNSqXD9+nUMGTKkQVsqNvQ+BPz5z38GAKxZs0biJGQPUlNT4eXlVeMiO6LmxJ86G6PVak09Jy0lPj6+UaNgrdkLL7yAY8eOITw8HM8995zUcZqkoftsnzhxAgAQGBjYrPehisVuXbp0weHDh6WOQjaupKQE+fn5bKRPkuEnhI2JjY2FKIooKyvD/v37oVKpoFKpUFxcjEmTJiEkJMTs9TU1vO7YsSOWLVsGoOIReGxsrM30ItyxYwc++OADdO/eHb/++qvUcZrs7nZAtdHr9Th9+jS6d++OLl26NNt96H+mTJmCzZs3IzMzEz4+PlLHIRv1008/AQDCw8MlTkL2igWlDVGr1cjIyAAAFBcXIyUlBV5eXujXr1+tj63lcjkWLVpkduzOvaFtqRehSqXCgw8+CCcnJxw/ftwmHg05OTlBoVDUaxT5/PnzKCkpafDoJHuVNt5f/vIXbN68GZ988gk++eQTqeOQjfrll18AAHPnzpU2CNkt6/80JZPk5GRTE253d3e8+OKLWLx4MaZMmVLr+2QyGby9vc3+8/LyMntNZS9CayaKIkaNGoWysjL8+OOP6Natm9SRLCYgIKBeDdiVSiUcHR0xaNCgel9bEAT4+/s3JZ5dGzt2LFxcXLgwh5pVUlIS2rZtC3d3d6mjkJ3iCKUNubMXoaV3d7GFXoR/+tOfkJWVheeff97mHgsFBQXVa4/2hQsXNvjaoiha9aKl1mDYsGE4duyYaXcqIku7du0ahgwZInUMsmMcobQRjelFWEmv1+PDDz/E66+/jv/85z+Ii4tDcXFxlddZcy/Czz//HD///DNGjBiBjz76SOo4Fufp6Qk/Pz+LP8IXBAF+fn5WP9VBao8++iiMRiO+/vprqaOQDTp58iT0ej0mTZokdRSyYywobURji0kvLy/cc889mDVrFh599FEMGzYMJ0+exFdffYWysrIqr7fGXoQnT57Es88+iw4dOuDAgQNSx2k24eHhzVJQ2tporhSWLFkCmUzGgpKaxebNmwGwoTlJi89ebERjewSOHTvW7OvevXuja9eu2Lp1K5RKZZXz1taLsHJ1O1DR7N3V1VXiRM1HoVAgLCzMonP1wsLC7GYLzubk4uICb29vpKSkSB2FbFBCQgIcHBwwcuRIqaOQHeMIpY2w5Lysfv36wdHREdnZ2c16n5YwceJEFBQU4LPPPrOL+UWBgYFVWkM1VmhoaINXg1PNZsyYgeLiYpw8eVLqKGRjLly4YFOLDMk6saC0Ec3RI7C6hT2tuRehwWDAlStXTF8vX74cSqUSDz30EJ555hkJk7Ws4OBgREREQC6XN/gRuCAIkMvliIiIwMSJE5spoX2qbKC/atUqaYOQTSksLMTt27e5cI4kx4LSRlT2IrSE1NRUlJeXo0ePHmbHW3svwrVr18LX1xdvvvkmfvnlF6xevRq9evXC1q1bpY7W4gIDAxEZGWlqpF1XYVn5y4OPjw8iIyM5MtkM+vbtCzc3N+zcuVPqKGRDtmzZAgCIiIiQOAnZO+t6fkm1CggIQFJSkql1UFpaGnQ6nWlltlqtxrlz50yvLS4uxo8//ohBgwaZRh6vXLmCo0ePwtPT06yokMlkrb4X4aFDhwAAr732GmQyGZydnW2meXljKBQKLFiwAGq1GsnJyUhPT692UdWtW7eQn5+Pf//731zN3cxGjx6NPXv2oLCwEG5ublLHIRtQOWd69uzZEiche8eC0obc3YswNjYW+fn5pq9TU1ORmpoKoOJxsIuLC9q2bYsjR46gsLAQRqMRHTp0wOjRozFx4kSz0Uij0Yinn34abdq0gZubG4qLi1FcXIySkhK0bdsWSqVS8oa6R44cMRXTRqMR7du3x/Xr19G5c2dJc0nN09MTYWFhAACdTgeNRmPqh+js7Gz6ZWLYsGF4/vnnpYxq85544gns2bMHX375Jf7+979LHYdsQEpKCtq3b89fUEhyMmPlJzDZhOjoaGRmZkIURYtdUyaTobS0FO+++2615z09PZGVlQVnZ2eL3bOhbt++jQ4dOuDOH2eZTAZHR0ecOHECAwYMkCxba5aUlIRRo0aZvl6/fj0ef/xxCRPZNlEU4ejoiGHDhnHFN1mEXC7HiBEjcOzYMamjkJ2zz2eBNqw5ehE6ODjgH//4Bz7//PNqzy9durRZi0mdTgeVSoXs7GyoVKpqm6ufOHHCrJh0cHCA0WjE5MmTq2wjSf9z9uxZs6+feOIJbNu2TaI0tk8QBAQEBODMmTNSRyEbcOzYMRgMBoSGhkodhYiPvG1Nc/YijIyMxJkzZ7BmzRqz4u3NN9/EwYMH8cUXX6Bv374WuWflvL+0tLRqm7YrFAoEBAQgKCgInp6e+Pnnn83Oz5w5Ey+//DKGDRtmkTy26syZM3B0dER5eTmAiqkC8+bNQ4cOHXDPPfdInM42zZw5E++99x727NmDKVOmSB2HrNh3330HAJg/f77ESYg4QmmTmrMX4apVqzBu3Dg4ODjAwcEBM2fOxKBBg5CQkIB+/fph0KBB2L17t9k1zp07h8mTJ+PSpUt13k+r1SI6OhpRUVFISkqqcQcgrVaLpKQkREVF4euvvzaNqs2cORMXLlzAtm3bWEzWw8mTJ03FJFAxsqvX6xEbGythKtu2fPlyAEBUVJTEScja7d+/H3K5HIMGDZI6ChHnUNoypVKJ+Ph4iKLYoDmVgiBAEASEhYVV2z4mNzcXw4YNQ05ODi5dugQfHx9cuHABkZGRSEhIgNFoRNeuXfGvf/0LTz/9NObOnYstW7agb9++SEpKQrt27SyaFwDKy8vh6+uLJUuWNOh99s7Lyws5OTmmr2fMmIG33noLw4YNq7YPKVlGx44dIZPJcPPmTamjkBVzc3ODp6cnLl++LHUUIhaUtk6r1SI2NhYZGRkQBKHWQq3yvJ+fH8LDw2vta5mWlobU1FQ88MADVe63fPlybNmyBTqdDm3btkVRURGAitGv8PBw/PTTT1XmeSYmJiIhIaEJ32mFkJAQBAcHN/k69mL9+vUAgHvuuQc+Pj4ICgri5P4WMHPmTPzyyy+4ceMG5/hSo2g0GnTs2BFz5szB999/L3UcIhaU9qKuXoQeHh7w9/c3zUlsKr1ej9dffx0ffPBBlUU0//rXv7By5UrT10ql0qJzPiMiItiYuxH69OmDK1euoKysTOooNm/37t2YNm0aVqxYgbffflvqOGSFoqKi8Oc//xmbN2/GI4
88InUcIhaU9ujuXoTNtQOOVqtFt27dUFpaWuXcqlWr8Oyzz0Kr1SIqKgp6vd5i95XL5YiMjLTYzkH24oUXXsAHH3yAPXv2cNVoC3ByckLv3r3xxx9/SB2FrNB9992H3377DSUlJXBxcZE6DhELSmo+n332GZYtWwYHBwfIZDIYDAaz1eFPPPEExo0bh+zsbJSUlGD//v1QqVRQqVQoLi7GpEmTql1cZDAYcOzYMZw8eRIajQYODg7w9PTEPffcg549e0IQBPj4+GDBggUt+e1aPZVKha5du2LmzJn46aefpI5j8wIDA3H69GnodDq73c2JGq9bt24oKSmpceEiUUtj2yBqNpMnT8bf/vY3uLq6wtXVFW3atEGbNm1w+/ZtHD9+HCdPnjTtF15cXIyUlBR4eXmhX79+UCqV1V5TFEV8//33uHr1KsaPHw9vb2+Ul5fj+vXrptXKoigiIyMDarWaWwk2gJeXFzp27Ij9+/dLHcUuzJ07FydOnMAPP/yAhx9+WOo4ZEVEUUROTg7GjBkjdRQiE45QkmTi4+NNe49X/hjKZDIUFRXhww8/rHaE8siRI9i5cycef/xxeHt713htQRAQFBRk2nKQ6mf27Nn44YcfkJWVZSr2qXkUFhaiXbt2mDZtGnbu3Cl1HLIi+/btQ0hISJX56ERS4nMWkkxaWppZIVmfNjXHjh1Dr169ai0mgYrf4NPT0y2S055U9kj8+OOPJU5i+9zc3NC1a1ccPXpU6ihkZbZu3QqADc2pdWFBSZIoKytr8Nyf/Px85OXloXPnzti9ezc+/PBDvP766/j8889x8uTJKq/XaDTVbtNINZswYQKcnZ3x66+/Sh3FLkybNg23b99GWlqa1FHIihw4cABOTk4ICAiQOgqRCQtKkkRjJpIXFBQAAE6dOoULFy5gxowZmD9/Pjw9PfHLL78gJSWlynuqa5FEtQsMDERGRgaL8Rbw3HPPAQA++eQTSXOQdbl06RJ69uwpdQwiMywoSRKNaRNU+Xhcr9dj/vz5GDhwIPz9/TF79mx07dq12sUklmxHZC8WLVoEo9GIjRs3Sh3F5g0fPhyurq6Ii4uTOgpZCZVKhZKSEowePVrqKERmWFCSJOTyhjcYaNOmDQCgU6dOcHd3Nx2XyWTo3bs3CgoKUFhY2OT72LvFixdDJpOxoGwhI0aMwNWrVzkiTPWyefNmAMBDDz0kcRIicywoSRIeHh4Nfo9CoYCjo2Otr7l7YU9j7mPvnJyc4OvrixMnTkgdxS5UjghXboNJVJsdO3YAqNgRjKg1YUFJknBycmrwTjYODg7o27cv1Gq12RxMo9GI9PR0KBQKtG3b1nS8uXYAsgcPPPAASktLcejQIamj2LyFCxdCJpPhm2++wYEDB/Cvf/2r2kVmREDFHPKOHTvy6Qu1OvyJJMkEBASY+lACFW2EdDqd6dGfWq3GuXPnTK91cnJCaGgo0tPT8e2332Ly5MlwdnaGUqmESqXC7NmzTdcWBAH+/v4t/03ZiOeffx4ff/wxVq1ahfHjx0sdx2bdvHkT8fHxcHV1xdGjRxEcHAygYqrGsGHDpA1HrcahQ4dQWlqKoKAgqNVq088JUWvCxuYkGbVajaioKNPXH3/8MfLz86t97fLly00jmjk5Odi9ezeuXLkCURTh5eWFiRMnom/fvmbviYyM5E45TeDh4QEHBweo1Wqpo9ik27dvo2vXrigqKoJMJjPblvSnn37CzJkzJUxHrUnv3r2RkZFh+nr06NH485//jHvuuQddunSRMBnR/7CgJElFR0cjMzMToiha7Jrcy9syZs6ciV9++QU3btyAl5eX1HFsjtFoxGOPPYbo6Gjc/c/wpUuX4OfnJ1Eyam3mzp2Lbdu2mf6ddHBwgMFgwMSJE5GYmChxOqIKnENJkgoPD4cgWPbHUBAEhIeHW/Sa9mjZsmUAgFWrVkmcxDbJZDKsX7++yv8GXF1d4ePjI10wanUmT55s9kuHwWCATCbDyy+/LGEqInMsKElSCoXC4vtth4WFNXjBD1UVGhoKJycn/PTTT1JHsVlyuRxbt27FhAkTTMcGDBhg8V+yyLrdXVACwIcffoh7771XokREVfFfLZJcYGAgQkJCLHKt0NBQBAYGWuRaBAwdOhTp6elsEN+MXFxcEBsbC19fXwAVjzOJ7tS3b1+z3rvz5s3DX//6V+kCEVWDBSW1CsHBwYiIiIBcLm/w6IwgCJDL5YiIiMDEiRObKaF9WrBgAURRxLfffit1FJvWrl07HD16FIIgmFpd6XQ6qFQqZGdnQ6VSsfG5HZPJZOjRoweAigU669atq9Jzl0hqXJRDrYpWq0VsbCwyMjIgCEKti3Uqz/v5+SE8PJyPuZtBaWkp2rRpgwkTJnDyfwt49tlnUVBQgMDAwGr3u1coFAgICEBQUBA7GNiZSZMmITExEVevXoW3t7fUcYiqYEFJrZJarUZycjLS09Oh0WiqnPfw8IC/vz8/WFuAj48PcnNzUVxcLHUUm3XnL1J3txC6W+V5/iJl23Q6HTQaDfR6PeRyOZYuXYry8nLu+06tFgtKavXu/oeVO+C0rGXLluGzzz7D8ePHMXLkSKnj2BylUon4+HiIotig9lmCIEAQBISFhXHesI2o/EU6LS2t2hHqtm3bYuDAgfxFmlolFpREVKvLly/Dz88P8+bNw6ZNm6SOY1MSExORkJDQ5OuEhIRw9xQrxhFqsgUsKImoTu7u7nB2dkZOTo7UUWyGUqlETEyMxa4XERHBkUorxBFqshVc5U1EdZo4cSJyc3Nx8+ZNqaPYBK1Wi/j4eIteMz4+vtrHpNR6JSYmIiYmBnq9vsG7hYmiCL1ej5iYGC6Yo1aBBSUR1SkyMhIAsHr1aomT2IbY2Nh6FxApKSlYuXIl3n777VpfJ4oiYmNjLRGPWoBSqbTIdAcASEhIgFKptMi1iBpLLnUAImr9pk+fDkdHR2zbtg1vvPGG1HGsmlqtRkZGRr1eW1BQgJ07d6Jdu3YoLS2t9bWiKCIjIwNqtZoLNlq5ukaoy8rKsH//fqhUKqhUKhQXF2PSpEm1bgARHx8PX19fzqkkyXCEkojqJJPJMHjwYFy8eLHBj+bIXHJycr2bUsfGxqJXr17w8/Or1+sFQUBycnJT4lELqGuEuri4GCkpKTAYDOjXr1+9rskRapIaC0oiqpd58+ZBFEV89913UkexamlpabWu4q106tQpZGZm4r777qv3tUVRRHp6elPiUTOrHKGuraB0d3fHiy++iMWLF2PKlCn1uu6dI9REUmBBSUT1snTpUgDA+vXrJU5ivcrKyuq1cKawsBA7duzA1KlT0aFDhwbdQ6PRcJvGVqw+I9QymaxRWytyhJqkxIKSiOqlbdu28Pb2xrFjx6SOYrXquwo7Li4OnTp1anQj+ep2l6LWob4j1I3BEWqSEgtKIqq3GTNmoLi4GCdPnpQ6ilXS6/V1viY1NRUXL15EREREo0ap6nsfann1HaFuCo5Qk1RYUBJRvT3//PMAgI8//ljiJNZJLq+9sUZZWRni4uIwatQotGvXDiUlJ
SgpKYHBYAAAlJSU1KtYqOs+JI2W6hPKEWqSAv/VIaJ669u3L9q1a4edO3dKHcUqeXh41Hq+uLgYRUVFOHLkCI4cOVLl/Pvvv4++ffvikUceadJ9SBotNXLMEWqSAgtKImqQ8ePHY8eOHcjLy4O7u7vUcayKk5MTFApFjSNVbm5uWLRoUZXjBw8exJUrVzB//ny0adOm1nt4eHjAycnJInnJslpq5Jgj1CQF/tQRUYMsXboUO3bswOrVq/Haa69JHcfqBAQEICkpqdqFGY6OjvD19a1y/OTJk5DJZNWeu5u3tzeuX7+O27dv4/bt2ygoKMDt27fh6+uLIUOGWOR7oMZpqZFjjlCTFFhQElGDREREQC6XY9u2bSwoGyEoKAjHjx9vtus/+eST1e65PnjwYJw+fbrZ7kt1q2uE+k5paWnQ6XSmObNqtRrnzp0DUPFLSU2j0ByhJqmwoCSiBhEEAQMGDMC5c+cgiiIEgWv7GsLT0xN+fn7IzMys965DM2fOxMyZM2t9jSAIMBqN1RaTMpkMc+fObVTe+tDpdNBoNNDr9ZDL5SxqalHbCPWdYmNjkZ+fb/o6NTUVqampAIDly5dX++crCAL8/f0tG5ionlhQElGDzZ07FytWrMArr7wCjUaDixcvIi4uDq6urlJHswrh4eGIioqy6DaWgiAgMjISPj4+VeZhGo1GGAwGi/4CoFarkZycjLS0tGpH3BQKBQICAhAUFMS9xe9Q3xHqyo4KDSGKIoKCghoTi6jJZMbm6rBKRDanuLgYmzZtwo8//ojff/8dQMXol9FoRH5+Ptq3by9xQuuhVCoRExNjsetFREQgMDAQAPDCCy/gww8/hNFoNO26IooiXFxc8Mgjj+Cjjz4yW1CVkpKCDz74AGvWrKlzoZVWq0VsbCwyMjJMf/c1qTzv5+eH8PBwKBQKS3yrVi86OrpBI9T1IQgCfHx8sGDBAotdk6gh+KyKiOpt3bp1eOqpp8zaBhmNRnTt2pXFZAP5+PhYrEF8aGioqZgEgHfffde0B7jRaMTJkyfx/vvvo3379tiwYQM8PDwwadIk05zKFStWYOvWrZg9e3atLWeUSiWioqKQmZlpunZtKs9nZmYiKioKSqWyKd+mzQgPD4coihbdMUcQBISHh1vsekQNxRFKIqq3wsJChISE4MSJE6Zm2wAQFhaG3377TcJk1qOsrAzR0dF45plnIJPJEBsbi6SkJIii2KARK0EQIAgCwsLCzIrJSrdv38b48ePh7e2NuLg40/Hff/8d//jHP3DmzBkAQI8ePZCdnQ2gYkRx2bJlWLVqVZXrJSYmIiEhoaHfbhUhISEIDg5u8nWsjSiKSE5ORnx8PP7973+jT58+uP/++y12/TtHqImkwIKSiBpEo9FgwoQJuHDhAkRRhEwmwwsvvIB3331X6mit2q1bt7BmzRp88sknUKvVAIDp06cjPj7e7DGyIAi1FpaV5+vzGNlgMMBgMFS7gOPKlStYtmxZtY/d16xZg6eeesr0dXM+nrdlBoMBW7ZsQVxcHH777Tfk5eWZznl6euKHH36wSJEeGhqKiRMnNvk6RE3BgpKIGkylUmHs2LGmR5+bNm3CvHnzpA3VShUWFuKf//wnvvrqK5SXl5sVizt37sS0adNMX1cudElPT692+zwPDw/4+/tbbKGLVqtF165dUVZWZnZcJpNh8+bNmDt3LrRaLaKioiy6+4pcLkdkZKTNz6ncsWMHwsLCqv0lYdu2bfjTn/4EpVKJ+Pj4Bo9Qi6IIJyenGkeoiVoaC0oiapQrV66gT58+0Ol0OHbsGEaNGiV1pFYpMTERkyZNqnJcJpMhLy+vxrmnLdGKZ9WqVXjuuefg4OBgKnrunMqwbNkyBAYGIisrq9pi58aNG9i7dy9ycnJQXFwMuVyOTp06YeTIkRg6dGiN97WXBSQGgwEPP/wwfv75Z7P5kh06dEBOTg6cnZ0BoFEj1JcuXUJycjIOHDiAjh07Nvv3QlQXtg0iokbp1asX1qxZg8WLF0Oj0bAXYQ2Cg4OxefNmLFy40GyUr1+/frUuZHJycoKXl1ezZhs2bBjmzp0LNzc3tGvXDu3bt0e7du2Ql5eH06dP4+DBg7UWK6WlpWjfvj0GDRqE9u3bQ6fT4cyZM/j555+Rl5dXbSENVIyuZWRkQK1W23RLIQcHB7z88sv45ZdfTAWlXC7HY489ZiomgYoWSwsWLKj3CPWQIUPQo0cPABU/R9999x2mTp3aMt8UUQ04QklEjZabm4vnn38e/fr1q3ZUhb0IK9y8eRM9evRAWVkZHBwcAABPPPEEvvzyS4mT1S4+Pr5eTbjvtnbtWty+fRt//etfa3yNIAgICgpCWFhYU2O2WomJiZg6dSr0ej169uyJq1evwmg04vTp0xg8eHCt763tFzRRFE0/R5X+9re/4e233zYrVIlaEtsGEVGDabVaREdH44svvkDfvn1rfESn1WqRlJSEqKgoREdH12vLOVsjiiICAwNRVlaGNWvWIDAwEAaDAePGjZM6Wp3S0tIa1dqmTZs2dTZQF0UR6enpjY3W6v3www8ICQkBAOzbtw9JSUnw9fXFyJEj6ywmgf+NUPfo0QNeXl5mo/2CIFQpKD/66COMHDkSubm5lv1GiOqJj7yJqEHuXEQANLwXob0tIrj33nuRlZWFv//973jqqaewcOFCbNmyBXPmzJE6Wq3Kysrq/QtAZU/F0tJSnDt3DpcuXcKMGTPqfF/lVAlbmxrx2WefYdmyZXB1dcXx48cxaNAgAMCZM2dQXl5ukXs4OjqazXcVBAEXLlxAVlYWOnfubJF7EDUEC0oiqrem9CKsXMUaExODwsJCu+hFuGLFCuzevRuTJ0/Ghx9+CABwcXGpsjVia9SQ0eS4uDikpKQAqJg3GBYWVu8tADUaTbPPFW1Jr776Kt566y24u7vjzJkzprmOQMXIraU4OjqitLTU9PWDDz6IVatWoXv37ha7B1FDsKAkonpRKpUW6ZkHAAkJCXBzc7Ppkcqff/4Z7777Lrp3745du3ZJHafBGtImaOLEiQgMDERRUREuXryI3377DTqdDuPHj7fofVq7J554AuvXr0e3bt1w7ty5OrexbApXV1fcvn0b9913H+Li4nDx4kUWkyQpFpREVCetVov4+Pgaz9+4cQP79u3DtWvXUFpaig4dOmDw4MEYN25cjY8z4+Pj4evra5O9CNPS0vDwww/DxcUFKSkpkMut75/ahmR2d3c3FU99+vQBAOzZswfDhg1D27ZtLXaf1iw8PBxxcXHo378/Tp482eyP8b///nsoFAoMGzYMM2bMQHx8PPbt24fJkyc3632JasJFOURUp9jY2BoX3uTm5mL9+vXIy8vD9OnTMW/ePAwaNAj79+/Hjz/+WOM1RVFEbGxsc0WWTHFxMUaNGgWDwYBdu3ahS5cuUkdqFA8Pj0a/t3v37hBFsV6PzZtyn9ZAFEWMHDkScXFxGD9+PM6ePdsic0JDQkIwbNgwAMA333wDQRCwePHiZr8vUU1YUBJRrdRqNTIyMmosKM+cOQO9Xo85
c+Zg0KBB8PPzQ0hICAIDA3HhwgWUlJRU+747exHakjFjxiAvLw+rVq3ChAkTpI7TaE5OTo0ePb58+TJkMlmd77f2XqWlpaXo06cPkpOTMWvWLBw8eLDO1e3NoVOnTpg7dy4yMzPxww8/tPj9iQAWlERUh+TkZMhkshrPV7Yvubv/nYuLC2QyWZX2JncSBAHJycmWCdoKPProozhz5gzmz5+PZcuWSR2nyQICAmr9u9++fTt+//13nD17FpmZmUhNTcW2bdtw+vRpjB07ttbH3TKZDP7+/s0Ru0XcvHkTvXr1wqVLlxAZGVnraHxLWLt2LRwdHfHnP/9Z0hxkv2xj8goRNZu6ehEOHToUR48eRVxcHKZOnYq2bdsiMzMTycnJGDlyZK0jULbUi/DTTz/Fpk2bMGjQIHz77bdSx7GIoKAgHD9+vMbz3t7eOHHiBE6dOoXS0lI4OTmhS5cumDlzZq1bLwIV7aRWrlyJH374AQ4ODjAajab2Q1OnTm1Ve8MXFBSgsLAQ3bp1A1AxAjts2DAUFBTgzTffxCuvvCJxwooV5M888wxWr16NqKgoREZGSh2J7Ax3yiGiGpWVleG9996r83VqtRpbtmzBzZs3TcdGjx6N6dOn1zrCVemll16y6kefhw4dwsSJE9GhQwdcu3bNou1hpBYdHY3MzMxa95duKJlMhszMTGzYsMH0tSAIkMlk0Ov1mD17NrZu3Wqx+zXVrFmzsHPnThw6dAgGgwHjx49HWVkZ1q5diyVLlkgdz0Sv16N9+/ZwdHSEVquV5PE72S/+tBFRjeqzqEKr1eK7776Dq6srHn74YTz22GOYNm0aTp48ie3bt9frPtXtW2wtVCoVpk6dCgcHBxw7dsymikmgYvWypQsTBwcHvPHGG6YG3EajEQaDwdRC6J///KdF79cUOTk52L59O4qKijBx4kSMGjUK5eXl2L59e6sqJoGKFfMrVqxAQUEB3njjDanjkJ1hQUlENapPj8Ddu3ejrKwMCxYswIABA+Dj44Px48dj+vTpOHHiBDIzMy1yn9ZIr9cjKCgIpaWl2Lp1q6llji1RKBQW3287LCwMPXr0wK5du6odmf7ggw+Ql5dn0XtW0ul0UKlUyM7Ohkqlgk6nq/X10dHRpikft2/fhsFgwC+//ILw8PBmyddUK1asQIcOHfD+++9b7f+uyDpxDiUR1ag+PQJVKhU8PT2rFAaV881yc3Ph4+PT5Pu0RtOmTcO1a9fw0ksvYebMmVLHaTaBgYEoLCy0SGP70NBQU0P7IUOG4IsvvjAb6evWrRu2bduGH3/8EQ888ADWrVtXpbWQSqVCmzZt0L59+3rdU61WIzk5GWlpadWOuisUCgQEBCAoKAienp6m40ajEf/973/NHvcLgoCXXnrJNMWhtREEAR988AGefvppLF++HJ9//rnUkchOcISSiGpUnx6B7dq1Q25uLsrKysyOZ2dnA0C9PvStsRfhP//5T+zbtw9Tp07FO++8I3WcZhccHIyIiAjI5fIGPwIXBAFyuRwRERGYOHGi2bnFixdjwYIFAICpU6fi2rVr2LFjB3x9ffHzzz/D09MT999/P3JzcwFUjDCOHDkSY8aMQXFxca331Wq1iI6ORlRUFJKSkmqcwqHVapGUlISoqChER0ebXnfs2DGkpaWZvVYmk+Hs2bPYvHlzg/4MWtJTTz0FLy8v/Pe//0VhYaHUcchOcFEOEdVq9erVtc6lPH/+PL7//nv06NEDY8aMQZs2bZCdnY2DBw+iQ4cOePrpp2sdgfTw8LC6Fjtbt27FnDlz4O3tjczMTLta/KDVahEbG4uMjAwIglDrYp3K835+fggPD6+xL2VRURGee+45LF++HIMGDTId37t3L5555hlcvHgRMpkM06dPx5QpU/D3v/8dgiBg0aJF+Oqrr6q9plKpRHx8vGkP+foSBAGCICAsLAxz5841Kyg7d+6M++67DzNmzMD999/fqheS/fzzz5g1axbmzJmD77//Xuo4ZAdYUBJRreLj45GUlFRr66DLly/j4MGDyMnJMW292KdPH0ycOLHORSqjRo2y+Bw9S1u/fj3OnDmD9957D5cvX8bgwYPh5OSEK1eumD0itSeVj5HT09OrXVTl4eEBf3//Ko+RGyMxMRFPP/00zp8/X+Xchg0b8Nhjj1V5vSUez+/ZswdKpRIrVqxAeHg4Bg8eXK+uBa1F7969kZmZiRs3bpgWQBE1FxaURFQrtVqNqKioZrv+l19+CScnJ/Ts2dM0miSKIpycnLB+/fpWsdClf//+OH/+PIYPH4709HQUFhbi8OHDGDNmjNTRWgWdTgeNRgO9Xg+5XN5sO+C8/PLLVaYXODk5ISUlxTSyqVQqERMTY7F7RkREmOZ8WpsDBw4gODgY99xzD37//Xep45CNs86Z8ETUYjw9PeHn52fxXoSCIMDV1RU5OTkwGo24evVqlde0hlWq+fn5uHDhAgDgxIkTAIAXXniBxeQdnJyc4OXl1az30Ov12LRpU5XjlXMqU1NT4e7ujvj4eIveNz4+Hr6+vo3ehlJKEydOxJAhQ7Bz505cunQJvXv3ljoS2TCOUBJRnbRaLaKioixa4MnlckRGRuLUqVMIDQ01e6Quk8kQERGBX3/91WL3a6ydO3fi3nvvNTvm5OSEr7/+GnPnzpUolf3JysqCn59fjT+Dzs7OeOONN1BWVlbtLz4ZGRk4ffo0srKyUFBQABcXF3Tr1g2TJk0ydSSojiAI8PHxMS0csjZnz57F4MGDMXr0aBw9elTqOGTD7GcmORE1WnP1IlQoFJg8eTLeeusts3NGoxG//fYbnnnmGZSWllr0vpXq24/w8OHDVebN6XQ6/POf/6x1XilZlre3N27duoWcnByo1WrcvHkTt27dglarxaVLl/Dcc8+hpKSkxlH05ORk5OXlYcyYMZg/fz6mT5+OoqIirFu3DhkZGTXeVxRFZGRkQK1WN9e31qwGDRqECRMm4NixY0hJSZE6DtkwjlASUb1ZarFDaGioWfsYURRx3333YdeuXQCAgQMHQqVSITc3F46Ojnj00Ufx2WefVVngo9frodfr4eLiUq/7NqYfYWBgoOlRNwC0bdsWS5cuxbPPPouePXs2+Hun5lHX4rHCwkK4ubmZHSsrK8Pq1avRuXNnLFq0qMZrC4KAoKCgVr94rCZXr16Fj48P+vfvj3Pnzkkdh2wURyiJqN6aqxehIAj49ttv0aVLFxgMBqxbtw45OTn49ttv0alTJ2zYsAEdOnTAo48+atZX79FHH0Xfvn3r3CKysf0Is7KyTMVkly5d8PHHH+PGjRv497//zWKylUlLS6t1xPjuYhKoeEzu6emJgoKCWq8tiiLS09ObnFEqPXv2xH333YfU1FTTL21ElsYRSiJqsOboRQhUzPc6fPgwnnrqKbPjP/74I55//nlkZWXBwcEBs2bNwnPPPYfx48cDAO677z5s37692iK3Kf0I9Xo94uLiMG3aNLz55ptWu6OPrSsrK8N7773X4PeVlpbi448/hq+vb73mw7700kutuvdkbTQaDTp
37ozu3bvjypUrUschG8SCkogarSV7EQJAbGwsli1bZtofXCaTmUal3n//ffzzn/80e31TH9EbjUbIZDKEhIQgODi40deh5qVSqbBmzZoGv+/HH3/EuXPn8MQTT9S6MKfS008/3eyr2ZvTokWL8M0332Dz5s145JFHpI5DNoYFJRFZREv1IgQqGo0/8cQTZsdkMhn27dtnKvzYj9B+ZGdnY/369Q16z969e5GYmIiwsDCMHj26Xu9ZsmQJevTo0ZiIrUJpaSnat2+PDh06WO0iI2q9OIeSiCyishdhjx494OXl1ayPBvft21fl8bbRaMTUqVNx+vRpaLXaZulHWNdcTZJGQ6ci7Nu3D4mJiQgNDa13MdmY+7Q2Li4uWLZsGW7evIlVq1ZJHYdsDEcoicjqtGvXzmxxzp0cHR2xcuVKGAyGGudMZmdnIyEhAVlZWTAajejevTtCQ0NrXWhj7f0IbZlOp8O7775br9fu27cP+/btw+TJkzF58uQG3cea51BWEkUR7dq1gyAIyM/Pt6t96Kl58SeJiKzO2bNncfz4cSQnJ0OpVOLkyZM4ffo0YmNjERkZifLy8hqLyWvXrmHDhg0oLy/HzJkzMWvWLOj1enz99dfIysqq8Z7W3o/Qljk5OdVrJ5v9+/ebpkU0tJhszikcLUkQBLz66qsoLCzEq6++KnUcsiEcoSQim1JXP8Lo6GioVCosX77cVCCUlZVh1apV6NixI5YsWVLjta29H6Etq+vv/fDhw9i5cyf8/f0xadKkKue9vb1rvLat/b2LoohOnTqhuLgYBQUFNlEok/Sse0IIEdFd6upHmJWVhYCAALMPUWdnZ/Tq1Qt//PEHbt++jXbt2lX7XmvvR2jLgoKCcPz48RrPV+7Hnp6eXu3f4cqVK2t8ryiKCAoKanLG1kIQBPznP//B448/jmeffRZffvklTpw4AYPBYFPfJ7UsFpREZDPKysrqXDhjMBiqXVzh4OAAAMjJyamxoAQq+vnpdDqO6rQynp6e8PPzQ2ZmZrXTHRYvXtyo61bOnbVE26vWZPHixXj55Zexbt06XL9+HTExMRgwYAB30qFG4xxKIrIZ9VmF7enpiezsbLOiw2Aw4Nq1awCAkpKSOq9RXc9Nkl54eLjFF5kIgoDw8HCLXrM1yMrKwsCBA2EwGBAbGwsAKC4uljgVWTMWlERkM/R6fZ2vGTVqFG7duoXffvsNBQUFyM/PR2xsLPLy8gBU9LO0xH2o5SkUCovPcwwLC6vXgh9rkpCQgN69e5ua/ldOEdHpdFLGIivHR95EZDPq0ycwMDAQxcXFSExMRHJyMgCgR48eGDduHA4dOlTr4+6G3IekERgYiMLCwibtkFQpNDTUJpvZ9+zZE15eXqZR+UosKKkp+K8iEdkMDw+Per1uwoQJGDNmDG7dugVnZ2e4u7sjJiYGjo6O9dqCr773IWkEBwfDzc2t0Xu4C4KAsLAwmywmAaB37944e/Ys/vznP+Pbb781HS8tLZUwFVk7PvImIptR336EQMUoY5cuXeDu7o68vDycPXsWI0aMgKOjY63vs5V+hLYuMDAQkZGR8PHxAYA651ZWnvfx8UFkZKTNFpOV2rdvj+joaHz33Xdo06YNAKCoqKjK63Q6HVQqFbKzs6FSqTiKSTViH0oisil19SPMycnBH3/8gW7dusHBwQE5OTk4ePAg3N3dsWjRIjg7O9d4bVvrR2gv1Go1kpOTkZ6eXu2CKg8PD/j7+yMoKMjmVnPXx5UrVzB06FDk5+cjKysLzs7OSE5ORlpaWrUL3RQKBQICAuz2z4uqx4KSiGyKWq1GVFRUjedv3ryJmJgY5ObmQqfToUOHDhg0aBAmTJhQr5HHyMhIfohaMZ1OB41GA71ej+DgYBgMBly5ckXqWJLLy8tDYGAg5s+fD7lcDplMVms/18rzfn5+CA8Pt7mFS9RwLCiJyOZER0fX2I+wsbiXt20pKiqCm5sbAODnn3/Ggw8+KG0giSmVSsTFxcFoNNZaSN7NHuacUv1wDiUR2Rz2I6S6/P7776b//7HHHsP169clTCOtxMRExMTEQBTFBhWTQMUuQnq9HjExMUhMTGymhGQNWFASkc1hP0Kqy08//WTqOVpYWIj58+dbdETbWiiVSou0WAIq+lsqlUqLXIusDwtKIrJJgYGBCAkJsci1bLUfob3S6XT49ddfTaNxBoMB+/btw8cffyxxspal1WoRHx9v0WvGx8fXa8cqsj2cQ0lENk2pVLIfIZnZuXMn7r333irH5XI5rl69iq5du0qQquVFR0dj7969OHnyJLKyslBQUAAXFxd069YNkyZNMuvJevToUZw5c8a0l33btm3h7e2NSZMmoXPnzqbXca6x/WJjcyKyaYGBgfD19UVsbCwyMjIgCEKthWXleR8fH65etVF3P+J1dHTE5MmTMWLECHTo0EGiVC1LrVYjIyMDx48fR3FxMcaMGQNPT08UFRXhyJEjWLduHR599FH4+fkBqNjjPiAgAF26dIGrqyu0Wi0OHjyItWvX4umnn0anTp0AVMypzMjIgFqtlqwbwp0r+eVyOXvHthCOUBKR3WA/QgKAq1ev4siRI+jbty/uueceAEBubq7EqVpWZb/W27dvm1a7VyorK8Pq1avRuXNnLFq0qMZrqNVqfP755wgODkZoaKjpuBT9Wiv/t83emdLhCCUR2Q1PT0/ThxxHMexXz5490bNnTwAVO+OcOHFC4kQtLy0tDUajsUoxCQDOzs7w9PREQUFBrdeo3GHn7o4KoigiPT3dcmFrodVqTU8fauudqdVqkZSUhOPHj7N3ZjPhohwisktOTk7w8vJCjx494OXlxWLSTg0cOBB6vd6uFpKUlZXV+v2Wlpbixo0b1Y7kVbYJUqvV2L59O9q2bYvhw4dXeV3lXMvmpFQqERUVhczMTACos+VR5fnMzExERUVxRbqFcYSSiIjs1pgxY7Bx40bs3bsXDz30kNRxWkRdxXNcXBzKy8sRHBxc5dzbb78Ng8EAAOjYsSMee+yxGuedajQaeHl5NT1wNRITExvd7qhygV5MTAwKCwur/T6p4ThCSUREdqty7t/BgwclTtJy9Hp9jef27t2LM2fO4N577zVb5V1pyZIlWLJkCWbNmgUnJyds3LixxvmnlfcxGAw4ePAg/va3v8HPzw/vvvtuk/Kzd2brxBFKIiKyWwEBAZDJZDh16pTUUVqMXF79R/++ffuQmJiI0NBQjB49utrXVBaZ3t7e6Nu3L1avXo09e/bgkUceqfLaXbt24fDhw/jpp5+g0Wggl8uh1+tx69ataq99/fp1/OUvf8GLL76IUaNGVfuayt6ZGRkZOH36dK3tjkRRxNGjR3Hp0iXk5uaipKQE7u7u6Nu3LyZMmABXV1cAFQuUfH19OaeyiThCSUREdq1du3bIyMiQOkaL8fDwqHJs37592LdvHyZPnlzvR8DOzs7o1KlTtQWi0WjE0qVLsW7dOlNHhcoRSzc3t2pbd+3cuRM///wzxo0bh3fffdf0aP1OsbGxEEURycnJyMvLw5gxYzB//n
xMnz4dRUVFWLdunenvsry8HPv27YO7uzumT5+O+fPnIzAwECkpKfjqq69QXl4OoKLwjI2Nrdf3TDXjCCUREdm1bt264cqVK1LHaDFOTk5QKBSmuZT79+/Hvn37EBwcjMmTJ9f7OkVFRcjJyTGtmL/7Hu7u7tU+Dn/99dfxxhtvoG3btujatSsCAgIQGBiItLQ00yjmihUr8Ntvv2Hz5s3w9vYG8L/emQAwY8aMKivU/f39sXr1ahw4cAB+fn5wdHTEc889Z1qNDgC+vr7o0KEDtm3bhtTUVAwdOrRV9M60BRyhJCIiu9anTx+UlJTUOrfQ1lQ+6j98+DASEhLg7++PgIAAZGVlmf0HVKz6/u9//4sjR47g4sWLyMjIQFJSEjZs2ACDwYBJkyaZXVsQBAwfPhzXrl3DG2+8AUEQ4ODgYDq/bNkyTJ06FZ6enrh27Rp+++03vPXWW9iyZYvZ38HBgwfRu3dvvPDCCyguLkZycrJp//X6tDsSBMGsmKzUvXt3ADBriyQIApKTkxv7x0ngCCUREdm5ESNGYPv27Th+/DjGjRsndZwWERQUhOPHj+PChQsAgPT09Gp7R65cuRJyuRxeXl5ISUlBQUEB9Ho93Nzc4OPjg4cffths60Wg4hFyUFAQ5HI5Xn31VUydOhVz5sxBVlYWPDw8sHr16iqvT0lJwb333ltlBXp5eTk++OADfPDBB1i5cmWt31NluyNfX99aX3f58mUAMBuNbMnembaKBSUREdm1yse8CQkJdlNQenp6ws/PD0uWLKlzj3u5XI7777+/Xtet3Mv7zmJt7NixOHv2LJ5//nm0a9eu2vcMGTIE+fn5AAAHBwcYDAb06NED9957L/r06YNOnTqZRkxrUlu7o0oFBQXYvXs3unXrhj59+pidq+ydyZ60jcOCkoiI7NqYMWMAACkpKRInaVnh4eGIioqqs6BsCEEQEB4eXuV4+/btsX79+hrfV1ZWBmdnZ/j6+mL27NmYOXMmhgwZYnrErVKpsGbNmhrfX9nuKCwsrNp2RwBQXFyMTZs2AQD+9Kc/VdnhB2je3pm2jgUlERHZNScnJ7i4uJge/9oLhUKBsLAwxMTEWOyaYWFhjWq/0759e9y+fdtsruWdapvfWp92RyUlJYiOjkZBQQEWLVpU7Ur3uu5DteOiHCIisnuenp64fv261DFaXGBgIEJCQixyrdDQUAQGBjb6/TUVk0DtvTPrandUUlKCb775BlqtFgsXLqx1BLKm+1DdWFASEZHd8/X1NVv1a0+Cg4MREREBuVxe7WPg2giCALlcjoiICEycOLGZElbfO7M+7Y7uLCYXLFiArl27Nvg+VD8sxYmIyO4NHToUiYmJuHLlCnr16iV1nBYXGBgIX19fxMbG1qvJuyAIEEURPj4+CA8Pb/ZdZu7unVldu6M7eXt7o7y8HNHR0bhx4wamT58OURTNXte2bVuzAtLDw4MLcpqABSUREdm98ePH49NPP8WuXbvwxBNPSB1HEgqFAgsWLEBOTg6efvppBAYGwmg0Vnmdh4cH/P39ERQU1KKNwAMCApCUlASj0VivdkeFhYWmaQw7duyo8pqhQ4di5syZACoKZH9//2ZMb/tkxup+WoiIiOzIzZs34enpiSVLlmDdunVSx5HUm2++iddeew1ffvklFi9eDI1GA71eD7lcLukonlqtRlRUVLNdPzIykjvlNAFHKImIyO516tQJDg4OOHv2rNRRJJWamorXX38dQMXWik5OTq2mjU5l78zMzEyLtzq6u3cmNRwX5RAREQFwd3e3qz2973bz5k2EhYXBYDAAAE6ePCltoGqEh4c3eOFQXWrqnUkNw4KSiIgIFQs5NBqN1DEkodPp8OCDDyI7O9t0LCEhQcJE1avsnWlJje2dSeZYUBIREQEYMGAAdDodiouLpY7SooxGI55++mkcOXLE7FFydnY2rl27JmGy6rWm3pn0PywoiYiIAIwaNQpARbNse6LVavHtt99CFEXTVoeVDh06JFGq2llD70x7w4KSiIgIwJQpUwBUNMy2Jx4eHjh//jzWrl2Ldu3amZ07ceKERKnqFhgYiMjISPj4+ABAnYVl5dxQHx8fREZGcmTSwtg2iIiICIAoipDL5Zg2bRp+//13qeNIwtnZGf7+/ti5cyeOHj2KkSNHomfPnlLHqpNarUZycjLS09OrnQfr5uaGPXv2IC0tDSkpKWjfvr0EKW0bC0oiIqL/165dO3Tu3BmXLl2SOkqLS01NxcCBAxEZGYnPP/9c6jiNptPpqvTO/OOPPzBs2DAAwJAhQ7B79262CbIwPvImIiL6f15eXsjJyZE6hiS+/vprAMDChQslTtI0lb0ze/ToAS8vLzg5OZktLjp79izGjh1bZbtGahoWlERERP/P398fRUVFFm2cbS12794NBwcHjB49WuooFndnQSmKIjIzMzF69GhcvHhRwlS2hQUlERHR/6tcqHH69GmJk7S88+fPW8V8yca4du0a5PL/bQ5oMBhw48YNPPnkkxKmsi0sKImIiP5fcHAwAGDPnj0SJ2lZKpUKxcXFGDdunNRRmkV2djYMBoOpLZKjoyOeeuqpZt0b3N6woCQiIvp/lX0Jk5KSJE7SsirnTz788MMSJ2keZWVlAICpU6fC0dERnp6eWLNmDQYOHChxMtvBVd5ERER3cHZ2Rr9+/XDq1Cmpo7SY4OBgHDx4EHq93uJ7ZbcG+fn5KC0tRZcuXTBz5kz88ssvuHjxIgICAqSOZjNYUBIREd2hW7duKCsrw61bt6SO0mIUCgVcXFxw48YNqaM0u3PnzmHQoEGYPXs2tm7dKnUcm2F7v4YQERE1gY+PD/Lz86WO0WIKCwuRl5eHoKAgqaO0iIEDB8LLywtxcXFSR7EpLCiJiIjuMGjQIBgMBqhUKqmjtIgtW7YAAGbNmiVxkpazcOFCFBcX49dff5U6is1gQUlERHSHypXO9rLS++effwYAzJkzR+IkLefVV1+FTCbD22+/LXUUm8GCkoiI6A5TpkwBABw+fFjiJC0jOTkZCoUCbdq0kTpKi3Fzc8PgwYORkpKC0tJSqePYBBaUREREd/D29oYgCHbR3Fyv1yM3NxdDhgyROkqL++c//wlRFPHBBx9IHcUmsKAkIiK6S4cOHXD58mWpYzS7uLg4GI1G3HfffVJHaXGPPPIInJ2dsW7dOqmj2AQWlERERHfp3r07bt68KXWMZlfZNmfRokUSJ2l5giBg2rRpyMrKsotfHpobC0oiIqK79O3bF2VlZdDpdFJHaVaHDx9G27Zt0blzZ6mjSKJyUc7LL78scRLrx4KSiIjoLpU9GQ8ePChxkuaVlZWFfv36SR1DMkOGDEHnzp2xfft2qaNYPRaUREREdwkNDQUA7N+/X+Ikzefw4cMwGAyYNm2a1FEk9eijj6KoqAi//fab1FGsGgtKIiKiu4wYMQIAkJKSInGS5vPtt98CAB577DFpg0jstddeg0wmw5tvvil1FKvGvbyJiIiq0bZtW/To0QMXLlyQOkqzGDhwINLT01FWViZ1FMkNHjwYqampK
CkpgZOTk9RxrBJHKImIiKrRuXNn3LhxQ+oYzebSpUvw8/OTOkar8Le//Q2iKOI///mP1FGsFgtKIiKiavTu3RuFhYXQarU4cuQI0tPTpY5kMWlpaSgrK8PkyZOljtIqLFy4EE5OTlizZo3UUayWXOoARERErcm2bduwZ88e/PHHHzAajfDw8AAAjBkzBkeOHJE4nWVs3LgRADB//nxpg7QSgiBgypQpiI+Px9WrV9GzZ0+pI1kdjlASERHd4R//+AfWrFmD69evm47JZDJMmDBBwlSWtXPnTjg4OGDcuHFSR2k1KhflvPLKKxInsU5clENERHSHHTt2ICwsrMrxw4cPY+zYsRIksjw3Nzd06tQJmZmZUkdpVTw9PVFWVoaCggKpo1gdjlASERHdYfr06Xj22WchCP/7iPT09MTo0aMlTGU5arUaRUVFNlMcW9K8efNw+/Zt7Nq1S+ooVocFJRER0V3ef/999O3b1/T1Qw89ZFZgWrNvvvkGAPDwww9LnKT1WblyJQDg9ddflzaIFbKN/3UQERFZkIuLC7Zu3Wr6etasWRKmsazY2FjIZDJERERIHaXVUSgU6N+/P44ePQq9Xi91HKvCgpKIiKgagwYNMm1LOGbMGInTWM7p06fh6ekJuZyNXqrz3HPPwWAw4OOPP5Y6ilXhohwiIqIaHDp0CKGhoUhISEDPnj0hl8vh4eFhtbuplJaWwtXVFWFhYdy7ugaiKMLFxQU9evRARkaG1HGsBn89ISIiuotarUZycjLS0tKwYsWKKos0FAoFAgICEBQUBE9PT4lSNtyWLVsAAA8++KC0QVoxQRAQEhKCnTt3Ijs7Gz169JA6klXgCCUREdH/02q1iI2NRUZGBmQyGWr7iKw87+fnh/DwcCgUihZM2jgPPPAAtm/fjtu3b8PNzU3qOK3W8ePHMXr0aDz22GPYsGGD1HGsAgtKIiIiAEqlEvHx8RBFEaIo1vt9giBAEASEhYUhMDCwGRM2Xbdu3VBcXIy8vDypo7R6HTt2hF6vR35+vtRRrAIX5RARkd1LTExETEwM9Hp9g4pJoGLOnV6vR0xMDBITE5spYdOJogiVSoXBgwdLHcUqzJ07FwUFBdi7d6/UUawCC0oiIrJrSqUSCQkJFrlWQkIClEqlRa5laTt27IDRaMSMGTOkjmIVKntRsidl/XBRDhER2S2tVov4+HgAQFlZGfbv3w+VSgWVSoXi4mJMmjQJISEhZu+pbH5dnY4dO0Iul8PX17fVzan8/vvvAQCLFi2SOIl16NSpE/r06YPDhw9Dr9ezzVIdOEJJRER2KzY21vSIu7i4GCkpKTAYDOjXr1+N71myZEmV/6ZPnw4A6N+/P0RRRGxsbIvkb4hDhw6hTZs26Natm9RRrMby5cuh1+vx2WefSR2l1WNBSUREdkmtViMjI8NUULq7u+PFF1/E4sWLMWXKlBrf5+3tXeU/lUoFABg+fDhEUURGRgbUanWLfB/1dfXqVfTp00fqGFZl6dKlkMvlLCjrgQUlERHZpeTkZMhkMtPXMpnM7Ov6Kisrw7lz5+Dj44OOHTsCqFj5nZycbLGsTZWUlAS9Xo+pU6dKHcWqCIKA4OBgXLp0CQcPHsSKFSvwyCOPNHjhlj3ghAAiIrJLaWlptfaZrK+zZ8+ivLzcrGWQKIpIT09v8rUtJTo6GgDnTzZUXl4eAgMDsXfvXkycOBFARZEZHR0NQeCY3J1YUBIRkd0pKyuDVqu1yLWUSiVcXFzQv39/s+MajQY6na5VbNOYkJAAR0dHDBo0SOooViMuLg4zZ86EXq83O1658IrMsbwmIiK7Y6liMjc3F9euXcPgwYPh6OhY5bxGo7HIfRrjwoULSElJgV6vR3p6Onx9fSXLYo169uwJFxeXKiOR3bt3lyhR68aCkoiI7M7do06NVdlzsqYdcix1n8ZYuHAhgoKC0K5dO5SWlsLNzQ0JCQkoKSmRLJM1GTx4MA4dOgSFQgEHBwfT8V69ekmYqvViQUlERHbHEo8s9Xo9Tp8+ja5du6Jr167Ndp/GGjx4MARBQGlpKQDg5MmTCA0NrdJXk2o2ePBgHD161KzVEkcoq8eCkoiI7I6Hh0eTr3HhwgUUFxfXun+3Je7TWBMnTjRbjVz5/8+bN0+qSFapd+/eOHr0KDp37gwAyMrKkjhR68RZpUREZHecnJygUCiqzKVMS0uDTqeDTqcDUNGr8ty5cwCAgIAAswU2J06cgFwur3FvbA8PD0kX5EyYMMHsa5lMhieffBLLli2TKJH16tatG86cOYPu3bubpgzodDpoNBrTLjpS/31LTWa0RM8EIiIiKxMfH4+kpCSz1kEff/wx8vPzq3398uXLTdsp5ufn45NPPsGQIUMwc+bMKq8VRRF6vR6BgYFwdnZGWVmZ6T+dToewsDD07Nmzeb6x/2c0GtGpUydoNBrIZDIEBwdj165d1S4eovp56623kJqaijFjxlS7sEuhUCAgIABBQUHw9PSUIKF0WFASEZFdUqvViIqKarbrf/bZZ7h582a159544w28+uqrzXbvSkOHDsXp06fh6emJ8+fPS/oI3ppptVrExsYiIyMDMpms1v6llef9/PwQHh7e6vZ0by6cQ0lERHbJ09MTfn5+Fm9QLQgC/Pz88NRTT9V4fsGCBRa9Z03atGkDANizZw+LyUZSKpWIiopCZmYmANTZDL/yfGZmJqKiokydAGwdC0oiIrJb4eHhzVJQhoeH4+2338by5curbOfYpk0bnD171qL3rKTT6aBSqZCdnQ2VSgVPT09MmzatxnmeVLvExETExMRAr9c3eLvFymkPMTExSExMbKaErQcfeRMRkV1TKpWIiYmx2PUiIiJMK7/Ly8sRGhqKI0eOwGAwAPjfI1EvLy+88cYbePLJJ83e/80338DNzQ2zZs2q1/3UajWSk5ORlpZW7bw+V1dXDB482C7n9TVFc/5c2CIWlEREZPcSExORkJDQ5OuEhoaa9nyulJubi6FDh0KlUuGee+7Btm3bsHz5cmzevBk6nQ4dOnTA888/j1dffRU3b96Et7c3DAYDEhISqlzrTpzX13y0Wi2ioqJw8eJFnD59GllZWSgoKICLiwu6deuGSZMmmfWmvHLlCk6ePAmVSoXc3FwYDAazRVxARU/SyMhIm/2zZ0FJRESEihGp+Ph4iKLYoMebgiBAEASEhYXVOAKVlJSEhx56CFu3bsWYMWMAVDRGf+WVV/DZZ5+hqKgILi4uGDBgAE6cOAGZTAaFQoFTp05V20i7ObMSEB0djczMTHz//fcoLi7GwIED4enpiaKiIhw5cgTXr1/Ho48+Cj8/PwDAvn37cOLECXTt2hWlpaXIzMysUlAKggAfH58Wmz/b0lhQEhER/b87R/0EQai1WKs8X99RP6PRWGU+JVAx1+7TTz/Fm2++iVu3bpldf8SIEThw4ACcnZ1Nxy01mhoSEoLg4OAmX8fW3Ln6v7CwEG5ubmbny8rKsHr1anTu3BmLFi0CUPF3WDkX99ChQ9i1a1eVgrJSZGSkTU49YGNzIiKi/6dQKLBg
... [base64-encoded PNG image data truncated: display_data output of the molecule / DGL graph visualization cell] ...
B4lXOWt7PN0H00NDSor6/v2nvXlem/fPmiRCKhjo4OhUKhou88efJEz58/1/r6urq6ulRdXS1JOjo60tbWllpbWwue9/l8LMi5A7YNAgAArkgk4m5MXi6WZSkSiZS1TUcgEHDnQ379+lXxeFx+v1+BQEA7OzsFL0coFFIul9Pc3JzS6bTW19c1NzenmpoatbW1FfT7apkepVVdOBMlAAAAJKVSKc3Pz5etvWg0qubm5rK1d1Umk9HExIQkaWpqStvb2yWf/fDhg/v+tqMXHYODgw8aWf3bECgBAECRpaUlxeNx43bC4XDJBTflMjs765bpy8Up03OW991Q8gYAAEU6OjoUjUZl2/a9S+CWZcm2bUWj0X89TEqPr0xfiRihBAAAJR0eHioWi2lzc1OWZd04Cujcb2xsVCQSkdfr/c/6+ZjK9JWIQAkAAG6VyWT0/ft3bWxs6ODgoOi+z+eT3+9XMBj83+YcPqYyfaUhUAIAgHs5OzvTwcGBzs/PZdv2H7W1TiqV0ufPn5XP5+81p9KyLFmWpe7ubkYmH4BACQAAKspjKdNXEgIlAACoSI+hTF8pCJQAAKDi/cll+kpAoAQAAIAR9qEEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABj5B9C6Xh9TlurRAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "idx = 0 # Feel free to change this to visualize different graphs.\n", + "\n", + "# Visualize the graphs\n", + "display(test_data[idx].mol)\n", + "visualize_dgl_graph(gs[idx])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "db4b56f6-2aa4-4dcf-bf14-c7a64d88592b", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'n': tensor([[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.1201],\n", + " [0.0000, 0.0000, 0.0000, ..., 0.0000, 1.0000, 0.1401],\n", + " [0.0000, 0.0000, 0.0000, ..., 0.0000, 1.0000, 0.1201],\n", + " ...,\n", + " [0.0000, 0.0000, 0.0000, ..., 0.0000, 1.0000, 0.1201],\n", + " [0.0000, 0.0000, 0.0000, ..., 0.0000, 1.0000, 0.1201],\n", + " [0.0000, 0.0000, 0.0000, ..., 0.0000, 1.0000, 0.1201]])}\n", + "{'e': tensor([[0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " 
[0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.]],\n", + " dtype=torch.float64)}\n" + ] + } + ], + "source": [ + "# Examine the features\n", + "print(gs[idx].ndata)\n", + "print(gs[idx].edata)" + ] + }, + { + "cell_type": "markdown", + "id": "9958ec7f-fb10-4f42-a6f4-9b3806fcadcc", + "metadata": {}, + "source": [ + "# Use Chemprop featurizer with PyTorch Geometric" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "17f56f95-3df2-466e-b8c1-f9cea1eeb927", + "metadata": {}, + "outputs": [], + "source": [ + "# Install with https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html\n", + "import torch_geometric\n", + "from torch_geometric.data import Data\n", + "import networkx as nx" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "2cb5d0c8-88c2-43dd-8f49-84df9b80624d", + "metadata": {}, + "outputs": [], + "source": [ + "def convert_molgraph_to_pyg_graph(mg):\n", + " \"\"\"\n", + " Takes a Chemprop molgraph from featurizer and converts it to a PyTorch Geometric graph.\n", + " \"\"\"\n", + " # Instantiate a graph from the edges\n", + " data = Data(edge_index=torch.from_numpy(mg.edge_index), x=mg.V, edge_attr=mg.E)\n", + " return data\n", + "\n", + "\n", + "def visualize_pyg_graph(g):\n", + " \"\"\"\n", + " Visualize a PyTorch Geometric graph object.\n", + " \"\"\"\n", + " nx_G = torch_geometric.utils.to_networkx(g, to_undirected=False)\n", + " pos = nx.kamada_kawai_layout(nx_G)\n", + " nx.draw(nx_G, pos, with_labels=True, node_color=[[0.5, 0.5, 0.5]])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "83c1c816-7a5a-4ca0-9d06-59b796d5cee1", + "metadata": {}, + "outputs": [], + "source": [ + "# Convert the molgraphs to PyG graphs\n", + "pygs = [convert_molgraph_to_pyg_graph(x) for x in molgraphs]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "419558f2-434b-43c1-ad12-0fcda9e02bf0", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO2deVzU1f7/X7OwiAgaKIJSgAqIKRrmhlcTx6VE0wzKEk3L0ZZr1vfaqLcyc2m0foZtNqa5hKlganrJBUwJJRdAUGRRcEEEVARZZBtmzu+PQx8nBATmM/OZmc7z0R90ZuZzXjPCa87nnPciIoSAwWAwGG1FLLQABoPBMG+YjTIYDIZeMBtlMBgMvWA2ymAwGHrBbJTBYDD0gtkog8Fg6AWzUQaDwdALZqMMBoOhF8xGGQyGeVBUVHTixIm5c+cOHjx4+fLllZWVQiuqR8SymBgMhqlRV1eXm5t75cqVixcvpqen0x8KCgp0n+Pv75+SkiKUQl2YjTIYDIEpKSnJysrKyMjIysrKysrKzMzMyclRq9UNnubo6Ojt7S2VSmtqapKTkwGsX79+3rx5Qkj+G8xGGQyGUcnPz+cWmPSHK1euPPw0V1fXPn36eHl5+fn50R88PT1FIhF99KOPPlqxYoWtrW1cXNygQYOM+w4awmyUwWAYitLS0uzsbGqU1DQzMjIe3tO0sbHp0aOHrmn6+vq2b9+++Yu/88473377raura2Jiopubm8HexKNhNspgMPjh4WXm1atXH3aYTp06cQtM+oOHh4dY3OrjbrVaPXbs2OPHjw8dOvTYsWM2NjY8vY9Ww2yUwWC0HbVaPXfu3J07d9bW1mo0mgaP2tnZeXt7+/j4+Pr6+vr6+vj4eHt7P3KZ2XLu3r379NNPX716dd68eevXr+frsq2F2SiDwWg777///pdffkl/5muZ2QwpKSnvvfdeVFSUs7MzHTl37tzw4cMrKytVKpVcLudxrpbDbJTBYLSdbt265efnBwYGRkVFubq6Gnq64cOHnzx5cty4cdHR0RKJhA5GRESEhYVZWVkdPXr0X//6l6E1PAwLv2cwGG0kLy/v1q1bVlZWW7ZsMYKHAtixY0eXLl0OHz783//+lxucPn36ggUL1Gp1aGhoXl6eEWQ0gNloiyCECPLPw2CYMps2bdJoNC+++GLPnj2NM6O7u/uePXusra3XrFmzc+dObvyLL74YN25cYWFhSEhITU2NccRwMBt9NLGxsR06dHB3d7ezs1uwYMHdu3eFVsRgCI9Wq928eTOA119/3ZjzBgYGfvHFF4SQ2bNnJyUl0UGJRPLzzz97eXmdOnVKgB1SwmiW6Ohoa2tr3U/Mzs4uLCwsLi5Oq9UKrY7BEIzDhw8D8PT01Gg0dKSkpGTatGmHDx82wuxvvPEGgCeeeOL27dvcYEpKCg0D+Pbbb42ggYPZaJNotVqlUkm3sXv06JGZmbls2TKZTMblUfTq1Wvp0qXXr18XWimDIQChoaEAli9fzo188803AMaMGWOE2aurqwcPHgxg9OjRarWaG9+9e7dIJLKysjp+/LgRZFCYjTZOeXn5iy++CEAkEikUCu77lhBy48YNpVLp4eFBzVQsFstkssjIyNraWgEFMxjGpKioyMbGRiwW6y4jBgwYAGDnzp3G0ZCfn0+Tl95//33d8YULFwJwcXHJzc01jhJmo42QnZ3dt29fAB06dNi7d2+jz9FoNDExMSEhIdwtf9euXefPn3/hwgUjq2UwjA+NFX3uuee4kdTUVACPPfZYVVWV0WQkJCTQ5KUff/yRG9RoNM8++yyAAQMG3L9/3wgyeLbR4rrinOqcck05NzLq0qivbn/F7ywG5fjx4507dwbg7e2dnp7+yOcXFxerVKp+/fpxm6cBAQEqlaqiosIIahkMQfD39wfwyy+/cCNvv/02gHfffdfISmjykq2t7ZkzZ7jB4uJiGjwwffp0I2jgzUb3luztl94PSUASJMmS8ZfHZ1dnE0JczrssvrmYr1kMjUqlkkqlACZMmFBSUqL70LVr15p/bWJiolwut7e3p2bq4OAgl8vj4+MNqZfBEIBTp04BcHZ2rq6upiNVVVWdOnUCkJKSYnw99Gj+8ccfv3XrFjeYnp7u4OAAIDw83NAC+LHRbXe3iZJEskuy2LLYnOqc6HvRARkBk7InEfOx0aqqqpkzZza6GUoIUalU1tbWu3bteuR1ysrKtm7dKpPJuMWpn5+fUqm8c+eOwbQzGEZlzpw5AP7zn/9wIxEREQCefvppQfTU1taOGDECQGBgYE1NDTe+Z88ekUgklUp///13gwrgwUYrNZVOqU6DMwfXah+csZTVlVVqKomZ2OiNGzcGDhwIwN7eXvc+hRBSXV3NhcV9/PHHLb9mRkaGQqHo0qULfa2NjU1ISMj+/fvr6ur4ls9gGI+Kigq6yrt48SI3OGrUKADff/+9UKoKCwu7d+8O4N///rfu+JIlS+iObU5OjuFm58FGD5YeRBK2393e6KOmb6NxcXEuLi4AevbsmZaWpvvQzZs3hwwZQndetm7d2oaL19TU7N+/PyQkhO4VAOjevbtCoaAFxBgMs+PHH3+k6z5u5MqVKyKRqF27dg32wYzMn3/+SY+bNm7cyA1qNJoJEyYA8Pf3N9xxBQ82uvbWWiQh+X5yo49yNlqnNcVVmEqlsrKyAvDss88WFxfrPnTy5EmaJuzu7n727Fk9J8rLy1MqlV5eXg3CpHTvQRgM0ycwMLDByThd8c2cOVM4UfVs3bqVLnpOnz7NDZaWlvbu3RvA1KlTDZQyw4ONLi9YjiTcqL3R6KPURu+o73RJ7SK/Lk+6n6T/jLxQXV09a9as5jdDAYwYMUJ331pPaJhUWFhYu3btqJ926tRJLpenpqbyNQWDYTiysrJEIpG9vX1ZWRkdqaurc3d3B/DHH38Iq41CAwZcXV1v3rzJDWZmZjo6OgL4/PPPDTEpDzYafiscSTh7v/H1GrXRzUWb6SE+kjAwY+D6O+vv1d3Tf+o2k5eXR/u32NvbR0VF6T6kVqsVCgX1OLlcbqCg+pKSEpVK1b9//wZhUuXl5Y9+MYMhEDSyfc6cOdzIgQMHaHSgieRG19bWjhw5EsDQoUO5QAJCyL59+8RisVgs/u2333iflAcbPVp2FEnYUrSl0Ue5m/qLVRcVeQrnVGdqprbnbEOuhMSUxWiJsT/9+Pj4rl270hTP8+fP6z50+/btZ555hp4I6d62GI5Tp07NmTOH7tkDaN++va+vb9v2YRkMg6JWq+kfzqlTp7jByZMnA1i9erWAwhpQVFTk6ekJYN68ebrjS5cupfd/ly9f5ndGHmy0Vlvret7VP92/StNI9kKDI6ZqbXVkcaTskkyUJKJ+2iut19L8pddrjJSZzt2tP/PMM7pFDQghSUlJjz/+OIBu3brp7q0YgaqqqsjISC5MSiQSbd/e+JEdgyEUe/bsAdCnTx9upLCw0MrKSiqV5ufnCyjsYZKTk+3s7ACoVCpuUKvVTp06FUDv3r1LS0t5nI6fuNG9JXslyZLBmYP3luxNq0qLLYt9/8b7Pxb9SJo+qb9Re0NZqPS44EHNVJwkll2SRRZH6kZN8Ut1dTWtCkPv1nXLGRBCIiIi6H7l8OHDCwoKDKThkRw8eN
DJyYne4wulgcFoFHrkvXbtWm5k9erVACZPniygqqb46aefAFhZWelu2paVlfXp0wfAlClTeNyF4C2L6WjZ0eFZwyXJEiTBJtlmUOagfff2EUI8Lnh8nN9kuKWGaGLKYkKuhFgnW1M/7Xq+6/wb8y9U8pyZ3kzoUoPNUMGPzj/66CMatC+sDAZDl7y8PIlEYm1trXsPR0/ADxw4IKCwZliwYAGArl273rjx4AA8KyurY8eOAFatWsXXRDzn1Ndoa+6q77Zhu7OwtvDzws99L/pSMxUliUZmjdx5ZGdlZaX+qpoJXbpz505QUBDdDNUNNxOQixcv0iBWoYUwGA9YsWIFgJCQEG7kjz/+oCbV4MbOdKirqxs3bhyAIUOG6B43HTp0SCKRiMXi//3vf7xMZHIVnhLvJ8qvy+3P2buluEmsJPpnpjcTunTu3Dla787Nze3PP//UW3vbqa2t5X4Xa2pqpFKpRCIxZqUcBqMZtFotrfRx6NAhbpAmTy9ZskRAYY/k7t27NFh7xowZuuPLly+ngTq676jNmJyNUkrrSiMSI2hZVkr//v2//vrrBhHyzdN86NLPP/9MN6GHDRsm7Ab5jBkzrKysDh48yI306tULQIOUKgZDKI4ePUoT8LhU5vLycnt7e5FIxPupN+80WhJfq9XSE11ra+u8vDw9pzBRG+VoNDM9JibmkdvDzYQu1dXVmdRm6Jtvvom/16GZOHEigN27dwuoisHgeOWVVwAsXbqUG/n+++8BjBo1SjhRraDRkviFhYU0Rfu7777T8/qmbqOU1mamNxO6VFRURL+FpFKpUqk0hvpHER4eDuDNN9/kRv7zn/8AWLFihYCqGAxKSUmJnZ2dWCzWrRX59NNPA4iIiBBQWKtYuHBhgzjxqqoqsVgMYP369Xpe3DxslINmptPYWjTRwKOZ0KXU1FT62s6dOx87dszY6pvg4MGDAIKCgriRH374AUBYWJiAqhgMytdffw1g7Nix3MiFCxcAODo6Gqe2PC/U1dU1qJyyd+9eAB07dtQ/8snMbJTSVGZ6cnJyM3frO3fupFskTz31lEn1obt69So95uJG4uPjAQwaNEhAVQwGhXZY0i22++677wJ4++23BVSlP5MmTQJPWfZmaaMcd+7cWbt2LY2n5bCxsdm0aZPu0+hmKO3o+eqrr/ISRMUjGo2GHnZxmRV37twB4ODgIKwwRmtZvHhxQkKC0Cr4JDExEYCTkxMXMFRTU+Ps7AwgObnxom5mQUFBAc2/4iXXxrxtlINr4CGVSmNiYnQfKi0tpSc2prMZ+jC0s41uMxmay2RqOXaMZqCbMxKJ5L333jOju93moeefCxYs4EZ27twJoF+/fgKq0p/PPvsMwAsvvMDL1SzERilPPfUUAN0I0NzcXBrv1qVLF2P2rW4ttOX3tm3buBFa1dHQzQ8YPFJbW6tUKmmQsqenZ2xsrNCK9KWysvLhDktjxowB8M033wgoTH98fX0BREdH83I1MSwI+tFkZmZyI66uru7u7v379z99+jQtn2WaUOVZWVnciI+PT4MRholjZWWlUCjOnj07cODAq1evjhkzZu7cuWVlZULraju7d+8uKSkZPHgwvVsCcO3ataNHj9ra2tIQKDMlLi4uMzOzW7duNMdJfyzKRh+2HqlUGhUVlZCQQLOVTJaHlTMbNVP69ev3559/0mXphg0bfH19f/31V6FFtZFNmzYB4HqRAdi8eTOtk0RXqWYKfV+zZs2SSCT8XJGXNa2JQHdtpkyZIrSQVpOUlATgySef5Ebo39748eMFVMXQh7S0NC4NLyQkxOxaw+bk5IhEovbt23MnnxqNhoZjm/Ve07179+zs7EQiUXZ2Nl/XtKjV6MM39eaCj48PzavTaDTcCMzzvTAoffr0SUhIUKlUtMPCk08+uXv3bqFFtQJaqSc0NJSrKX7kyJHc3FxPT0+aH2im/Pzzz5WVlUFBQT169ODtonz5sSlQWVkpFoutrKwM1PnDoND2sNw3pFqttra2FovFFnPm+48lJyeHVhEDEBwcrNsjyGS5du1a586dAZw4cYIbzM/PX758+YYNGwQUpj8BAQHUTHm8pkXZKCGE7oFmZWUJLaTV0BRV3aNDurhm3e4sAK1Wq1KpOnToAKBjx466JdlNiurqai7r2tXV1d7e/tKlS0KL4pPz58/TfwJ+g8ct6qYe5nwy8/COhPnuUTAaIBKJ5HJ5ZmbmpEmT7t279913F4ODceOG0LJ0SE5Onj9/vpub26RJk6KioiQSiUQiqaioeOGFFyoqKoRWxxtcmjWXAMkLlmaj5ms9D38BmO97YTSKm5vbr7/+unnzZq12dXQ0+vbFxo0gREhJJSUlGzZsCAgICAgIoIUo/fz8lErljRs3MjIy+vTpk5aWRms7CKmSJ2pqanbs2AFg9uzZPF+ax5WtKfDdd98BeP3114UW0mqOHDkCYOTIkdzI5s2bAbzyyivCiWIYhMJCMnUqAQhA/vUvYvz7ZlqVIiQkhCYL4K+qFElJSbpPu3TpEu23YRnFxn7++WcYpsuZpdno77//DiAwMFBoIa0mNzcXgIuLCzeSkJBgoH91hikQGUk6dyYAsbMjSiX5qyCyYcnMzFy6dCmNW4JOjbSmqu4ePnyY9tsw2YZLLWf06NHgo7row1iajd68eROAk5OT0EJajVartbe3B8BV+C8pKQHQvn17HlsYMkyK4mIil9cvS4cNIxkZhpro3j2iUpExY0K529DevXuvWbOmJUUbVq5cCaBDhw4XL140lD7Dc+XKFbFY3K5du1Z10GghlmajhBAa5lZUVCS0kFbzcE0AWvZft68hw/KIjibu7gQgtrZk6VLCY7SeRkPi44lcTtq3JwAZMWKng4NDWFhYS/pHcGi1WlrzwcfH5969e7yJMy4ffvghDFbD1wJtlNbl1o13MxemTZsGYMuWLdzIiBEjADSoWcWwPO7dI3I5EYkIQPz9yd+3KNvC1atk6VLi4VG/1BWLyejRZNeu0rYF+pSXl/ft2xfApEmTNBqNvuKMDpd/ZaD6RJZ2Ug9zPuBmmfX/WBwdoVLh+HH06oXUVAwejEWLUFMDADk5SEqq/5kjIwNXrjRynepqREVh4kT07Illy3DtGrp3h0KBy5cRG4vQUIe2BfrY29vv37/f2dl5//79n376aRuuICyHDh3Kzc318vKi6xLesUAbNV/refgLwHzfC6MNjBiB5GS88w60WqxejaefRmoqPvgAAwdi5cq/PTMsDEuW/G0kKQnvvovu3REaiv/9D1ZWCAnB/v24dg1KJby89NXm4eGxY8cOqVT66aefmldWK/6qRTJnzhxau513mI2aEA/n0ZvvyvphwsPDHRwcrKysRo4ceaXRpRQDsLfH118jPh6+vsjIAC2xYGODNWvQ6G9BYSHWrUP//hg4EF99hbt3ERCA8HDcvInISEycCL5qGAGQyWSrVq0ihMyaNevixYu8XdfAFBUVRUdHS6XSGTNmGGoOQ+wUCAvtt+Xt7S20kFZDawJYW1ur1Wo6kp2dDcDd3V1YYXqi0Wg+/vhj3
YWAWCweM2bMzp07udYUjAZUVpJDhwgh5IUXyIgRJCCAjBpFuGOhgADy0kskPp5IpfW7n66u5IMPDHjWz0F38Hv16tWgQ5zJ8vnnnwOYNGmS4aawQButrq6WSCRSqVTwBvRt4IknngDAJTLX1dXZ2tqKRKLy8nJhhbWZsrKyKVOmAJBIJMuXL//++++nTp1KewsC6NixI+1FKLRM0+WFF8gzz5CTJ4lIRLj2CNRGa2pIt24kOJhERvJ5vt88lZWVtLrH2LFj64wT7Koffn5+AH799VfDTWGBNkoIoSWw0tPThRbSamg57v3793MjtGGfmRrNpUuX6C/xY489duTIEW783r17KpWKNkqhBAQEhIeH3717V0C1pgm1UULItGmkc2dCPyFqo4QQQVbzXP2n//73vwJM3xpOnDgBwMXFxaBV3yxwbxRse9Q0+O233wYNGpSent6vX7+zZ8/SHj4UR0dHuVx+4sSJtLQ0hULh7OyclJS0YMGCbt26hYaG0i5GAio3Tb74AjU1WLTob4M2NgIoeeKJJ/bs2WNlZbVq1arIyEgBFLQYrtC9lZWV4WaxTBs1X+tpqkCJeX0lEEJWr149ceLEe/fuhYaGJiQkeDVxVNynTx+lUpmXlxcZGSmTyWpqaqKiosaMGePr6/vJJ5/cMKkiSELj5oZly7BpExIThZYCDB8+fM2aNYSQ119/nZ5GmCAVFRU0qGDmzJmGnclwC10BUalUAF577TWhhbSao0ePAhg+fDg3sm3bNgAv0Vs4c6C8vHzq1KkARCKRQqFoVSZrbm6uUqmkG8QAJBIJzfg2xzrcfMHd1BNC1Gri709GjnxwUy8stFSSh4eHabZIoWXxRowYYeiJLNNGjx8/DmDo0KFCC2k1tCaAs7MzN3L69GkA/fv3F1BVy7l8+fKTTz4JwMHBoc2b+lz9Ie5GzNXVVaFQXL58mV+1ZoGujRJSf9ZkY2MSNlpVVUWTBmUymQkeNw0ZMgTA1q1bDT2RZdpoYWEhgI4dOwotpC30798/ODiYiwQqLS0F0K5dO9NPwjt48CBtGOnj45PBR+hNQUFBeHg4TUOkBAQEqFSqiooK/S9uLjSwUULI7NkEMAkbJYTcvHnT1dUVwAcffCCIgOrq6vPnz0dFRZWVlemOZ2RkAHB0dDRCGx7LtFFCCP17vnXrltBCeID+ml67dk1oIU2i1WqVSiVtVxscHMx7AYvExES5XE4rYOGvE6r4+Hh+ZzFNPvmE/N///W3kzh0yfjwxnRKgJ0+epHVL+W1w1CjFxcWJiYlbt25VKBQhISF+fn5ck+QGvw/vvfcegHnz5hlaErFgG6Xr+bi4OKGF8AC9TZbL5aa5A1VRQcLC1F5eE8Ri8aeffmq4sn6lpaVbt26lTasotFS7aX4s/yi++eYbes+UmJjI1zVramouXrz4yy+/rFq1aubMmYMHD6Y1pBtgZWXl7e39/PPPnz59Wve1NCTr7NmzfOlpBou1UXo2Z+5dDNVqtUKhAODk5ATA2to6ODg4MjKSS3MSnJwc0q8fAUjfvlVGq+ybnp6uUCjo3wkAGxubkJCQVhV/M0eyskinTsTXV2gdTfDGG28AeOKJJ27fvt2GlzdYZgYEBNg0FszVsWPHgICAkJCQpUuXRkZGJiYmNlqzioZh9e3bV++31SIs1kZXrVoF4P8a3A6ZFXfu3KGNeW1sbN5+++0JEyZw9y+PP/740qVLBb/NP36cdOlCANKrFzF+Sd/q6urIyMjg4GDuY3F3d1coFIJ/LAYiN5cApFs3oXU0QW1t7fDhwwEEBQU1/zWvVqtzcnJiYmLCw8PlcrlMJqPbVg/j6uoqk8nkcnl4eHhMTExOTk4LxQwbNgzAV199xcc7ezQWa6N79uwBMGHCBKGFtJHk5GQa9+Pm5nbq1Ck6mJ+fr1Qqe/bsSX/JxGJxYGCgSqUSpJe9SkWsrAhAnnuOCJtdnZeXp1QqPT09uY/lySefXLhwoSAfi+EoKSEAcXQUWkfTFBQUdOvWDcB777338KNRUVGTJ0/28fFpNBLe0dFx0KBBM2bMWLly5e7du9PS0tqczH3o0CEAIpGosLBQvzfUUizWRtPT0wH06NFDaCFtYfv27XZ2dgACAwMbbfNAj1yEykyvqiKvvUYAIhIRhYKYSAQBDZPS7Z1rLlFiLUStJgCRSIgpb10kJCTQm/FNmzY1eEipVDazzGzzhszNmzdjYmJUKtX8+fNlMhk9W6bbC/q+mRZjsTZaU1MjlUolEklVVZXQWlpBXV0d3QwFIJfLm/9CppnptE4ExQiZ6TdukKefJgCxtye7dxtunrZz48aNSZMmicViANnZ2ULL4RNbWwIQE19kb9myBYCtre2ZM2d0xzMyMiIjI1NSUtr8J1lWVpaYmLh9+/YPP/wwJCTE39/f1ta20d0AT0/PtLQ0Pt5Ni7BYGyWE9OrVC0CDTzMiIuLAgQMmGCpMCCkqKqLH0NbW1q06HEtKSnrrrbe4c8wuXVxmz1YbIhzojz+IiwsBSM+e5MIF/q/PI3R37PfffxdaCJ/QTqKmH8U3d+5culWtT8RhcXFxfHy8SqVSKBTBwcFeXl70q7EBnTp1CgwMlMvlSqVy//79WVlZxv/rtmQbnThxIoDdOksmtVrt5uYGk8yKSUlJobt7nTt3blvHGHrkIpPJRoyQ0xqU3t5k6VKSm8uPQm4zdPx4YoDuijxD8xTXr18vtBA+8fQkADH9FXZtbS1t1zFs2LCWbHHW1NTk5OTs379fqVTK5fLAwMAOHTo87JjW1tZeXl7BwcEKhUKlUsXHx5eWlhrh7TwSS7bRhQsXAlihE6ZcWVm5evVqWv6DbkKPGjUqIiKibX2+eGTHjh10M/Spp566fv26nle7fLlu8WLi5lZf0NfKikyZQg4caHsn9Orq+swZuhlqkkv5hqxevRrAggULhBbCJzS2LCVFaB0toLCwsHv37gDeeeedBg+1fJkZEBAQFhamVCojIyPT0tJM8yaSWLaN0qrXw4YNe/gh08mKoZuhtDL89OnTeTR0jYbExJCQkPolJC2QrlCQRpfg6ekkJoY0+GpPTiZJSSQvjwweXN/+lysbbPr8+uuvAMaPHy+0ED4ZNowAxFyyt5KSkuhx39y5c1esWPHqq68OHDiwqWWmn5/fCy+8sHjx4i1btpw+fdpcSutTLNlGv/vuOwA2NjZNbdAInhVz9+7dsWPHApBKpUql0kCzFBSQ8HDSt2+9mQIkIICoVEQ3MX3OHAIQufxvLxw7lowZQw4dIhIJ8fQkqakGEmgQaJlET09PoYXwybhxBCAHDwqto8V8++23YrG4QTvSBsvMxMREc+8lY8k2mpOTI5VK6c379OnTm+n2LkhWzPnz52kVTmdnZ+OchCQmErmc2NvXm6mjI5HLCU3emzOH2NoSsZicPPng+dRGCSE7d5KiIiMI5BO1Wm1tbS0Wiy0penTqVAKQyEihdbSYefPmAfDy8vrggw82bdp08uRJi2xwYMk2SghZv369g4MD9zXYp0+fL7/8sqnFpjGzYn799VcqbMCAAUbO
url3j6xfXx+0RP/bv5/MmUP69SPjx5O+fR909eFs1EyhFa9TzWsV3Sw0XPfHH4XW0TKOHTsmEolsbGyMGXskCBZuoxS62HRxceE2YprPTH84K4bH4sG0GBLdDH3llVcEXCulpxOFgvTsSe7fr7fR9HRiZUXWrKl/grnb6OTJkwHs2rVLaCG88c47BCDr1gmtowXcv3+fptutMJ1SVAbjH2GjlLq6OloMmN7pA+jWrZtCoWgqUffhrJiuXbvOnz///PnzbdZQWlr6/PPPG3oztFXQfQtqo4SQ994j7dsTuj42dxtdtGgRgGXLlgkthDcWLyYAWblSaB0tYMGCBSeh5tAAABDNSURBVAD8/f3/CZ0L/kE2ykEz02n3UG6xuXXr1qZOyYuLi1Uqlb+/P7c5QIsHt7bpcVZWVu/evQE4OTnRrm2mA2ejpaXEzY08/zwh5m+jmzdvpkt+oYXwxsqVBCCLFwut41GcOnWKNjnnsW6eKfNPtFEOGvZEAzbxV2b6uXPnmn8+F7Hh4OAQFhbWzMmVLgcOHHB0dKTfz1euXOHvTfADZ6OEkB076o+Dzd1GExIS6Hee0EJ4Y906ApCHAjFNi+rqatoV3PTbL/PFP9pGKTQz/amnntJdbDaTmV5eXr5x48ahQ4dyz+/bt294eHhxE5k9dDOUBhi//PLLpnlwrGujhBCZjPTpQ4KCzNtGS0pKALRv395i6pBu2bK9QweHmTNnCi2kOZYsWQLA19fXvMpZ6AOz0QfQnum0QDIAW1vb5sOeMjMzFQpFly5d6PMbjd4vKyubMmUKAIlEYiKboY3SwEYzM4m1NbGyMm8bJYTQf50bN24ILYQfaDXiqVOnCi2kSVJSUqysrMRi8YkTJ4TWYjyYjTaEy0ynh+kAvL29lUplU7ULa2pqdu/e/cYbbzzstpcuXfLz8wPw2GOPHTlyxPDa204DGyWELFlCALO3UZrZ3cKNF9Pn4MGDAMaNGye0kMZRq9W03tj7778vtBajwmy0SZrqmd7CBh7R0dG05FK/fv1aXrVbKNaubZjCdP8+ef55IlC3R95YsGDJgAGjf/yxLaVeTJD4+Hg0kd9sCnz66ac0c6y1p6/mDrPRR9CGnum6m6GhoaHm2A1YoyF8N/cUhv/3/8zgTKblpKSk0C9moYU0QkZGhq2trUgkspi1f8thNtpSWtgzvby8fOrUqTQDVaFQmOPhxuHDpF07MnGi0Dr44H//s4StCY7s7GyYZKEAjUYTGBgI4M033xRaiwAwG201jVaH+vPPPwkhsbGxNAHRwcFh3759QittIxcu1NcqtQCyswlA3N2F1sETt27dAtC5c2ehhTRk7dq6kSOVPXv2MpECoEZGRAgBo/WUlpbu2LHjxx9/PHv2LB2xtbWlFWr9/Pz27dtHa++bIzU1aN8eIhHu34e1tdBq9EOjgb09ampQVoa/vvjMmMrKyvbt29va2lZVVQmt5QHZ2fD3R2UlDh5Ujx/fSLs6i6eRaqmMluDo6Dhv3rwzZ87QhH2JREKLfXXt2jU2NtZ8PRSAjQ08PFBXh5wcoaXojUSCHj1ACC5fFloKH9jZ2Uml0urq6rq6OqG11EMI5s1DZSVmzsQ/00PBbFR/evfurVQqi4uLP/74423btt28ebOppttmBO0PkJUltA4+8PUFgMxMoXXwBG0HW1FRIbSQetavx9Gj6NoVa9cKLUU4mI3yg4ODw7Jly8LCwhpth2B2WJL10PdiGV8JAOimfHl5ue7g4cOHCwoKjC8mNxeLFgHAt9/isceMP7+pYAl/8wzesaTVKH0vlvGVAICWdNBdjVZXV0+ZMsXNza1Hjx5z586NiooqKyszjpi5c1FejtBQvPCCcSY0UZiNMhrBkqzHkr4S0NhqtKioaPTo0fb29leuXNmwYUNoaGiXLl1kMtnq1auTk5O1Wq2BlGzZgkOH4OSEr74y0AxmAzupZzTCrVvo2hUdO6KkRGgpelNWBkdHtGuHigpYwI5LUFDQsWPHjh49GhQUpDuu0WhSUlJiY2NjY2Pj4uLUajUdd3Z2HjVqlEwmGz9+/OOPP86XjMJC9OmD4mJERODVV/m6qrnCbJTROI89hpIS3LqFv0qvmDFubigowLVr+Cuz14yZNGnSgQMH9u3bR+t/N0pxcfHRo0djYmKOHDly/fp1btzPz2/y5NOBgfYjR6J9e71kTJ2KPXvw3HOIjtbrOpYBs1FG4wwdilOnEBeHESOElqI3o0bh+HEcPoyxY4WWoh8ajSYgICA1NfWpp56aPXv2mDFjvL29m3/JlStX6BI1JibG2vrxO3dSCYFUCn9/yGSQyfDMM/irHURL2bULL78MBwekpcHdve1vx3IQMvafYcLQ7mkqldA6+GDePLNpYdQMxcXF48aNA/CYzqF4165dQ0JCtm7dWvSo3q1qtTohIeejj8iQIUQiedDQ0NmZvPQS2biRXL/eIhlFRcTFhQDkhx94eFOWAbNRRuN89hkBiGUUPPvySwKQt94SWoceZGZm0jxjZ2fn3bt3R0ZGyuXybt26cX4qFosDAgIUCkVMTMwj276Xl5OYGKJQED+/B34KEC8vIpeTyMjmCtNMm0YAMmoUMcNyEYaC2SijcfbsIQCZMEFoHXxw8CABSFCQ0Drayv79+2kHmv79+1+9elX3oZycnPDwcJlMZmtry1mqnZ2dTCZTKpWJiYmPLI5z+TL57jsyZQpxdHzgp9bWpKSkkSdHRxOAtG9PTK8PjpCwvVFG42RkwM8PPXogO1toKXqTn49lyzBoEF5/XWgprYQQsmbNmiVLlmi12mnTpm3cuJFrHdaAysrK+Pj4I0eOxMTEXLhwgRvv3r379Onr/f2DR49G587NzaXRICUFsbGIjUVZGU6fbuQ55eVQKNC7N/79b73el4XBbJTROLW16NEjr2vXrPj4f9nammuFkooKZGXBxQXduz8YLC7G1avo29fUC6+Ul5fPmDFj3759Eolk5cqVCoWihS+8fft2XFxcbGzsb7/9lpeXN2zY1YQEDwBeXggOxsSJGD4cOovXRsjPR3o6vLzg5fVg8Pp1XL6M0aPxV18Ixl8IvBpmmDD0FPjChQtCC2k7J07UF8rTLce+fTsBiIn3Z9Jtx93mQsharTY1NXXdusqxY0m7dg/u2e3syLPPkrVrSVP/tlFRBCA9exLdrnRffEEAotG0TYslY/7hyAyD4ePjAyDL/BOACgvxySdCi2gN0dHRgwcPzsjI6Nev39mzZ2UyWduuIxKJ+vXrN39+u8OHUVaG+HgoFAgIQHU1Dh7E+++jb1+4uCA0FBs24ObNhi+/eROrVun7Xv4JMBtlNAk9Gs40/5xQuRzr1iE1VWgdLYAQsnr16kmTJt27d++ll15KSEjw9PTk5cpSKYYPh1KJxETk5yMiAjNnwtUVt28jKgpz58LdHQMG4IMPkJRU/5J338WaNZaTR2s4mI0ymsRiVqMzZ2LAAMjlMFh+OT9UVFS8+OKLixYtEolESqVyx44d7fVMNmoCFxe8+iq2bEF+PnJyoFIhJAQdOiAlBZ9/jmPH6p/
25pvo1Qtz54IdoDQPs1FGk1jMalQsxtq1OHsWP/wgtJSmyc7OHjJkyJ49exwcHPbu3atQKERGOcrx8oJcjshI3L6No0ehUGDixPqHpFKsXYu4OEREGEGIGcNslNEk1EazsrKI+a9Ghg/HzJlYvBi3bwstpTEOHTo0aNCgixcv+vj4nD59eiLnZEbExgZBQVAq62tiUcaMQUgI/vMfSyhSYziYjTKaxMnJycnJqaysrLCwUGgtPPDFFxCL8eGHD0YyMhqPjjQmdDM0ODi4pKRk4sSJZ86cod9epsO6daipwaefCq3DhGE2ymgOi7mvB+DkhOXLsWnTg7OmVaswZAh698bq1bhzRwBJFRUVoaGhixYt0mq1CoVi3759Dg4OAuhoFldXfPQRvv0Wly4JLcVUYTbKaA56ymSONtroadLcuQgIwNdf1/+vhwe6dEFmJhYtgrs7XnoJR44Y7xgqJydn2LBhu3fv7tChw549e5RKpcl2oHn3Xfj6YvNmoXWYKib6z8YwEcz0sP7OHchk2LWr4bhYjPXrUVtb/7/LlyM/HzExCAmBRoPISIwbh8cfx6JFuHrVsArj4uKGDh164cIFb2/v06dPT5482bDz6YdUiu++g8l0IzU5mI0ymoM7ZRJaSCs4cwYDBuDYMSxbBjs7BARANw09IACLFiEgoD4TVCKBTIbISFy/DqUSPXrg5k2sXo2ePTFmDLZtgyEawm/YsEEmk925c2fChAlnzpyh2UomhYsLZDLY2DwYGT4cS5ZAJmOZoI0hcBYVw7ShBurh4SG0kJYSEVGf9Th8OCkoaPXLNRoSH0/kcmJnV5832bEjkcvJuXP8yKuqqpoxYwYAkUikUCg0LLPSImA2ymgOtVptbW0tFovv378vtJZHoFYThaLe++RyUlOj19Xu3iXr1hF//wd56MOGaTdt2lZWVtbma+bm5g4cOBCAvb39L7/8opc+hinBbJTxCOh9fWpqqtBCmuPOHRIURABiY8NzVfa0NKJQECcnMnDgbQC2trYhISExMTGPrOPZgLi4uC5dugDo2bNnWloanxIZQsNslPEIJkyYAOCTTz4RWkiTnDtHPDwIQNzcyJ9/GmSK+/fJrl2nRowYwWUW9e7d+4svvrh161ZLXq5SqaysrAA8++yzxcXFBpHIEA5mo4xHEBwcTI3D29s7MjKytrZWaEV/4+ef6/cxhw0j+fkGn+7SpUtLly7lOhVLJBKZTBYZGalWqxt9fnV19axZs9hmqGXDbJTxCMrKynx00gNdXFwWLlyYmZkptC5SV8fnZmgrp66LiYkJCQmha0wAbm5uCoUiOztb92l5eXmDBg2im6FRUVHG08cwLsxGGS0iOztbpVL5+/tzfhoQEKBSqcp16yEbkaIiIpMRgEilRKkURAIhhBQUFCiVyp49e9LPRCwWBwYGqlSq+/fvf/PNN87OzgB69Ohx/vx5wSQyDA+zUUbrSExMlMvlHTp0oMbh4OAQFhbW5vLsbSM1VePpSQDStSs5ccKYMzeOVqs9duxYWFhYu3bt6MfC/RAUFHT37l2hBTIMC+vFxGgL1dXVBw4c2LBhw9GjR+mvUO/evWfOnDl79uzOzTdO05tdu3Z9+OEPJSVHnnhCvHcv/tqlNAnKysp27ty5bdu2hIQEQoi/v//p06dtdKPYGZYIs1GGXmRlZW3evHnz5s23b98GYG1t/fzzz4eFhT333HMSiYTfuTQazZIlSz7//HNCyMKF25YvDzNZgzpy5Iirq2vfvn2FFsIwBsxGGTyg0WiOHTu2YcOGvXv31tXVAejWrdv06dPnzp3LVw+MsrKy6dOnHzhwQCqVrlixouVtMhkMQ8NslMEn+fn5P/300w8//JCTkwNALBYHBQWFhYWFhIRw24VtICsra/LkyZmZmc7Ozrt27QoKCuJPMoOhL8xGGfyj1WoTEhJ++umniIiIyspKAJ06dQoJCXnrrbd0z/pbyIEDB6ZPn15WVta/f/+9e/d6eHjwr5jB0ANmowwDcu/evcjIyO+///7cuXN0JCAgQC6XT5s2jTvrbwZCyJo1a5YsWaLVaqdNm7Zx40Y73WJNDIZpwGyUYQySkpK2bdsWERFRXFwMwNbWduLEiXK5fPTo0U01bisvL58xY8a+ffskEsnKlSvZZijDZGE2yjAeD4dJ+fj4zJo1a9asWbRsB8elS5cmT56ckZHh5OS0c+dOmUwmkGQG49EwG2UIwOXLl7dv37558+bc3FwAEolk1KhRcrl8ypQpUqk0Ojr61VdfLS0t9ff337t3L19n/QyGgWA2yhAMtVr922+/bdy48eDBgxqNBoCVlZWrq+uNGzcIIS+//PKmTZvYZijD9GE2yhCegoKCbdu2rVu3rqCgAIBIJPrss88++OCDprZNGQyTgtkow1TQarVfffXV+fPnR4wY8dprrwkth8FoKcxGGQwGQy9YZ1AGg8HQC2ajDAaDoRfMRhkMBkMvmI0yGAyGXjAbZTAYDL1gNspgMBh6wWyUwWAw9ILZKIPBYOjF/wc5HGaMHRX65gAAAdN6VFh0cmRraXRQS0wgcmRraXQgMjAyNC4wOS4xAAB4nHu/b+09BiAQAGImBgiQAGJpIG5gZGNIANKMzOwOGkCamZkNQrNAxJmY2BkUQHwYFyEMVY4mDtfukAGWZ0RiQGQEwQYyYirAcAEWI3AZys3AyMDIlMDEDGQzsLAysLIxsLEzsHMwcHAysHMpcHFnMHHzJPDwZjDx8jHw8ifwC2QwMQlmMAkKJQgJZzAJiySIiGYwiYoliIlnMIkzJnCyMAhwJYgLJTixAM1nZQQqFGdjZWPn4GRh4+bh5RfgYhMWERUTFxLXYgR6hgEWrFfaDti/2dpmD+KETjNwaKzLsQOx/V1MHQ6aLtoPYqc+vmLv68B7AMRe2ddu58R7FSyurypoO73Cah+IPbFs3X6hiTvAep/yKh1Q9xUBq1maIHwg8RMzWK98c/p+3jVqYPbh3uoDjelie0HsJtnWA5+5doLdUHB34wGvun9g9n636weKjSFuC2dmOnhq2WkwW+7D+QOftO6B7TUwWHxA408l2K6PnzscLk68AmZX2C52eLn+PFgNy9pLDme6TcDsW5xPHBzz5oDNUfl+1GEvp6QDiD2jfZpDbQwXmH1Iq9WhvsgZrEYMAGdzeABl6urhAAACVnpUWHRNT0wgcmRraXQgMjAyNC4wOS4xAAB4nH1VW6obMQz9zyq8gTGSLMn2501yKaXcBNq0eyj0s/unRx6S8QXTmVh47GM9j5xTiuf79dvvv+n1yPV0Son+8+u9p1+FiE4fKSbp/P7l6y1dHm/n58rl/vP2+JFEk1ScwfsZ+/a4fzxXOF0SZyNlNkzIRONQpvEcJyXdkuRqnatjWwp14wWuQJ/kVkS6po2zeBW3BVCHYffCndImWaiS0QJosAyDrtUpNLqr+spFh0asFtPiaaPMTZhWPlYAoai49F6wX5y0yQLYYBquWRHqLVRa7YVWyB4qJWOzVo0ZObv3BZIpoJQ7adEBhXVZ5pKjOFvJsGkoyhbBIZ2rJL
EEVDNbbyZIrKohYytkVGizbNakMZC9NRRrhYwSbZ6r1NI8yKENaldIS5c/aWuZxdrgEXhCSx6x70q1N6QoYtJSuizDr7unoLwXGTmrDqauoFEpzVJ7rW0nCei0TFRUyjKIIabDPJmXVfgShfJsDTyhwacKsq7Cl6hTRfAARu5FhNdAGSq5ucJj0KWz8NJ2FElzJXgnALJz60uNOoDY1xrhwkV1XwDfb9dPfb/fBOf77XrcBPHK0fC4PlI5+pox9OhexrCjRxnDj05kjHr0G2O0o6sYox+twzHmBol95qkNNATLxHYNwWVitYZgndjLQ9jEUh0rPpExPsG0iXMasbaJWRqC+8QgDSEzUzSE8MQIDSEylV5DSJlqrCFEp2LquLVfC5FX5OMVggyH65QcqfuZw7+o9FzX+H7+O2B++gcuuTJBQIIwXQAAAS96VFh0U01JTEVTIHJka2l0IDIwMjQuMDkuMQAAeJwlUDmSwzAM+8qWyYzC4U1xXLrPJ9zu5AV5/IJeN7JACARwfuR6nG89z/fjsuvC5fd5XfY8T31+Lr3mU/n5PoSCXWQJcajbOpQqWvZiUuMOGWSbaq+XkGZprUMo0wSIknJxrAPs9MrhZLrXIGzhFmAxyVZhSGFsqd1Qt2TfCkgpTGVYUW08kBF+wgZjy5LeAJ0kegecukdi5ysoYusG0nsrvL+S4M960vj24WwSBQkIUvE/x3tzjribtcqtxN15L+xKNHL7gpmqCckpiUgzZTf3wWBfscBJq2v/B2eQgpAUk5HnSJg5kmIj9l1EoTg8K9jKcaWqaB8U2em6sLRFdXSL8R6AJJrGHSd6ZYKCZ67n9w9ramN6pq4WpwAAAABJRU5ErkJggg==", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAApQAAAHzCAYAAACe1o1DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACQq0lEQVR4nOzdeVhTZ9oG8DuHsCkqQVFcUEBwXxF3RUFtxUJb7VitVq21i2XG2s7W1i5j922mrbalddRqS7VVuwoU64biLhB3qoKIghqIJoCsITn5/uAjY2SHwCHJ/buuXt/HOck5N8qYh/e87/PKjEajEUREREREjSRIHYCIiIiIrBsLSiIiIiJqEhaURERERNQkLCiJiIiIqElYUBIRERFRk7CgJCIiIqImYUFJRERERE3CgpKIiIiImoQFJRERERE1CQtKIiIiImoSFpRERERE1CQsKImIiIioSVhQEhEREVGTsKAkIiIioiZhQUlERERETcKCkoiIiIiahAUlERERETUJC0oiIiIiahIWlERERETUJCwoiYiIiKhJWFASERERUZOwoCQiIiKiJmFBSURERERNwoKSiIiIiJqEBSURERERNQkLSiIiIiJqEhaURERERNQkLCiJiIiIqElYUBIRERFRk7CgJCIiIqImYUFJRERERE3CgpKIiIiImoQFJRERERE1CQtKIiIiImoSFpRERERE1CQsKImIiIioSVhQEhEREVGTyKUOQNZJp9NBo9FAr9dDLpfDw8MDTk5OUsciIiIiCbCgpHpTq9VITk5GWloatFptlfMKhQIBAQEICgqCp6enBAmJiIhICjKj0WiUOgS1blqtFrGxscjIyIBMJkNtPzKV5/38/BAeHg6FQtGCSYmIiEgKLCipVkqlEvHx8RBFEaIo1vt9giBAEASEhYUhMDCwGRMSERGR1FhQUo0SExORkJDQ5OuEhIQgODjYAomIiIioNeIqb6qWUqm0SDEJAAkJCVAqlRa5FhEREbU+XJRDVWi1WsTHxwMAysrKsH//fqhUKqhUKhQXF2PSpEkICQkxvV4URRw9ehSXLl1Cbm4uSkpK4O7ujr59+2LChAlwdXVFfHw8fH19OaeSiIjIBnGEkqqIjY01zZcsLi5GSkoKDAYD+vXrV+3ry8vLsW/fPri7u2P69OmYP38+AgMDkZKSgq+++grl5eUQRRGxsbEt+W0QERFRC+EIJZlRq9XIyMgwfe3u7o4XX3wRMpkMRUVF1T66dnR0xHPPPYc2bdqYjvn6+qJDhw7Ytm0bUlNTMXToUGRkZECtVrOlEBERkY3hCCWZSU5OhkwmM30tk8nMvq6OIAhmxWSl7t27AwAKCgpMr0tOTrZgWiIiImoNWFCSmbS0tFr7TDbE5cuXAcA0IimKItLT0y1ybSIiImo9WFCSSVlZWbU74DRGQUEBdu/ejW7duqFPnz6m4xqNBjqdziL3ICIiotaBBSWZWKqYLC4uxqZNmwAAf/rTnyAI5j9mGo3GIvchIiKi1oEFJZno9fomX6OkpATR0dEoKCjAggUL4OHh0Sz3ISIiotaDBSWZyOVNW/RfUlKCb775BlqtFgsXLoSXl1ez3IeIiIhaFxaUZFLdaGJ93VlMLliwAF27dm2W+xAREVHrw6EiMnFycoJCoagylzItLQ06nc60mEatVuPcuXMAgICAAMhkMkRHR+PGjRuYPn06RFFEVlaW6f1t27Y1FZEeHh5wcnJqoe+IiIiIWgILSjITEBCApKQks9ZBsbGxyM/PN32dmpqK1NRUAMDy5csBANevXwcA7Nixo8o1hw4dipkzZwIA/P39my07ERERSUNmtFTTQbIJarUaUVFRzXb9//73v+jQoQP69esHR0dHODk5wdHREe3bt8fLL7/MXXSIiIisEEcoyYynpyf8/PyQmZlp2s/bEgRBgLOzM65fv47r16/jjz/+AAA4ODhAFEUYjUY8/vjjLCiJiIisEBflUBXh4eFVekc2lSAIePLJJ7Flyxaz4waDATKZDGFhYRgyZIhF70lEREQtgwUlVaFQKBAWFmbRa4aFhUGhUODhhx/GkiVLzPYHF0URZ8+excGDBy16TyIiImoZnENJNUpMTERCQgKMRqNZAdhQoaGhmDhxounroqIiDBkyBFeuXAEAdO3aFdevX4coihgyZAg2bdqEQYMGNTn/nXQ6HTQaDfR6PeRyOVebExERWRALSqqWRqPBwoULcePGDYSHh0MulzdoTqUgCBAEAWFhYQgMDKxyXqlUYvTo0TAajTh//jzat2+PRx99FLt27QIAjBs3Dps3b0avXr1M7zlz5gxOnjyJBQsW1CuDWq1GcnIy0tLSqt1WUqFQICAgAEFBQZy7SURE1AQsKMlMUVERVq1ahXfeeQdFRUUAgOTkZKSmpiIjIwOCINRaWFae9/PzQ3h4OBQKRY2v3bJlCzQaDZ555hnTsYyMDMyfPx9Hjx6FTCbDvffei+joaHh4eGDQoEH4448/sH37dkRERNR4Xa1Wi9jYWGRkZEAmk6G2H/HK8/XJS0RERNVjQUkAKh4Jr1u3Dq+
99ho0Go2pCHNyckJpaSlkMplpxC89PR0ajabKNTw8PODv72+REb/KkcizZ89CEASMGjUKR48eBQC4u7vj3Llz6NatW5X3KZVKxMfHQxRFi46oEhERUc1YUBIAYOXKlXj99derHB8yZAhOnTpV5XhLzUnct28fHn/8cVy+fNl0zMHBAcHBwdi9e7fZavTKOZ9NFRISguDg4CZfh4iIyF5wlTcBAB5//HGMHDnSbPGNg4MDBg8eXO3rnZyc4OXlhR49esDLy6vZFrhMnjwZL7zwgtkxg8GAhIQEfPTRR6ZjSqXSIsUkACQkJECpVFrkWkRERPaABSUBAHr27InExERTYVg58tevXz8pY8FgMOC1116r9tw//vEPvP/++9BqtYiPj7fofePj46tdyENERERVcaccMvnTn/6EsrIyREZG4tdff8W1a9ckLygBYNq0acjKyoJOpzP9p1KpoNFo8OKLLyI/Px+urq4oKyvD/v37oVKpoFKpUFxcjEmTJiEkJMTseleuXMHJkyehUqmQm5sLg8GA5cuXmy3IEUURsbGx9V5RTkREZM9YUBIAYNOmTYiLi8PIkSPx+eef45133sG3336L8PBwSXM5ODjg22+/rfac0WjEli1bcOHCBYiiiOLiYqSkpMDLywv9+vWr8bH15cuXkZGRga5du8LZ2RmZmZlVXiOKIjIyMqBWq9lSiIiIqA5clEO4efMmunfvDgcHB+Tm5sLNzU3qSPUWHx+PpKQkGI1G08p0mUyGoqIifPjhh9WOUIqiaHqkf+jQIezatavKCCVQ8dg/KCjI4rsGERER2Rq7H6HkDipAcHAwdDod4uLirKqYBIC0tDSzQrI+6rtPuSiKSE9Pb3Q2IiIie2GXBSV3UPmfl156CX/88QfmzZuHGTNmSB2nQcrKypp94YxGo4FOp7O7XzKIiIgawq4KyvruoKLVapGUlITjx4/b9A4qSqUS77//Pry8vBAdHS11nAZrqVXYGo0GXl5eLXIvIiIia2Q3bYOUSiWioqJMCzDqmjpaeT4zMxNRUVE215dQr9dj2rRpkMlk2LdvX70fA7cmer3epu5DRERkrexihLIpO6hUbuEXExODwsJCm9lBZdasWdBoNHj33XfRt29fqeM0ilzeMj++LXUfIiIia2V9w1INxB1Uqvr+++8RExODESNG4MUXX5Q6TqN5eHjY1H2IiIislU0PvVTuoFLfhtd3MhqN2LBhA65evYqRI0fivvvuA1DRpsbX19dq51TevHkTixYtgqurK/bu3St1nCZxcnKCQqFo1rmU9rjqn4iIqKFseoQyNjbWrOG1wWCo984vx48fh0ajqXK8cgcVazVp0iTodDps3boV7du3lzpOkwUEBJi1C0pLS8O5c+dw8eJFABUr+s+dO4dz585Bp9MBAIqKikzHcnNzAQDp6ek4d+6cWZNzQRDg7+/fct8MERGRlbLZEUq1Wo2MjAwAgLu7O1588UVTw+u6HltrtVrs2bMHM2fOxJYtW8zOWfMOKi+//DJSU1PxyCOPSL4DjqUEBQXh+PHjpq9jY2ORn59v+jo1NRWpqakAgOXLl8PJyQm5ubnYtm2b2XXi4uIAAL169cLixYsBVPxdBwUFNfe3QEREZPVstqBMTk42tQaqb8PrSjExMfDz80P//v2rPS8IApKTk61qB5WTJ0/i3XffhZeXV41bGVojT09P+Pn5ITMzE6Io4vnnn6/zPb6+vli5cmWtrxEEAT4+Plb3SwMREZEUbPaR9507qDRESkoKrl27VmuTb2vbQUWv12Pq1KmQyWTYu3evVbYIqk14eLjFvydBEGxmFJeIiKi52VZl8f8au4NKQUEBdu7ciWnTptU5v7ByBxVr8Kc//Qm3bt3CW2+9VeOoqzVTKBQWHy0OCwuz2oVXRERELc0mC8rGrvqNjY2Fl5cXRowYUa/XV7dop7XZsmULfv31VwQGBuKll16SOk6zCQwMrHXVfkOEhoYiMDDQItciIiKyBzZZUDZmZ5Nz584hPT0d06ZNQ2lpKUpKSlBSUgIAMBgMKCkpgcFgaPJ9WtKtW7ewcOFCuLi4WKwXZ2sWHByMiIgIyOXyBj8CFwQBcrkcERERmDhxYjMlJCIisk02uSinMTub5ObmQhRFrFu3rso5pVIJpVKJOXPmmD0ybu07qEyePBk6nQ6//vqrTbQIqo/AwED4+vqa9mwXBAGiKNb4+srzPj4+NrtnOxERUXNr3RVRIzVmZ5Nhw4bBx8enyvGvv/4a/fr1w+jRo9G5c+cm36c5vfTSS1Cr1fjoo4/w73//G2fPnsWcOXNw//33Sx2tRSkUCixYsABqtRrJyclIT0+vMj3BaDSisLAQU6ZMQVBQEFdzExERNYFNFpTV7aCSlpYGnU5nWkhT2fAaqGiOrVAoahydateuHXx9fc2OtcYdVDZs2ICcnBzExsYiNzcXXbp0webNm6WOJRlPT0/TYh2dTgeNRgO9Xg+5XI7XX38dX375JXx9fa2q/RMREVFrZJMFJVBRJCYlJZlaB9Wn4XVDdOvWDSqVCqIomv3Xs2dPSdry3L59Gzk5OQBg+r+zZ8+GKIo21yaoMZycnODl5WX6unK6wvLlyzFq1CiMHDlSqmhERERWT2ZsTLNGK6BWqxEVFdVs1//ss89w8+bNKsdfffVVvPHGG81235qkpKRUu6vL+PHjsX//fjg4OLR4ptYsPDzctDuOQqHA8ePHuc0iERFRI9ns0FXlDirN0fA6Ly+v2mISqFgII4Xz58+bfV35fZeXl7f61ehSSEpKMv3/+fn5mDJlimlkl4iIiBrGZgtKoPl2UHnllVdwzz33VNnS0cvLq1lazuh0OqhUKmRnZ0OlUlXbUD0lJcXsa19fX/zwww84evQonJ2dLZ7JmuXk5CA3N9f0tSiKyMrKwj333IOysjIJkxEREVknm33kXUmpVCImJsZi14uIiEBgYCBu3bqFIUOGICcnx6w/pbOzM5544gl89NFHZvMyjx49iosXL2LhwoX1uk/lCuW0tLRqG7UrFAoEBASYVih7eHhAq9XC3d0d//nPf7Bw4cJW39ZIKr/99hvuu+++Ksc9PT1x5swZdOnSRYJURERE1svmC0oASExMtEhj79DQULMRyOPHj2P8+PHQ6/UYPnw4nnnmGbz88stQq9VwdHTEokWL8Omnn0IulyMgIACZmZn45Zdf8MADD9R4D61Wa+qhKJPJat2PvPJ8+/bt8cYbb2DQoEHYtWsXXFxcmvy92rLVq1dj+fLlcHBwgCAIcHBwQGJiIgIDAznXlIiIqBHsoqAEKkYq4+PjTaux60sQBAiCgLCwsGq34/v000/x7LPPIjY21jTq9c033+CFF16ASqWCXC7HmDFjcPDgQQCAm5sbTp06BT8/P4tlBCoe295///313jbSnmm1WiQnJ2P06NF4/PHH8eOPPyInJ6dKn1EiIiKqH7spKAHz0b/67qDi5+dX6w4qRqMRWVlZ6NmzZ5VzW7duxfPPP4/r16+bjjk4OGDgwIE4duyY2UiipUZRQ0JCEBwc3OTr2IsdO3
YgLCwM//rXv7By5Uqp4xAREVkluyooK9W2gwpQ0bTc39/fIjuobN26FXPmzKlyfNGiRdi4cSOA5pvnSXUTRRFOTk4YPHgwTpw4IXUcIiIiq2SXBeWd7t5BxdI74AwaNMi0I8/dpk+fjnXr1mHjxo0oKirC/v37oVKpoFKpUFxcjEmTJiEkJMTsPUePHsWZM2eg0Wig0+nQtm1beHt7Y9KkSaZHtnK5HJGRkdyXup769++PjIwMrvAmIiJqJJtuG1QflTuo9OjRA15eXhbfTnHkyJEYO3YsJkyYgODgYEyePBmBgYFo27Ytfv/9d7z77rsQRRHFxcVISUmBwWBAv379arxeSUkJAgICcP/992PBggUICQmBSqXC2rVrTb0xRVFEbGysRb8PWzZjxgzodDocOXJE6ihERERWye5HKKV06tQp/PLLLwBgWs0tk8lQVFSEDz/8sNoRyuqo1Wp8/vnnCA4ORmhoqOl4ZGRkkx/Z24OMjAz07t0bCxcuxNdffy11HCIiIqtj9yOUUrp+/bqpObpMJqvSKL2+2rRpAwBmTdwFQUBycnLTQ9oBPz8/uLm5Ye/evVJHISIiskosKCWUlpZWa5/J2oiiCL1eD7Vaje3bt6Nt27YYPny42fn09HRLRbV5w4cPx7Vr16rdhYiIiIhqx61UJFJWVlbtDjj19fbbb5t26OnYsSMee+wxdOjQwew1lQt3LD0v1BY98sgjOHDgADZt2oTFixdLHYeIiMiqcIRSIk0pJgFgyZIlWLJkCWbNmgUnJyds3LjRbH/qStW1RaKqFi1aBADYtGmTxEmIiIisDwtKiej1+ia9v1u3bvD29saQIUPw2GOPAQD27Nlj8fvYizZt2qBr165ISkqSOgoREZHVYUEpEbnccrMNnJ2d0alTJ9y6datZ72PrJk2ahIKCAmRnZ0sdhYiIyKqwoJSIh4eHxa5VVFSEnJycaq9pyfvYuqeffhoAEBUVJXESIiIi68LhK4k4OTlBoVCYzaVMS0uDTqczrTRWq9WmXXYCAgIgiiK++eYbDB48GB07doRcLsetW7dw7NgxGAwGTJo0yewelt71x9ZNnjwZcrkcMTExeOedd6SOQ0REZDVYUEooICAASUlJptZBsbGxyM/PN51PTU1FamoqAGD58uVo164dvLy8kJKSgoKCAuj1eri5ucHHxwcPP/ywaetFoKIPpb+/f8t+Qzagb9++uHDhgtQxiIiIrAp3ypGQWq1u1ser3Cmn4V566SW89957SEhIwOTJk6WOQ0REZBU4h1JCnp6e8PPzM9vhxhIEQYCfnx+LyUaIjIwEAKxdu1biJERERNaDBaXEwsPDm6WgDA8Pt+g17YW3tzfat2+PhIQEqaMQERFZDRaUElMoFAgLC7PoNcPCwqBQKCx6TXsSFBSEGzduoLS0VOooREREVoEFZSsQGBiIkJAQi1wrNDQUgYGBFrmWvZo3bx4AYOPGjdIGISIishJclNOKKJVKxMfHQxRFiKJY7/cJggBBEBAWFsZi0gJ0Oh1cXFwwceJE7N+/X+o4RERErR4LylZGq9UiNjYWGRkZEASh1sKy8ryfnx/Cw8P5mNuCvL29kZeXh9u3b0sdxa7odDpoNBro9XrI5XL2UiUishIsKFsptVqN5ORkpKenQ6PRmJ0zGo3Iy8vDPffcg5EjR3I1dzNYtGgRvvnmG2RkZMDX11fqODat8mc9LS3NrNF/JYVCgYCAAAQFBfFnnYiolWJBaQXuHrVZtGgRdu7ciYULF+Lrr7+WOp5NOnz4MMaPH4+///3v+PDDD6WOY5PuHI2XyWSo7Z+iyvMcjSciap1YUFqhadOmYffu3QAqFo4sWrRI4kS2ydnZGX5+fvjjjz+kjmJzOF+YiMi2sKC0MkajEZ6enrh16xYAQC6XY/fu3VX28aamGzZsGM6ePQudTmfxXqH2LDEx0SJ9PkNCQhAcHGyBRERE1FT8lLQyly9fNhWTACCKIu6//35cvHhRwlS26cEHH4TBYMCuXbukjmIzlEqlxZrGJyQkQKlUWuRaRETUNCworczhw4fNvhZFEQUFBbj//vslSmS7li5dCgBYt26dxElsg1arRXx8vEWvGR8fX+1CHiIiallyqQNQw1QWlHK5HHq9HgDg7++P2bNnw2g0QiaTSRnPpnh5ecHd3R0HDhyQOopNiI2NrXO+5JUrV3DgwAFkZ2dDr9ejffv2GDp0aI1TOkRRRGxsLBYsWNAckYmIqJ5YUFoZT09P9O/fH1OmTMGXX36JXr16IS0tTepYNmv06NH4/fffUVhYCDc3N6njWC21Wo2MjIxaX3P69Gn8/PPPGDhwIGbOnAknJydoNJpae4GKooiMjAyo1Wq2FCIikhAX5VixwYMH48KFC9DpdFJHsVmbNm3Co48+ilWrVuHZZ5+VOo7Vio+PR1JSUo2tgQoKCvDpp59i6NChCA8Pb9C1BUFAUFAQwsLCLBGViIgagXMordjUqVNRXl6OkydPSh3FZs2ZMwcymQxbtmyROopVS0tLq7XPpFKpRHl5OSZMmNDga4uiiPT09KbEIyKiJmJBacUWL14MAPjqq68kTmK75HI5evbsyaK9CcrKyupcOHPlyhW4urri5s2b+OKLL/D666/jgw8+QExMDEpLS+u8h0aj4Ug9EZGEWFBasSFDhsDJyQl79uyROopNmzp1KoqLi3HhwgWpo1il+qzCLigoQHl5ObZu3YpBgwZh4cKFGD9+PE6dOoVNmzbVOrpZ6e4tSomIqOWwoLRyvXv35uO+ZvbMM88AAKKioiROYp0quxHUxmg0Qq/XY+LEiZg4cSJ8fX0xfvx4TJ06FVlZWXUu6KnvfYiIqHmwoLRyU6ZMgU6nw9mzZ6WOYrNGjBgBZ2dni/dQtBdyed3NJNq0aQOgogXWnSq/vnHjhkXuQ0REzYMFpZXjPMqWMWjQIFy6dKlB+05TBQ8Pjzpf06VLl1rP16e/an3uQ0REzYMFpZULDAyEo6Mjdu/eLXUUmzZz5kxTE21qGCcnJygUilpf079/fwCo0lO18usePXrU+n4PDw84OTk1ISURETUFC0ob4Ovry728m1nlNowbNmyQOIl1CggIqHWU0d/fH3369MH+/fuxf/9+XLp0CQcOHMCePXvQp08f9OrVq8b3ymSyKo/KiYioZbGxuQ1YunQp1qxZg/Pnz6Nv375Sx7FZHTt2hIODA3Jzc6WOYnXUanWdi5rKy8uxb98+nDlzBoWFhWjXrh0GDx6MyZMn1zk/ctu2bfDz84OjoyNKS0tRWlqKkpISDB06FF988YUlvxUiIqoGC0obcOTIEYwbNw7//Oc/8f7770sdx2aFh4cjLi4OSqUSR44cgV6v5+45DRAdHY3MzEyLzkOVyWS4ceMGvvzyy2rPjxkzBkeOHLHY/YiIqHosKG2Eo6MjBg0ahBMnTkgdxeYUFRVh586d+OKLL7Br1y7T8Q4dOiAvL0+6YFZGq9UiKirKou195HI5li5dipkzZ+LAgQNVzv/666+4//77LXY/Imuj0+mg0Wig1+shl8s535iaDfts2
AgfHx+cP39e6hg2afny5Vi/fn2Vx67Dhg2TJpCVUigUCAsLQ0xMjMWuGRYWho4dOyImJgbDhw/HlStXzEZAly5dips3b+Lxxx+32D3vxA9rao3UajWSk5ORlpZW7cYCCoUCAQEBCAoKgqenpwQJyRZxhNJGPPHEE1i/fj0yMjLg6+srdRybcuLECUyYMAElJSWmHVscHR3x3HPP4YMPPpA4nXUpKirCE088gX79+jX5WqGhoZg4caLp63PnziEoKMi0VeO4ceOQlJSE8vJyuLu74+9//zteeuklCML/1iLqdDr8/PPPmDlzZr0LQX5YU2ul1WoRGxuLjIwMyGSyWneYqjzv5+eH8PDwOjsxENWFq7xtxMKFCwEA69atkziJ7Rk+fDh+/fVXODg4mI6Vl5djxIgREqayLjqdDl9//TU8PDywdetWDBs2DHK53Ky4qw9BECCXyxEREWFWTALAwIEDER0dDQDw8vJCQkICCgsL8fe//x06nQ6vvPIK3Nzc8Oyzz5qKzrVr12Lu3Ln1mgur1WoRHR2NqKgoJCUl1bilpFarRVJSEqKiohAdHV2vrSeJmkqpVCIqKgqZmZkAUOd2pZXnMzMzERUVBaVS2dwRycZxhNJGiKIIJycnDB06FCkpKVLHsUmbN2/G/PnzTV+npaWxXU0dcnJysGbNGnz66ae4efMmAGD06NE4evSo2WiKIAi1LtapPF+f0ZQNGzbA29sbU6dONR0TRREfffQR3nvvPdy6dQtyuRyzZs3CwYMHcf36dQDA+vXra3w0rlQqER8fD1EUG7SoSBAECIKAsLAwBAYG1vt9RA2RmJiIhISEJl8nJCQEwcHBFkhE9ogFpQ3x8/NDTk4OioqKpI5isz766CP87W9/g0wmg8FgqNcOLvaooKAAf/nLX/Ddd99VKcJ++uknzJw50/R15SPk9PR0aDSaKtfy8PCAv7+/xR4hb9q0CS+99BKysrLMjsvlchw+fBgjR440O84Pa2rNlEqlReclR0RE8JcfahQWlDZk0aJF+Oabb3DlyhX07NlT6jg2q0+fPrh8+TJ0Oh3Ky8u5KKMaSqUSI0eOrHY0Lzc3t8bCsKUWuRiNRvj6+uLKlStmxzt27IjU1FR07tzZ9H3ww5paq7o6J5SVlWHv3r04d+4cSkpK0KlTJ0yYMAGDBw+u8ZpyuRyRkZGcU0kNxoLShuzevRvTpk3Dv/71L6xcuVLqODYrLi4On3/+OaZOnYrbt29XOc9FGRV+//13PPjgg6b5igDQu3dvpKenS5iqQnx8PGbMmFHtubZt2yIlJQWdO3eu8cP68uXL+Prrr6t9/5IlS+Dt7V3tOX5YkyXV1dv1m2++wfXr1zF16lR07NgRZ86cgVKpxKxZszBkyJBq3yMIAnx8fLBgwYLmjE42iG2DbEhoaCgEQcBvv/3GgrIZ3Dnnb8yYMdUWk5WvS0pKwvHjx+16BeWAAQNgMBgAwLSgKTQ0VMpIJhqNBu3atYNcLoeTkxOcnJzg4OCAmzdvori4GP369cMnn3xS53zJKVOmwMfHx+xY5ehmdSr3g+eHNTWVWq1GRkZGjecvXryIjIwMPPTQQ6YRSV9fX+Tl5WHXrl0YNGhQtYviRFFERkYG1Gq1Xf9CTA3HVd42RBAEeHt7IzU1VeooNocrKBumtLQUgYGBKC8vx+bNmzF69GgYDIZWM4dw/vz5KCgogEajgUqlwtWrV3H58mXcvn0bRUVF+O6775CXl1dnQenh4QFvb2+z/5ydnWt8/Z0f1kRNkZycXOsc7vPnz8PJyQkDBgwwOz58+HDcvn0b2dnZNb5XEAQkJydbLCvZBxaUNmb8+PEoKioyrVylpktMTERMTAz0en2Dtw0URRF6vR4xMTFITExspoStz9ixY3Hz5k289957eOSRR7B3715s2rQJDz/8sNTR6uTi4oIOHTo024IrfliTJaSlpdX6i21ubi46depk1u4MALp06WI6XxNRFFvF1BSyLiwobcyjjz4KoKJ1CjWdUqm0yApfAEhISLCLkcp58+bh5MmTmDNnDl544QUAgLOzM+bNm2c1C5bq+rCu9Ntvv+H111/HO++8g+jo6CqLfKrDD2tqqrKysjr7mxYXF8PV1bXK8cpjJSUltb5fo9FAp9M1PiTZHRaUNubee++FIAiIjY2VOorV02q1iI+Pt+g14+PjbbrR9QcffIDvvvsOQ4YMwffffy91nEapz4e1i4sLRo8ejfDwcDz22GMICwtDfn4+Nm7cWK9ikR/W1BT1/TekqaPs1bXxIqoJF+XYGEEQ0L17d5w9e1bqKFYvNjYWoiiirKwM+/fvh0qlgkqlQnFxMSZNmoSQkJAq77l+/Tp27dqF7OxsCIIAX19f3HPPPfDw8ABg24sy4uPj8eKLL6Jjx444duyY1HEarT4f1l27dkXXrl1NX/fq1Qv9+vXDF198gV27dtWr4b1Go4GXl1eTspJ9qqlN0J3atGmD4uLiKscrRyarG71szH2IKnGE0gaNGzcOhYWFtc6RodpVrqAURRHFxcVISUmBwWCodQ9qtVqNjRs3wmAwYPbs2XjggQdw69YtbNiwwdRs3lYXZaSlpeGBBx6Ao6MjlEolXFxcpI7UaI39EHV1dUWfPn2Qk5OD8vLyZrsPkVxe91hQ586dcfPmTVOnhUo5OTmm85a4D1ElFpQ26JFHHgHAeZRNcecKSnd3d7z44otYvHgxpkyZUuN7EhISIJfLMW/ePPTp0wcDBgzA/PnzUVRUhMOHD5teZ2uLMgoLCzFq1Cjo9Xrs2LHD6pvqN+VDtCFtfflhTY1V+cSjNv3794dOp8Mff/xhdvzUqVNo164devToYZH7EFViQWmDIiIiIJPJLLrDh725c1GGTCarcy6SwWDAxYsX0b9/f7PROXd3d/j6+pr9o25LizJEUURQUBDy8vKwevXqaqcBWJvGfoiWlJTg4sWL8PLygqOjY7Pdh8jJyanO3rYBAQHw8/NDbGwsUlJScPnyZWzfvh3p6emYNm1atT0o78Rdv6ih+CuyDRIEAV27dsWZM2ekjmKV6rMo425arRZ6vd7UkuNOXbp0waVLl1BeXm4qNCoXZVj7P9gzZ87EhQsXsGTJEvzlL3+ROo5FVH5Y1/Yz8MMPP6BDhw7o1q0b2rRpA41Gg8OHD6OoqAgPPvhgnffghzU1VUBAAJKSkmodFZ8zZw727t2LhIQE09aLdzY6r4kgCPWaB0x0JxaUNmrs2LH48ccfodFoOBLSQI1ZhV05+b22Nh2lpaVmI1fWvijjtddew/bt2zF69GisW7dO6jgWVdeHdZcuXXDu3DkkJydDp9PB1dUVPXv2xKxZs9C9e/dar80Pa7KEoKAgHD9+vNbXODs7IywsDGFhYQ26duWTB6KGYEFpo+bOnYsff/wRGzduxF//+lep41iVpiyWaEibDmtelPHDDz/gzTffRNeuXXHw4EGp41hcXR/WEydOxMSJExt1bX5YkyV4enrCz8+v1r28G6NyL29uu0gNxTmUNurBBx+ETCbD9u3bpY5idRqzWKJNmzYAUGub
jrtXPlvrooyzZ89i7ty5cHV1hVKptNrvozaVH9Z1zTNrKEEQ4Ofnxw9rsojw8PBm+RkNDw+36DXJPrCgtFFyuRxdunTByZMnpY5idRozRUChUEAul1fbqiknJwceHh5VFmpY41SEvLw8jB07FkajEfv377fqR/Z14Yc1tXYKhaLBj7PrEhYWVueCH6LqsKC0YaNHj0Z+fj7y8vKkjmJV6rOC8m4ODg7o27cv/vjjD5SVlZmO5+XlITMzE/379zd7vTUuyhBFEcOGDUNhYSG++uorjBw5UupIzYof1mQNAgMDLdZdITQ0FIGBgRa5FtkfFpQ2bM6cOQCA6OhoiZNYn4CAALP5kGlpaTh37hwuXrwIoKKJ+blz53Du3DnTFnqTJ09GeXk5Nm/ejLS0NPzxxx/YvHkz2rRpg3HjxpmuZS2LMnJycsx2XJo2bRquXLmC5557DosWLZIwWcvhhzVZg+DgYEREREAulzd4VF0QBMjlckRERDR6XjARAMiMDenES1ZFp9PB2dkZoaGh2LNnj9RxrIparUZUVJTp648//hj5+fnVvnb58uWmUae6tl6slJycjKFDhwKo6GFZ+d/kyZMRERHRTN9Vw8ybNw9btmzBp59+irS0NHzyyScICQnB3r17pY7W4pRKJeLj4yGKYoMWQAiCAEEQEBYWxmKSmp1Wq0VsbCwyMjIgk8lqbSkkCAJEUYSfnx/Cw8M5ck5NxoLSxnXp0gXl5eXQaDRSR7E60dHRFl9BCQCXL1/G119/DeB/BYdMJkN5eTkeeugh/PDDDxa9X2MYjUZ06dLFbIvInj174vLlyxafV2gt7vywrvwwrgk/rElKarUaP/30E86dO4eOHTtWOe/h4QF/f38EBQVxgRhZDAtKGxceHo64uDjcvn0bbm5uUsexKlqtFlFRURZt7yOXyzF79mwEBgaa9ve+08GDBzF+/HiL3a+xMjIy0Lt3b7NjkyZNwq+//ooOHTpIlKp1UKvVSE5ORnp6erW/qN26dQsDBgzAQw89xA9rkkRZWRm6d++OW7du4ccff8S4ceOg1+shl8utcv42WQfb6/dBZmbPno24uDh8++23WLp0qdRxrErlogxLbmEZFhaGPn364Pfff0dwcLDZKJdMJkN0dDRGjBhRpcVQSztw4ECVY/v370dwcDBOnjzZoH6btsbT09O0WEen00Gj0Zg+rL/66it8+umnaNOmDe677z6Jk5I9MhqNWLp0KW7dugUA+OOPPzBr1iyJU5E9sM9nV3akcmHOTz/9JHES6xQYGGix9j53LsoYP348PvzwQ7PzHTp0wJo1a9CuXTssWrQIhYWFFrnvnXQ6HVQqFbKzs6FSqUwLiu62b98+s68FQYCLiwvuvfdei2eyZk5OTvDy8kKPHj3g5eVl2mWpuLgYISEhZlMGiFrC6tWrsXHjRtPXnD9PLYWPvO1A586dIYoi9u7di8TERAQGBpqtOqbqlZWVYdmyZVi7di3++te/wsPDw6KLMoxGIx566CH8/PPPGDlyJI4dO4bvv/8e//jHP3Dt2jU4ODhg1qxZ+O9//wt3d3fT+95880107twZTz/9dL0yVD6iTUtLq3ZbSYVCgYCAALP5VC4uLqb2R926dcNzzz2HJ554gnMB6zB16lTTB7ggCBg8eDASExPRvn17iZORPdi9ezfuvfdes3+jXFxckJ+fz8fc1OxYUNqwixcv4rfffsM777xjNlIyf/58fPvttxIma92Kioqwdu1avPXWW6bHRiUlJSgpKbH4ooyCggI89thj+Mc//oGxY8eajv/6669Yvnw5rly5YmqGvXbtWmg0GgwYMAAA8Pvvv2PatGk1ZmjIis/K835+figtLcVTTz0FDw8PrF27Fvfff79N7obTHDp16mT6mQEq+pOOHTsWu3btknwaA9m227dvo0ePHigoKKhy7tChQxxEoGbHgtJGGY1GeHh4IC8vz6yYkMlk+M9//oPnn39e4oStj1arxeeff45///vfKCgoMP2ZDR8+HEql0vS6uhZlWHIF5a5du/DnP/8ZaWlpkMlkppXXRqMR7du3x5kzZ9CjR48q72tKmxudTof8/HysWrXKrudKNlROTk6VnYMqf7E4cOAAJkyYIFEysgclJSV4/vnn8fvvvyMzMxNAxSJAvV6Pt99+GytWrJA2INk8DjvYKJlMhjfffBPLli0zG5kyGo38TbUGISEhOHXqlNkxQRAwdepUs2O1Lcqw9ArKadOm4eLFizh48CAef/xxpKWlmc7dvn0bDz30EA4ePGi2rWNiYiISEhIadT9RFCGXy9GxY0ccOHAAwcHBTf4e7MWZM2fMvpbJZFi5ciXCw8MxfPhwiVKRvXB1dcWXX34JAKYnIs888wyOHj2KgIAAKaORneCiHBv2l7/8BcuWLTMbZXJ0dOSHWw1WrFgBZ2dnsz8vURQRFBRU43vuXpTRXPOUJkyYgODgYDg4OJiOGQwGHD9+HI8//rjpmFKpbHQxebeEhASzkVmq3YABAxAZGYmvv/4aS5cuhdFoRGhoKP/3Ri1Kp9MhLy8Po0aNwjvvvIO9e/di9uzZUsciO8BH3jbOYDAgIiIC8fHxAICRI0fi+PHjEqdqvdauXYunnnrK7Fh6enqVnowtTaPRoEuXLjAYDJDL5TAajTAYDKbR53vuuQeffvoptm3bVm3fzJ9//rnK6OudlixZAm9v7yrH5XI5IiMjuRinga5evYpevXphzpw5+P7776WOQ3bkhx9+wOzZs/HJJ59g+fLlUschO8KC0g7cvn0bffr0gUqlwowZMxAXFyd1pFbp5s2b8Pb2hl6vR3BwMPbu3Yt27dohPz9f8rmEpaWleOONN5Cfnw8HBwfTf7dv30ZCQgJu3LiBpUuXon379tXOmdRoNNU2Uv/uu+/g4OCA559/vtodcARBgI+PDxYsWNAs35ctUygUcHR0RG5urtRRyI7Mnz8fmzdvRk5ODjp37ix1HLIjnENpB9q1a4e9e/diwIABcHd3b/Z5f9ZIFEWMHj0apaWl+OGHHzBz5kx88sknMBgMkheTQEXrj3feeafG81euXMHGjRtrXIDj4eFRpZ9mZmYmiouLERwcXON2iqIoIiMjA2q1mru+NNCECRMQGxuLW7duVbv9HVFzOHr0KFxdXVlMUotjQWknOnXqhKeeegqdOnXCu+++W+V8db0I7cn8+fORkZGBZ555Bg899BAA4K9//avEqeovNTW1ztZAd6ucH1nXHD9BEJCcnGxaiET1s3TpUsTGxuLTTz/FypUrpY5DdiIrKwv9+vWTOgbZIRaUNu7OXoTdu3evseDQarVISkrC8ePH6+ydaGvWrVuH77//HkOGDEFUVJTUcRolLS2tQcVkaWkpUlNT4efnV+ffsyiKSE9Pb2pEuxMWFga5XI4ffviBBSW1iAsXLqC8vBwTJ06UOgrZIa7ytmFKpRJRUVGmnmR1FRyV5zMzMxEVFWUXK3zPnTuHpUuXol27djh06JDUcRqlrKys2h1wanPmzBno9fp6r0DWaDQ1btNI1RMEAf3798f58+cb1Au
UqLEqN6x45JFHJE5C9ogFpY1KTExETEwM9Hp9gz/MRFGEXq9HTEwMEhMTmymh9EpLSzFhwgTTtpRubm5SR2qUhhaTAHDixAm4urqif//+9X5PdU3cqXYPP/wwDAYDYmJipI5CdmDPnj0QBIG9hkkSLChtEHsR1s/kyZORl5eHjz76qNZek61ddW2CaqNSqXD9+nUMGTKkQVsqNvQ+BPz5z38GAKxZs0biJGQPUlNT4eXlVeMiO6LmxJ86G6PVak09Jy0lPj6+UaNgrdkLL7yAY8eOITw8HM8995zUcZqkoftsnzhxAgAQGBjYrPehisVuXbp0weHDh6WOQjaupKQE+fn5bKRPkuEnhI2JjY2FKIooKyvD/v37oVKpoFKpUFxcjEmTJiEkJMTs9TU1vO7YsSOWLVsGoOIReGxsrM30ItyxYwc++OADdO/eHb/++qvUcZrs7nZAtdHr9Th9+jS6d++OLl26NNt96H+mTJmCzZs3IzMzEz4+PlLHIRv1008/AQDCw8MlTkL2igWlDVGr1cjIyAAAFBcXIyUlBV5eXujXr1+tj63lcjkWLVpkduzOvaFtqRehSqXCgw8+CCcnJxw/ftwmHg05OTlBoVDUaxT5/PnzKCkpafDoJHuVNt5f/vIXbN68GZ988gk++eQTqeOQjfrll18AAHPnzpU2CNkt6/80JZPk5GRTE253d3e8+OKLWLx4MaZMmVLr+2QyGby9vc3+8/LyMntNZS9CayaKIkaNGoWysjL8+OOP6Natm9SRLCYgIKBeDdiVSiUcHR0xaNCgel9bEAT4+/s3JZ5dGzt2LFxcXLgwh5pVUlIS2rZtC3d3d6mjkJ3iCKUNubMXoaV3d7GFXoR/+tOfkJWVheeff97mHgsFBQXVa4/2hQsXNvjaoiha9aKl1mDYsGE4duyYaXcqIku7du0ahgwZInUMsmMcobQRjelFWEmv1+PDDz/E66+/jv/85z+Ii4tDcXFxlddZcy/Czz//HD///DNGjBiBjz76SOo4Fufp6Qk/Pz+LP8IXBAF+fn5WP9VBao8++iiMRiO+/vprqaOQDTp58iT0ej0mTZokdRSyYywobURji0kvLy/cc889mDVrFh599FEMGzYMJ0+exFdffYWysrIqr7fGXoQnT57Es88+iw4dOuDAgQNSx2k24eHhzVJQ2tporhSWLFkCmUzGgpKaxebNmwGwoTlJi89ebERjewSOHTvW7OvevXuja9eu2Lp1K5RKZZXz1taLsHJ1O1DR7N3V1VXiRM1HoVAgLCzMonP1wsLC7GYLzubk4uICb29vpKSkSB2FbFBCQgIcHBwwcuRIqaOQHeMIpY2w5Lysfv36wdHREdnZ2c16n5YwceJEFBQU4LPPPrOL+UWBgYFVWkM1VmhoaINXg1PNZsyYgeLiYpw8eVLqKGRjLly4YFOLDMk6saC0Ec3RI7C6hT2tuRehwWDAlStXTF8vX74cSqUSDz30EJ555hkJk7Ws4OBgREREQC6XN/gRuCAIkMvliIiIwMSJE5spoX2qbKC/atUqaYOQTSksLMTt27e5cI4kx4LSRlT2IrSE1NRUlJeXo0ePHmbHW3svwrVr18LX1xdvvvkmfvnlF6xevRq9evXC1q1bpY7W4gIDAxEZGWlqpF1XYVn5y4OPjw8iIyM5MtkM+vbtCzc3N+zcuVPqKGRDtmzZAgCIiIiQOAnZO+t6fkm1CggIQFJSkql1UFpaGnQ6nWlltlqtxrlz50yvLS4uxo8//ohBgwaZRh6vXLmCo0ePwtPT06yokMlkrb4X4aFDhwAAr732GmQyGZydnW2meXljKBQKLFiwAGq1GsnJyUhPT692UdWtW7eQn5+Pf//731zN3cxGjx6NPXv2oLCwEG5ublLHIRtQOWd69uzZEiche8eC0obc3YswNjYW+fn5pq9TU1ORmpoKoOJxsIuLC9q2bYsjR46gsLAQRqMRHTp0wOjRozFx4kSz0Uij0Yinn34abdq0gZubG4qLi1FcXIySkhK0bdsWSqVS8oa6R44cMRXTRqMR7du3x/Xr19G5c2dJc0nN09MTYWFhAACdTgeNRmPqh+js7Gz6ZWLYsGF4/vnnpYxq85544gns2bMHX375Jf7+979LHYdsQEpKCtq3b89fUEhyMmPlJzDZhOjoaGRmZkIURYtdUyaTobS0FO+++2615z09PZGVlQVnZ2eL3bOhbt++jQ4dOuDOH2eZTAZHR0ecOHECAwYMkCxba5aUlIRRo0aZvl6/fj0ef/xxCRPZNlEU4ejoiGHDhnHFN1mEXC7HiBEjcOzYMamjkJ2zz2eBNqw5ehE6ODjgH//4Bz7//PNqzy9durRZi0mdTgeVSoXs7GyoVKpqm6ufOHHCrJh0cHCA0WjE5MmTq2wjSf9z9uxZs6+feOIJbNu2TaI0tk8QBAQEBODMmTNSRyEbcOzYMRgMBoSGhkodhYiPvG1Nc/YijIyMxJkzZ7BmzRqz4u3NN9/EwYMH8cUXX6Bv374WuWflvL+0tLRqm7YrFAoEBAQgKCgInp6e+Pnnn83Oz5w5Ey+//DKGDRtmkTy26syZM3B0dER5eTmAiqkC8+bNQ4cOHXDPPfdInM42zZw5E++99x727NmDKVOmSB2HrNh3330HAJg/f77ESYg4QmmTmrMX4apVqzBu3Dg4ODjAwcEBM2fOxKBBg5CQkIB+/fph0KBB2L17t9k1zp07h8mTJ+PSpUt13k+r1SI6OhpRUVFISkqqcQcgrVaLpKQkREVF4euvvzaNqs2cORMXLlzAtm3bWEzWw8mTJ03FJFAxsqvX6xEbGythKtu2fPlyAEBUVJTEScja7d+/H3K5HIMGDZI6ChHnUNoypVKJ+Ph4iKLYoDmVgiBAEASEhYVV2z4mNzcXw4YNQ05ODi5dugQfHx9cuHABkZGRSEhIgNFoRNeuXfGvf/0LTz/9NObOnYstW7agb9++SEpKQrt27SyaFwDKy8vh6+uLJUuWNOh99s7Lyws5OTmmr2fMmIG33noLw4YNq7YPKVlGx44dIZPJcPPmTamjkBVzc3ODp6cnLl++LHUUIhaUtk6r1SI2NhYZGRkQBKHWQq3yvJ+fH8LDw2vta5mWlobU1FQ88MADVe63fPlybNmyBTqdDm3btkVRURGAitGv8PBw/PTTT1XmeSYmJiIhIaEJ32mFkJAQBAcHN/k69mL9+vUAgHvuuQc+Pj4ICgri5P4WMHPmTPzyyy+4ceMG5/hSo2g0GnTs2BFz5szB999/L3UcIhaU9qKuXoQeHh7w9/c3zUlsKr1ej9dffx0ffPBBlUU0//rXv7By5UrT10ql0qJzPiMiItiYuxH69OmDK1euoKysTOooNm/37t2YNm0aVqxYgbffflvqOGSFoqKi8Oc//xmbN2/GI488InUcIhaU9ujuXoTNtQOOVqtFt27dUFpaWuXcqlWr8Oyzz0Kr1SIqKgp6vd5i95XL5YiMjLTYzkH24oUXXsAHH3yAPXv2cNVoC3ByckLv3r3xxx9/SB2FrNB9992H3377DSUlJXBxcZE6Dh
ELSmo+n332GZYtWwYHBwfIZDIYDAaz1eFPPPEExo0bh+zsbJSUlGD//v1QqVRQqVQoLi7GpEmTql1cZDAYcOzYMZw8eRIajQYODg7w9PTEPffcg549e0IQBPj4+GDBggUt+e1aPZVKha5du2LmzJn46aefpI5j8wIDA3H69GnodDq73c2JGq9bt24oKSmpceEiUUtj2yBqNpMnT8bf/vY3uLq6wtXVFW3atEGbNm1w+/ZtHD9+HCdPnjTtF15cXIyUlBR4eXmhX79+UCqV1V5TFEV8//33uHr1KsaPHw9vb2+Ul5fj+vXrptXKoigiIyMDarWaWwk2gJeXFzp27Ij9+/dLHcUuzJ07FydOnMAPP/yAhx9+WOo4ZEVEUUROTg7GjBkjdRQiE45QkmTi4+NNe49X/hjKZDIUFRXhww8/rHaE8siRI9i5cycef/xxeHt713htQRAQFBRk2nKQ6mf27Nn44YcfkJWVZSr2qXkUFhaiXbt2mDZtGnbu3Cl1HLIi+/btQ0hISJX56ERS4nMWkkxaWppZIVmfNjXHjh1Dr169ai0mgYrf4NPT0y2S055U9kj8+OOPJU5i+9zc3NC1a1ccPXpU6ihkZbZu3QqADc2pdWFBSZIoKytr8Nyf/Px85OXloXPnzti9ezc+/PBDvP766/j8889x8uTJKq/XaDTVbtNINZswYQKcnZ3x66+/Sh3FLkybNg23b99GWlqa1FHIihw4cABOTk4ICAiQOgqRCQtKkkRjJpIXFBQAAE6dOoULFy5gxowZmD9/Pjw9PfHLL78gJSWlynuqa5FEtQsMDERGRgaL8Rbw3HPPAQA++eQTSXOQdbl06RJ69uwpdQwiMywoSRKNaRNU+Xhcr9dj/vz5GDhwIPz9/TF79mx07dq12sUklmxHZC8WLVoEo9GIjRs3Sh3F5g0fPhyurq6Ii4uTOgpZCZVKhZKSEowePVrqKERmWFCSJOTyhjcYaNOmDQCgU6dOcHd3Nx2XyWTo3bs3CgoKUFhY2OT72LvFixdDJpOxoGwhI0aMwNWrVzkiTPWyefNmAMBDDz0kcRIicywoSRIeHh4Nfo9CoYCjo2Otr7l7YU9j7mPvnJyc4OvrixMnTkgdxS5UjghXboNJVJsdO3YAqNgRjKg1YUFJknBycmrwTjYODg7o27cv1Gq12RxMo9GI9PR0KBQKtG3b1nS8uXYAsgcPPPAASktLcejQIamj2LyFCxdCJpPhm2++wYEDB/Cvf/2r2kVmREDFHPKOHTvy6Qu1OvyJJMkEBASY+lACFW2EdDqd6dGfWq3GuXPnTK91cnJCaGgo0tPT8e2332Ly5MlwdnaGUqmESqXC7NmzTdcWBAH+/v4t/03ZiOeffx4ff/wxVq1ahfHjx0sdx2bdvHkT8fHxcHV1xdGjRxEcHAygYqrGsGHDpA1HrcahQ4dQWlqKoKAgqNVq088JUWvCxuYkGbVajaioKNPXH3/8MfLz86t97fLly00jmjk5Odi9ezeuXLkCURTh5eWFiRMnom/fvmbviYyM5E45TeDh4QEHBweo1Wqpo9ik27dvo2vXrigqKoJMJjPblvSnn37CzJkzJUxHrUnv3r2RkZFh+nr06NH485//jHvuuQddunSRMBnR/7CgJElFR0cjMzMToiha7Jrcy9syZs6ciV9++QU3btyAl5eX1HFsjtFoxGOPPYbo6Gjc/c/wpUuX4OfnJ1Eyam3mzp2Lbdu2mf6ddHBwgMFgwMSJE5GYmChxOqIKnENJkgoPD4cgWPbHUBAEhIeHW/Sa9mjZsmUAgFWrVkmcxDbJZDKsX7++yv8GXF1d4ePjI10wanUmT55s9kuHwWCATCbDyy+/LGEqInMsKElSCoXC4vtth4WFNXjBD1UVGhoKJycn/PTTT1JHsVlyuRxbt27FhAkTTMcGDBhg8V+yyLrdXVACwIcffoh7771XokREVfFfLZJcYGAgQkJCLHKt0NBQBAYGWuRaBAwdOhTp6elsEN+MXFxcEBsbC19fXwAVjzOJ7tS3b1+z3rvz5s3DX//6V+kCEVWDBSW1CsHBwYiIiIBcLm/w6IwgCJDL5YiIiMDEiRObKaF9WrBgAURRxLfffit1FJvWrl07HD16FIIgmFpd6XQ6qFQqZGdnQ6VSsfG5HZPJZOjRoweAigU669atq9Jzl0hqXJRDrYpWq0VsbCwyMjIgCEKti3Uqz/v5+SE8PJyPuZtBaWkp2rRpgwkTJnDyfwt49tlnUVBQgMDAwGr3u1coFAgICEBQUBA7GNiZSZMmITExEVevXoW3t7fUcYiqYEFJrZJarUZycjLS09Oh0WiqnPfw8IC/vz8/WFuAj48PcnNzUVxcLHUUm3XnL1J3txC6W+V5/iJl23Q6HTQaDfR6PeRyOZYuXYry8nLu+06tFgtKavXu/oeVO+C0rGXLluGzzz7D8ePHMXLkSKnj2BylUon4+HiIotig9lmCIEAQBISFhXHesI2o/EU6LS2t2hHqtm3bYuDAgfxFmlolFpREVKvLly/Dz88P8+bNw6ZNm6SOY1MSExORkJDQ5OuEhIRw9xQrxhFqsgUsKImoTu7u7nB2dkZOTo7UUWyGUqlETEyMxa4XERHBkUorxBFqshVc5U1EdZo4cSJyc3Nx8+ZNqaPYBK1Wi/j4eIteMz4+vtrHpNR6JSYmIiYmBnq9vsG7hYmiCL1ej5iYGC6Yo1aBBSUR1SkyMhIAsHr1aomT2IbY2Nh6FxApKSlYuXIl3n777VpfJ4oiYmNjLRGPWoBSqbTIdAcASEhIgFKptMi1iBpLLnUAImr9pk+fDkdHR2zbtg1vvPGG1HGsmlqtRkZGRr1eW1BQgJ07d6Jdu3YoLS2t9bWiKCIjIwNqtZoLNlq5ukaoy8rKsH//fqhUKqhUKhQXF2PSpEm1bgARHx8PX19fzqkkyXCEkojqJJPJMHjwYFy8eLHBj+bIXHJycr2bUsfGxqJXr17w8/Or1+sFQUBycnJT4lELqGuEuri4GCkpKTAYDOjXr1+9rskRapIaC0oiqpd58+ZBFEV89913UkexamlpabWu4q106tQpZGZm4r777qv3tUVRRHp6elPiUTOrHKGuraB0d3fHiy++iMWLF2PKlCn1uu6dI9REUmBBSUT1snTpUgDA+vXrJU5ivcrKyuq1cKawsBA7duzA1KlT0aFDhwbdQ6PRcJvGVqw+I9QymaxRWytyhJqkxIKSiOqlbdu28Pb2xrFjx6SOYrXquwo7Li4OnTp1anQj+ep2l6LWob4j1I3BEWqSEgtKIqq3GTNmoLi4GCdPnpQ6ilXS6/V1viY1NRUXL15EREREo0ap6nsfann1HaFuCo5Qk1RYUBJRvT3//PMAgI8//ljiJNZJLq+9sUZZWRni4uIwatQotGvXDiUlJSgpKYHBYAAAlJSU1KtYqOs+JI2W6hPKEWqSAv/VIaJ669u3L9q1a4edO3dKHcUqeXh41Hq+uLgYRUVFOHLkCI4cOVLl/Pvvv4++ffvikUceadJ9SBotNXLMEWqSAgtKImqQ8ePHY8eOHcjLy
[... remainder of the base64-encoded image/png payload omitted; it is the inline figure rendered by the graph-visualization cell whose source follows ...]", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "idx = 0 # Feel free to change this to visualize different graphs.\n", + "\n", + "# Visualize the graphs\n", + "display(test_data[idx].mol)\n", + "visualize_pyg_graph(pygs[idx])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "3a9d2dd1-2f90-44cc-8859-b93c41167f33", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 0. 0. ... 0. 0. 0.12011]\n", + " [0. 0. 0. ... 0. 1. 0.14007]\n", + " [0. 0. 0. ... 0. 1. 0.12011]\n", + " ...\n", + " [0. 0. 0. ... 0. 1. 0.12011]\n", + " [0. 0. 0. ... 0. 1. 0.12011]\n", + " [0. 0. 0. ... 0. 1. 0.12011]]\n", + "[[0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 
0.]]\n" + ] + } + ], + "source": [ + "# Examine the features\n", + "print(pygs[idx].x)\n", + "print(pygs[idx].edge_attr)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop_dgl", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/chemprop-updated/pyproject.toml b/chemprop-updated/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..b1c9154387a4e03aa9b6fa5eaf9c143c10f08202 --- /dev/null +++ b/chemprop-updated/pyproject.toml @@ -0,0 +1,77 @@ +[build-system] +requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.2"] +build-backend = "setuptools.build_meta" + +[project] +name = "chemprop" +description = "Molecular Property Prediction with Message Passing Neural Networks" +version = "2.1.2" +authors = [ + {name = "The Chemprop Development Team (see LICENSE.txt)", email="chemprop@mit.edu"} +] +readme = "README.md" +license = {text = "MIT"} +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent" +] +keywords = [ + "chemistry", + "machine learning", + "property prediction", + "message passing neural network", + "graph neural network", + "drug discovery" +] +requires-python = ">=3.11" +dependencies = [ + "lightning >= 2.0", + "numpy", + "pandas", + "rdkit", + "scikit-learn", + "scipy", + "torch >= 2.1", + "astartes[molecules]", + "ConfigArgParse", + "rich", + "descriptastorus", +] + +[project.optional-dependencies] +hpopt = ["ray[tune]", "hyperopt", "optuna"] +dev = ["black == 23.*", "bumpversion", "autopep8", "flake8", "pytest", "pytest-cov", "isort"] +docs = ["nbsphinx", "sphinx", "sphinx-argparse != 0.5.0", "sphinx-autobuild", "sphinx-autoapi", "sphinxcontrib-bibtex", "sphinx-book-theme", "nbsphinx-link", "ipykernel", "docutils < 0.21", "readthedocs-sphinx-ext", "pandoc"] +test = ["pytest >= 6.2", "pytest-cov"] +notebooks = ["ipykernel", "matplotlib"] + +[project.urls] +documentation = "https://chemprop.readthedocs.io/en/latest/" +source = "https://github.com/chemprop/chemprop" +PyPi = "https://pypi.org/project/chemprop/" + +[project.scripts] +chemprop = "chemprop.cli.main:main" + +[tool.black] +line-length = 100 +target-version = ["py311"] +skip-magic-trailing-comma = true +required-version = "23" + +[tool.autopep8] +in_place = true +recursive = true +aggressive = 2 +max_line_length = 100 + + +[tool.pytest.ini_options] +addopts = "--cov chemprop" + +[tool.isort] +profile = "black" +line_length = 100 +force_sort_within_sections = true diff --git a/chemprop-updated/tests/cli/test_cli_classification_mol.py b/chemprop-updated/tests/cli/test_cli_classification_mol.py new file mode 100644 index 0000000000000000000000000000000000000000..05b8b4aaee0486673b8734a27444c363c83bf952 --- /dev/null +++ b/chemprop-updated/tests/cli/test_cli_classification_mol.py @@ -0,0 +1,292 @@ +"""This tests the CLI functionality of training and predicting a regression model on a single molecule. 
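+
+The tests below exercise the classification and classification-dirichlet task types on a
+single-molecule dataset. As a rough illustration only (the CSV location is resolved at runtime
+through the data_dir fixture), the quick training test corresponds to a command like:
+
+    chemprop train -i classification/mol.csv --task-type classification --epochs 3 --num-workers 0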
+""" + +import pytest + +from chemprop.cli.main import main +from chemprop.models.model import MPNN + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return str(data_dir / "classification" / "mol.csv") + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_classification_mol.pt") + + +@pytest.fixture +def dirichlet_model_path(data_dir): + return str(data_dir / "example_model_v2_classification_dirichlet_mol.pt") + + +def test_train_quick(monkeypatch, data_path): + base_args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--task-type", + "classification", + "--metric", + "prc", + "accuracy", + "f1", + "roc", + "--show-individual-scores", + ] + + task_types = ["classification", "classification-dirichlet"] + + for task_type in task_types: + args = base_args.copy() + + args += ["--task-type", task_type] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + args = ["chemprop", "predict", "-i", data_path, "--model-path", model_path] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_ignore_chirality(monkeypatch, data_path, model_path): + args = [ + "chemprop", + "predict", + "-i", + data_path, + "--model-path", + model_path, + "--ignore-chirality", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_dirichlet_quick(monkeypatch, data_path, dirichlet_model_path): + args = [ + "chemprop", + "predict", + "-i", + data_path, + "--model-path", + dirichlet_model_path, + "--uncertainty-method", + "classification-dirichlet", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("calibration_method", ["platt", "isotonic"]) +def test_predict_unc_quick(monkeypatch, data_path, model_path, calibration_method): + args = [ + "chemprop", + "predict", + "-i", + data_path, + "--model-path", + model_path, + "--cal-path", + data_path, + "--uncertainty-method", + "classification", + "--calibration-method", + calibration_method, + "--evaluation-methods", + "nll-classification", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + args = [ + "chemprop", + "fingerprint", + "-i", + data_path, + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_output_structure(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--save-smiles-splits", + "--task-type", + "classification", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "model_0" / "best.pt").exists() + assert (tmp_path / "model_0" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "model_0" / "trainer_logs" / "version_0").exists() + assert (tmp_path / "train_smiles.csv").exists() + + +def test_train_output_structure_replicate_ensemble(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--save-smiles-splits", + "--split-type", + 
"random", + "--num-replicates", + "3", + "--ensemble-size", + "2", + "--task-type", + "classification", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "replicate_2" / "model_1" / "best.pt").exists() + assert (tmp_path / "replicate_2" / "model_1" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "replicate_2" / "model_1" / "trainer_logs" / "version_0").exists() + assert (tmp_path / "replicate_2" / "train_smiles.csv").exists() + + +def test_predict_output_structure(monkeypatch, data_path, model_path, tmp_path): + args = [ + "chemprop", + "predict", + "-i", + data_path, + "--model-path", + model_path, + model_path, + "--output", + str(tmp_path / "preds.csv"), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "preds.csv").exists() + assert (tmp_path / "preds_individual.csv").exists() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_output_structure( + monkeypatch, data_path, model_path, tmp_path, ffn_block_index +): + args = [ + "chemprop", + "fingerprint", + "-i", + data_path, + "--model-path", + model_path, + "--output", + str(tmp_path / "fps.csv"), + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "fps_0.csv").exists() + + +def test_train_outputs(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--task-type", + "classification", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + checkpoint_path = tmp_path / "model_0" / "checkpoints" / "last.ckpt" + + model = MPNN.load_from_checkpoint(checkpoint_path) + assert model is not None + + +def test_class_balance(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--task-type", + "classification", + "--class-balance", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() diff --git a/chemprop-updated/tests/cli/test_cli_classification_mol_multiclass.py b/chemprop-updated/tests/cli/test_cli_classification_mol_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..9f5164e65a0364f8ff4b8c21cb102d1f5a84a8c4 --- /dev/null +++ b/chemprop-updated/tests/cli/test_cli_classification_mol_multiclass.py @@ -0,0 +1,246 @@ +"""This tests the CLI functionality of training and predicting a regression model on a single molecule. 
+""" + +import pytest + +from chemprop.cli.main import main +from chemprop.models.model import MPNN + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return str(data_dir / "classification" / "mol_multiclass.csv") + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_classification_mol_multiclass.pt") + + +@pytest.fixture +def dirichlet_model_path(data_dir): + return str(data_dir / "example_model_v2_multiclass_dirichlet_mol.pt") + + +def test_train_quick(monkeypatch, data_path): + base_args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--show-individual-scores", + ] + + task_types = ["multiclass", "multiclass-dirichlet"] + + for task_type in task_types: + args = base_args.copy() + + args += ["--task-type", task_type] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + args = ["chemprop", "predict", "-i", data_path, "--model-path", model_path] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_dirichlet_quick(monkeypatch, data_path, dirichlet_model_path): + args = [ + "chemprop", + "predict", + "-i", + data_path, + "--model-path", + dirichlet_model_path, + "--uncertainty-method", + "multiclass-dirichlet", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_isotonic_quick(monkeypatch, data_path, model_path): + args = [ + "chemprop", + "predict", + "-i", + data_path, + "--model-path", + model_path, + "--cal-path", + data_path, + "--uncertainty-method", + "classification", + "--calibration-method", + "isotonic-multiclass", + "--evaluation-methods", + "nll-multiclass", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + args = [ + "chemprop", + "fingerprint", + "-i", + data_path, + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_output_structure(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--save-smiles-splits", + "--task-type", + "multiclass", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "model_0" / "best.pt").exists() + assert (tmp_path / "model_0" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "model_0" / "trainer_logs" / "version_0").exists() + assert (tmp_path / "train_smiles.csv").exists() + + +def test_train_output_structure_replicate_ensemble(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--save-smiles-splits", + "--split-type", + "random", + "--num-replicates", + "3", + "--ensemble-size", + "2", + "--task-type", + "multiclass", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "replicate_2" / "model_1" / "best.pt").exists() + assert (tmp_path / "replicate_2" / "model_1" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "replicate_2" / "model_1" / "trainer_logs" / "version_0").exists() + assert (tmp_path / 
"replicate_2" / "train_smiles.csv").exists() + + +def test_predict_output_structure(monkeypatch, data_path, model_path, tmp_path): + args = [ + "chemprop", + "predict", + "-i", + data_path, + "--model-path", + model_path, + model_path, + "--output", + str(tmp_path / "preds.csv"), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "preds.csv").exists() + assert (tmp_path / "preds_individual.csv").exists() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_output_structure( + monkeypatch, data_path, model_path, tmp_path, ffn_block_index +): + args = [ + "chemprop", + "fingerprint", + "-i", + data_path, + "--model-path", + model_path, + "--output", + str(tmp_path / "fps.csv"), + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "fps_0.csv").exists() + + +def test_train_outputs(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--task-type", + "multiclass", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + checkpoint_path = tmp_path / "model_0" / "checkpoints" / "last.ckpt" + + model = MPNN.load_from_checkpoint(checkpoint_path) + assert model is not None diff --git a/chemprop-updated/tests/cli/test_cli_regression_mol+mol.py b/chemprop-updated/tests/cli/test_cli_regression_mol+mol.py new file mode 100644 index 0000000000000000000000000000000000000000..cdfa1917dcb20f182866ec827af15cdb0a1fd81f --- /dev/null +++ b/chemprop-updated/tests/cli/test_cli_regression_mol+mol.py @@ -0,0 +1,240 @@ +"""This tests the CLI functionality of training and predicting a regression model on a multi-molecule. 
+""" + +import json + +import pytest + +from chemprop.cli.main import main + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return ( + str(data_dir / "regression" / "mol+mol" / "mol+mol.csv"), + str(data_dir / "regression" / "mol+mol" / "descriptors.npz"), + ("0", str(data_dir / "regression" / "mol+mol" / "atom_features_0.npz")), + ("1", str(data_dir / "regression" / "mol+mol" / "atom_features_1.npz")), + ("0", str(data_dir / "regression" / "mol+mol" / "bond_features_0.npz")), + ("1", str(data_dir / "regression" / "mol+mol" / "atom_descriptors_1.npz")), + ) + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_regression_mol+mol.pt") + + +def test_train_quick(monkeypatch, data_path): + ( + input_path, + desc_path, + atom_feat_path_0, + atom_feat_path_1, + bond_feat_path_0, + atom_desc_path_1, + ) = data_path + + base_args = [ + "chemprop", + "train", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--epochs", + "3", + "--num-workers", + "0", + "--descriptors-path", + desc_path, + "--atom-features-path", + *atom_feat_path_0, + "--atom-features-path", + *atom_feat_path_1, + "--bond-features-path", + *bond_feat_path_0, + "--atom-descriptors-path", + *atom_desc_path_1, + "--show-individual-scores", + ] + + task_types = ["", "regression-mve", "regression-evidential", "regression-quantile"] + + for task_type in task_types: + args = base_args.copy() + + if task_type: + args += ["--task-type", task_type] + + if task_type == "regression-evidential": + args += ["--evidential-regularization", "0.2"] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + input_path, _, _, _, _, _ = data_path + + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--model-path", + model_path, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + input_path, _, _, _, _, _ = data_path + args = [ + "chemprop", + "fingerprint", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_output_structure(monkeypatch, data_path, tmp_path): + input_path, *_ = data_path + + args = [ + "chemprop", + "train", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--save-smiles-splits", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "model_0" / "best.pt").exists() + assert (tmp_path / "model_0" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "model_0" / "trainer_logs" / "version_0").exists() + assert (tmp_path / "train_smiles.csv").exists() + assert (tmp_path / "model_0" / "test_predictions.csv").exists() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_output_structure( + monkeypatch, data_path, model_path, tmp_path, ffn_block_index +): + input_path, *_ = data_path + args = [ + "chemprop", + "fingerprint", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--model-path", + model_path, + "--output", + str(tmp_path / "fps.csv"), + 
"--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "fps_0.csv").exists() + + +def test_train_splits_file(monkeypatch, data_path, tmp_path): + splits_file = str(tmp_path / "splits.json") + splits = [ + {"train": [1, 2], "val": "3-5", "test": "6,7"}, + {"val": [1, 2], "test": "3-5", "train": "6,7"}, + ] + + with open(splits_file, "w") as f: + json.dump(splits, f) + + input_path, *_ = data_path + + args = [ + "chemprop", + "train", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--splits-file", + splits_file, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_molecule_featurizers(monkeypatch, data_path): + input_path, descriptors_path, *_ = data_path + + args = [ + "chemprop", + "train", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--epochs", + "3", + "--num-workers", + "0", + "--descriptors-path", + descriptors_path, + "--molecule-featurizers", + "morgan_count", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() diff --git a/chemprop-updated/tests/cli/test_cli_regression_mol.py b/chemprop-updated/tests/cli/test_cli_regression_mol.py new file mode 100644 index 0000000000000000000000000000000000000000..22298700ef556a7da1e5f37674aa6f871034df29 --- /dev/null +++ b/chemprop-updated/tests/cli/test_cli_regression_mol.py @@ -0,0 +1,619 @@ +"""This tests the CLI functionality of training and predicting a regression model on a single molecule. +""" + +import json + +import pytest +import torch + +from chemprop.cli.hpopt import NO_HYPEROPT, NO_OPTUNA, NO_RAY +from chemprop.cli.main import main +from chemprop.cli.train import TrainSubcommand +from chemprop.models.model import MPNN + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return ( + str(data_dir / "regression" / "mol" / "mol.csv"), + str(data_dir / "regression" / "mol" / "descriptors.npz"), + str(data_dir / "regression" / "mol" / "atom_features.npz"), + str(data_dir / "regression" / "mol" / "bond_features.npz"), + str(data_dir / "regression" / "mol" / "atom_descriptors.npz"), + ) + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_regression_mol.pt") + + +@pytest.fixture +def extra_model_path(data_dir): + return str(data_dir / "example_model_v2_regression_mol_with_metrics.ckpt") + + +@pytest.fixture +def mve_model_path(data_dir): + return str(data_dir / "example_model_v2_regression_mve_mol.pt") + + +@pytest.fixture +def evidential_model_path(data_dir): + return str(data_dir / "example_model_v2_regression_evidential_mol.pt") + + +@pytest.fixture +def config_path(data_dir): + return str(data_dir / "regression" / "mol" / "config.toml") + + +def test_train_quick(monkeypatch, data_path): + input_path, *_ = data_path + + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--show-individual-scores", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_config(monkeypatch, config_path, tmp_path): + args = [ + "chemprop", + "train", + "--config-path", + config_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + new_config_path = tmp_path / "config.toml" + parser = 
TrainSubcommand.parser + + new_args = parser.parse_args(["--config-path", str(new_config_path)]) + old_args = parser.parse_args(["--config-path", str(config_path)]) + + for key, value in old_args.__dict__.items(): + if key not in ["config_path", "output_dir", "epochs"]: + assert getattr(new_args, key) == value + + assert new_args.epochs == 3 + + +def test_train_quick_features(monkeypatch, data_path): + ( + input_path, + descriptors_path, + atom_features_path, + bond_features_path, + atom_descriptors_path, + ) = data_path + + base_args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--descriptors-path", + descriptors_path, + "--atom-features-path", + atom_features_path, + "--bond-features-path", + bond_features_path, + "--atom-descriptors-path", + atom_descriptors_path, + ] + + task_types = ["", "regression-mve", "regression-evidential", "regression-quantile"] + + for task_type in task_types: + args = base_args.copy() + + if task_type: + args += ["--task-type", task_type] + + if task_type == "regression-evidential": + args += ["--evidential-regularization", "0.2"] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + input_path, *_ = data_path + args = ["chemprop", "predict", "-i", input_path, "--model-path", model_path] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_ensemble_quick(monkeypatch, data_path, model_path, extra_model_path): + input_path, *_ = data_path + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--model-path", + model_path, + extra_model_path, + "--uncertainty-method", + "ensemble", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_dropout_quick(monkeypatch, data_path, model_path, extra_model_path): + input_path, *_ = data_path + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--model-path", + model_path, + extra_model_path, + "--uncertainty-method", + "dropout", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_mve_quick(monkeypatch, data_path, mve_model_path): + input_path, *_ = data_path + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--model-path", + mve_model_path, + "--cal-path", + input_path, + "--uncertainty-method", + "mve", + "--calibration-method", + "zscaling", + "--evaluation-methods", + "nll-regression", + "miscalibration_area", + "ence", + "spearman", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_evidential_quick(monkeypatch, data_path, evidential_model_path): + input_path, *_ = data_path + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--model-path", + evidential_model_path, + "--cal-path", + input_path, + "--uncertainty-method", + "evidential-total", + "--calibration-method", + "zscaling", + "--evaluation-methods", + "nll-regression", + "miscalibration_area", + "ence", + "spearman", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + input_path, *_ = data_path + args = [ + "chemprop", + "fingerprint", + "-i", + input_path, + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def 
test_train_output_structure(monkeypatch, data_path, tmp_path): + input_path, *_ = data_path + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--save-smiles-splits", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "model_0" / "best.pt").exists() + assert (tmp_path / "model_0" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "model_0" / "trainer_logs" / "version_0").exists() + assert (tmp_path / "train_smiles.csv").exists() + assert (tmp_path / "model_0" / "test_predictions.csv").exists() + + +def test_train_output_structure_replicate_ensemble(monkeypatch, data_path, tmp_path): + input_path, *_ = data_path + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--save-smiles-splits", + "--split-type", + "random", + "--num-replicates", + "3", + "--ensemble-size", + "2", + "--metrics", + "mse", + "rmse", + "--molecule-featurizers", + "rdkit_2d", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "replicate_2" / "model_1" / "best.pt").exists() + assert (tmp_path / "replicate_2" / "model_1" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "replicate_2" / "model_1" / "trainer_logs" / "version_0").exists() + assert (tmp_path / "replicate_2" / "train_smiles.csv").exists() + + +def test_train_csv_splits(monkeypatch, data_dir, tmp_path): + input_path = str(data_dir / "regression" / "mol" / "mol_with_splits.csv") + args = [ + "chemprop", + "train", + "-i", + input_path, + "--smiles-columns", + "smiles", + "--target-columns", + "lipo", + "--splits-column", + "split", + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_splits_file(monkeypatch, data_path, tmp_path): + input_path, *_ = data_path + splits_file = str(tmp_path / "splits.json") + splits = [ + {"train": [1, 2], "val": "3-5", "test": "6,7"}, + {"val": [1, 2], "test": "3-5", "train": "6,7"}, + ] + + with open(splits_file, "w") as f: + json.dump(splits, f) + + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--splits-file", + splits_file, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_output_structure(monkeypatch, data_path, model_path, tmp_path): + input_path, *_ = data_path + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--model-path", + model_path, + model_path, + "--output", + str(tmp_path / "preds.csv"), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "preds.csv").exists() + assert (tmp_path / "preds_individual.csv").exists() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_output_structure( + monkeypatch, data_path, model_path, tmp_path, ffn_block_index +): + input_path, *_ = data_path + args = [ + "chemprop", + "fingerprint", + "-i", + input_path, + "--model-path", + model_path, + "--output", + str(tmp_path / "fps.csv"), + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "fps_0.csv").exists() + + +def test_train_outputs(monkeypatch, data_path, tmp_path): + input_path, *_ = 
data_path + + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + checkpoint_path = tmp_path / "model_0" / "checkpoints" / "last.ckpt" + + model = MPNN.load_from_checkpoint(checkpoint_path) + assert model is not None + + +def test_freeze_model(monkeypatch, data_path, model_path, tmp_path): + input_path, *_ = data_path + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--checkpoint", + model_path, + "--freeze-encoder", + "--frzn-ffn-layers", + "1", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + checkpoint_path = tmp_path / "model_0" / "checkpoints" / "last.ckpt" + + trained_model = MPNN.load_from_checkpoint(checkpoint_path) + frzn_model = MPNN.load_from_file(model_path) + + assert torch.equal( + trained_model.message_passing.W_o.weight, frzn_model.message_passing.W_o.weight + ) + assert torch.equal( + trained_model.predictor.ffn[0][0].weight, frzn_model.predictor.ffn[0][0].weight + ) + + +def test_checkpoint_model(monkeypatch, data_path, model_path, tmp_path): + input_path, *_ = data_path + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--checkpoint", + model_path, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + checkpoint_path = tmp_path / "model_0" / "checkpoints" / "last.ckpt" + + model = MPNN.load_from_checkpoint(checkpoint_path) + assert model is not None + + +@pytest.mark.skipif(NO_RAY or NO_OPTUNA, reason="Optuna not installed") +def test_optuna_quick(monkeypatch, data_path, tmp_path): + input_path, *_ = data_path + + args = [ + "chemprop", + "hpopt", + "-i", + input_path, + "--epochs", + "6", + "--hpopt-save-dir", + str(tmp_path), + "--raytune-num-samples", + "2", + "--raytune-search-algorithm", + "optuna", + "--molecule-featurizers", + "morgan_count", + "--search-parameter-keywords", + "all", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "best_config.toml").exists() + assert (tmp_path / "best_checkpoint.ckpt").exists() + assert (tmp_path / "all_progress.csv").exists() + assert (tmp_path / "ray_results").exists() + + args = [ + "chemprop", + "train", + "--config-path", + str(tmp_path / "best_config.toml"), + "--save-dir", + str(tmp_path), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "model_0" / "best.pt").exists() + + +@pytest.mark.skipif(NO_RAY or NO_HYPEROPT, reason="Ray and/or Hyperopt not installed") +def test_hyperopt_quick(monkeypatch, data_path, tmp_path): + input_path, *_ = data_path + + args = [ + "chemprop", + "hpopt", + "-i", + input_path, + "--epochs", + "6", + "--hpopt-save-dir", + str(tmp_path), + "--raytune-num-samples", + "2", + "--raytune-search-algorithm", + "hyperopt", + "--molecule-featurizers", + "morgan_binary", + "--search-parameter-keywords", + "all", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "best_config.toml").exists() + assert (tmp_path / "best_checkpoint.ckpt").exists() + assert (tmp_path / "all_progress.csv").exists() + assert (tmp_path / "ray_results").exists() + + args = [ + "chemprop", + "train", + "--config-path", + str(tmp_path / "best_config.toml"), + 
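+        # retrain from the best configuration written out by the hpopt run above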
"--save-dir", + str(tmp_path), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "model_0" / "best.pt").exists() diff --git a/chemprop-updated/tests/cli/test_cli_regression_mol_multitask.py b/chemprop-updated/tests/cli/test_cli_regression_mol_multitask.py new file mode 100644 index 0000000000000000000000000000000000000000..914b594e3084551ee580757a0c056c0acf2629a8 --- /dev/null +++ b/chemprop-updated/tests/cli/test_cli_regression_mol_multitask.py @@ -0,0 +1,68 @@ +"""This tests the CLI functionality of training and predicting a regression model on a single molecule. +""" + +import pytest + +from chemprop.cli.main import main + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return str(data_dir / "regression" / "mol_multitask.csv") + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_regression_mol_multitask.pt") + + +def test_train_quick(monkeypatch, data_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--show-individual-scores", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + args = ["chemprop", "predict", "-i", data_path, "--model-path", model_path] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + args = ["chemprop", "predict", "-i", data_path, "--model-path", model_path] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + args = [ + "chemprop", + "fingerprint", + "-i", + data_path, + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() diff --git a/chemprop-updated/tests/cli/test_cli_regression_rxn+mol.py b/chemprop-updated/tests/cli/test_cli_regression_rxn+mol.py new file mode 100644 index 0000000000000000000000000000000000000000..98731cebc8e3ed9fb9105c1a0abac5838df2619f --- /dev/null +++ b/chemprop-updated/tests/cli/test_cli_regression_rxn+mol.py @@ -0,0 +1,146 @@ +"""This tests the CLI functionality of training and predicting a regression model on a multi-molecule. 
+""" + +import pytest + +from chemprop.cli.main import main + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return ( + str(data_dir / "regression" / "rxn+mol" / "rxn+mol.csv"), + str(data_dir / "regression" / "rxn+mol" / "descriptors.npz"), + ("0", str(data_dir / "regression" / "rxn+mol" / "atom_features.npz")), + ("0", str(data_dir / "regression" / "rxn+mol" / "bond_features.npz")), + ("0", str(data_dir / "regression" / "rxn+mol" / "atom_descriptors.npz")), + ) + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_regression_rxn+mol.pt") + + +def test_train_quick(monkeypatch, data_path): + ( + input_path, + descriptors_path, + atom_features_path, + bond_features_path, + atom_descriptors_path, + ) = data_path + + base_args = [ + "chemprop", + "train", + "-i", + input_path, + "--reaction-columns", + "rxn_smiles", + "--smiles-columns", + "solvent_smiles", + "--epochs", + "3", + "--num-workers", + "0", + "--split-key-molecule", + "1", + "--descriptors-path", + descriptors_path, + "--atom-features-path", + *atom_features_path, + "--bond-features-path", + *bond_features_path, + "--atom-descriptors-path", + *atom_descriptors_path, + "--show-individual-scores", + ] + + task_types = ["", "regression-mve", "regression-evidential", "regression-quantile"] + + for task_type in task_types: + args = base_args.copy() + + if task_type: + args.extend(["--task-type", task_type]) + + if task_type == "regression-evidential": + args += ["--evidential-regularization", "0.2"] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + input_path, *_ = data_path + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--reaction-columns", + "rxn_smiles", + "--smiles-columns", + "solvent_smiles", + "--model-path", + model_path, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + input_path, *_ = data_path + args = [ + "chemprop", + "fingerprint", + "-i", + input_path, + "--reaction-columns", + "rxn_smiles", + "--smiles-columns", + "solvent_smiles", + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_molecule_featurizers(monkeypatch, data_path): + input_path, descriptors_path, *_ = data_path + args = [ + "chemprop", + "train", + "-i", + input_path, + "--reaction-columns", + "rxn_smiles", + "--smiles-columns", + "solvent_smiles", + "--epochs", + "3", + "--num-workers", + "0", + "--split-key-molecule", + "1", + "--descriptors-path", + descriptors_path, + "--molecule-featurizers", + "morgan_count", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() diff --git a/chemprop-updated/tests/cli/test_cli_regression_rxn.py b/chemprop-updated/tests/cli/test_cli_regression_rxn.py new file mode 100644 index 0000000000000000000000000000000000000000..67a4cbb40e37d54a5d024617d3e9c6cc4c1fc84f --- /dev/null +++ b/chemprop-updated/tests/cli/test_cli_regression_rxn.py @@ -0,0 +1,94 @@ +"""This tests the CLI functionality of training and predicting a regression model on a reaction.
+""" + +import pytest + +from chemprop.cli.main import main + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return str(data_dir / "regression" / "rxn" / "rxn.csv"), str( + data_dir / "regression" / "rxn" / "descriptors.npz" + ) + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_regression_rxn.pt") + + +def test_train_quick(monkeypatch, data_path): + input_path, descriptors_path = data_path + + base_args = [ + "chemprop", + "train", + "-i", + input_path, + "--reaction-columns", + "smiles", + "--epochs", + "3", + "--num-workers", + "0", + "--descriptors-path", + descriptors_path, + "--show-individual-scores", + ] + + task_types = ["", "regression-mve", "regression-evidential", "regression-quantile"] + + for task_type in task_types: + args = base_args.copy() + + if task_type: + args.extend(["--task-type", task_type]) + + if task_type == "regression-evidential": + args += ["--evidential-regularization", "0.2"] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + input_path, _ = data_path + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--reaction-columns", + "smiles", + "--model-path", + model_path, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + input_path, _ = data_path + args = [ + "chemprop", + "fingerprint", + "-i", + input_path, + "--reaction-columns", + "smiles", + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() diff --git a/chemprop-updated/tests/cli/test_cli_utils.py b/chemprop-updated/tests/cli/test_cli_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..89fe915c66196c4beca53f4b761e99fa1fc068da --- /dev/null +++ b/chemprop-updated/tests/cli/test_cli_utils.py @@ -0,0 +1,170 @@ +import pytest + +from chemprop.cli.common import find_models +from chemprop.cli.utils.parsing import get_column_names, parse_indices + + +def test_parse_indices(): + """ + Testing if CLI parse_indices yields expected results. + """ + splits = {"train": [0, 1, 2, 4], "val": [3, 5, 6], "test": [7, 8, 9]} + split_idxs = {"train": "0-2, 4", "val": "3,5-6", "test": [7, 8, 9]} + split_idxs = {split: parse_indices(idxs) for split, idxs in split_idxs.items()} + + assert split_idxs == splits + + +def test_find_models(data_dir): + """ + Testing if CLI find_models gets the correct model paths. 
+ """ + models = find_models([data_dir / "example_model_v2_regression_mol.pt"]) + assert len(models) == 1 + models = find_models([data_dir / "example_model_v2_regression_mol.ckpt"]) + assert len(models) == 1 + models = find_models([data_dir]) + assert len(models) == 14 + models = find_models( + [ + data_dir / "example_model_v2_regression_mol.pt", + data_dir / "example_model_v2_regression_mol.ckpt", + data_dir, + ] + ) + assert len(models) == 16 + + +@pytest.mark.parametrize( + "path,smiles_cols,rxn_cols,target_cols,ignore_cols,splits_col,weight_col,no_header_row,expected", + [ + ( + "classification/mol.csv", + ["smiles"], + None, + ["NR-AhR", "NR-ER", "SR-ARE", "SR-MMP"], + None, + None, + None, + False, + ["smiles", "NR-AhR", "NR-ER", "SR-ARE", "SR-MMP"], + ), + ( + "classification/mol.csv", + ["smiles"], + None, + None, + None, + None, + None, + False, + ["smiles", "NR-AhR", "NR-ER", "SR-ARE", "SR-MMP"], + ), + ( + "classification/mol.csv", + None, + None, + None, + ["NR-AhR", "SR-ARE"], + None, + None, + False, + ["smiles", "NR-ER", "SR-MMP"], + ), + ("regression/mol/mol.csv", None, None, None, None, None, None, False, ["smiles", "lipo"]), + ( + "regression/mol/mol.csv", + None, + None, + ["lipo"], + None, + None, + None, + False, + ["smiles", "lipo"], + ), + ( + "regression/mol/mol_with_splits.csv", + ["smiles"], + None, + ["lipo"], + None, + "split", + None, + False, + ["smiles", "lipo"], + ), + ( + "regression/mol/mol_with_splits.csv", + None, + None, + None, + None, + "split", + None, + False, + ["smiles", "lipo"], + ), + ( + "regression/rxn/rxn.csv", + None, + ["smiles"], + ["ea"], + None, + None, + None, + False, + ["smiles", "ea"], + ), + ( + "classification/mol+mol.csv", + ["mol a smiles", "mol b Smiles"], + None, + ["synergy"], + None, + None, + None, + False, + ["mol a smiles", "mol b Smiles", "synergy"], + ), + ( + "classification/mol+mol.csv", + ["mol a smiles", "mol b Smiles"], + None, + None, + None, + None, + None, + False, + ["mol a smiles", "mol b Smiles", "synergy"], + ), + ("regression/mol/mol.csv", None, None, None, None, None, None, True, ["SMILES", "pred_0"]), + ], +) +def test_get_column_names( + data_dir, + path, + smiles_cols, + rxn_cols, + target_cols, + ignore_cols, + splits_col, + weight_col, + no_header_row, + expected, +): + """ + Testing if CLI get_column_names gets the correct column names. 
+ """ + input_cols, target_cols = get_column_names( + data_dir / path, + smiles_cols, + rxn_cols, + target_cols, + ignore_cols, + splits_col, + weight_col, + no_header_row, + ) + + assert input_cols + target_cols == expected diff --git a/chemprop-updated/tests/conftest.py b/chemprop-updated/tests/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..a18e838c7bb4d17ef39219f1071aa2a4dad0fc91 --- /dev/null +++ b/chemprop-updated/tests/conftest.py @@ -0,0 +1,113 @@ +from pathlib import Path + +import numpy as np +import pandas as pd +import pytest +from rdkit import Chem + +_DATA_DIR = Path(__file__).parent / "data" +_DF = pd.read_csv(_DATA_DIR / "smis.csv") +_DF["mol"] = _DF["smiles"].map(Chem.MolFromSmiles) +_DF["smi"] = _DF["mol"].map(Chem.MolToSmiles) + + +@pytest.fixture +def data_dir(): + return _DATA_DIR + + +@pytest.fixture +def smis(): + return _DF.smi + + +@pytest.fixture +def mols(): + return _DF.mol + + +@pytest.fixture +def targets(smis): + return np.random.rand(len(smis), 1) + + +# @pytest.fixture +# def mol_data(mols, targets): +# return [MoleculeDatapoint(mol, y) for mol, y in zip(mols, targets)] + + +# @pytest.fixture +# def rxn_data(rxns, targets): +# return [ReactionDatapoint(mol, y) for mol, y in zip(mols, targets)] + + +@pytest.fixture(params=_DF.smi.sample(5)) +def smi(request): + return request.param + + +@pytest.fixture(params=_DF.mol.sample(5)) +def mol(request): + return request.param + + +@pytest.fixture +def mol_regression_data(data_dir): + df = pd.read_csv(data_dir / "regression/mol/mol.csv") + smis = df["smiles"].to_list() + Y = df["lipo"].to_numpy().reshape(-1, 1) + + return smis, Y + + +@pytest.fixture +def rxn_regression_data(data_dir): + df = pd.read_csv(data_dir / "regression/rxn/rxn.csv") + smis = df["smiles"].to_list() + Y = df["ea"].to_numpy().reshape(-1, 1) + + return smis, Y + + +@pytest.fixture +def mol_mol_regression_data(data_dir): + df = pd.read_csv(data_dir / "regression/mol+mol/mol+mol.csv") + smis1 = df["smiles"].to_list() + smis2 = df["solvent"].to_list() + Y = df["peakwavs_max"].to_numpy().reshape(-1, 1) + + return smis1, smis2, Y + + +@pytest.fixture +def rxn_mol_regression_data(data_dir): + df = pd.read_csv(data_dir / "regression/rxn+mol/rxn+mol.csv") + rxns = df["rxn_smiles"].to_list() + smis = df["solvent_smiles"].to_list() + Y = df["target"].to_numpy().reshape(-1, 1) + + return rxns, smis, Y + + +@pytest.fixture +def mol_classification_data(data_dir): + df = pd.read_csv(data_dir / "classification" / "mol.csv") + smis = df["smiles"].to_list() + Y = df["NR-AhR"].to_numpy().reshape(-1, 1) + + return smis, Y + + +@pytest.fixture +def mol_classification_data_multiclass(data_dir): + df = pd.read_csv(data_dir / "classification" / "mol_multiclass.csv") + smis = df["smiles"].to_list() + activities = df["activity"].unique() + Y = ( + df["activity"] + .map({activity: i for i, activity in enumerate(activities)}) + .to_numpy() + .reshape(-1, 1) + ) + + return smis, Y diff --git a/chemprop-updated/tests/data/classification.csv b/chemprop-updated/tests/data/classification.csv new file mode 100644 index 0000000000000000000000000000000000000000..24cc3dc65c75fcdf6537fea5936183b9831f2628 --- /dev/null +++ b/chemprop-updated/tests/data/classification.csv @@ -0,0 +1,501 @@ +smiles,NR-AR,NR-AR-LBD,NR-AhR,NR-Aromatase,NR-ER,NR-ER-LBD,NR-PPAR-gamma,SR-ARE,SR-ATAD5,SR-HSE,SR-MMP,SR-p53 +CCOc1ccc2nc(S(N)(=O)=O)sc2c1,0,0,1,,,0,0,1,0,0,0,0 +CCN1C(=O)NC(c2ccccc2)C1=O,0,0,0,0,0,0,0,,0,,0,0 
+CC[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]3CC[C@@]21C,,,,,,,,0,,0,, +CCCN(CC)C(CC)C(=O)Nc1c(C)cccc1C,0,0,0,0,0,0,0,,0,,0,0 +CC(O)(P(=O)(O)O)P(=O)(O)O,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)(C)OOC(C)(C)CCC(C)(C)OOC(C)(C)C,0,0,0,0,0,0,0,,0,0,0,0 +O=S(=O)(Cl)c1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +O=C(O)Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1,0,,0,,1,,,1,0,1,0,1 +OC[C@H](O)[C@@H](O)[C@H](O)CO,0,0,0,0,0,0,0,0,0,0,,0 +CCCCCCCC(=O)[O-].CCCCCCCC(=O)[O-].[Zn+2],,,,,,,,0,,0,, +NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,0,0,0,,0,0,0,,0,,,0 +O=c1[nH]c(=O)n([C@H]2C[C@H](O)[C@@H](CO)O2)cc1I,0,0,,,0,0,0,0,0,0,0,0 +CC(C)COC(=O)C(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +C=C(C)C(=O)OCCOC(=O)C(=C)C,0,0,0,0,0,0,0,0,0,0,0,0 +Cl/C=C\C[N+]12CN3CN(CN(C3)C1)C2,0,0,0,,0,0,,1,0,0,0, +O=C([O-])Cc1cccc2ccccc12,0,0,0,0,0,0,1,0,0,0,0,0 +CCCCCCCCCCOCC(O)CN,0,0,0,,0,,,,,0,, +CCN(CC)C(=O)c1cccnc1,0,0,0,0,,0,0,,0,,0,0 +COc1cc(O)cc(O)c1,0,0,,0,,,0,,0,0,0,0 +CCOC(=O)c1cccnc1,,,,,,,,0,,0,, +CCOc1ccc(S(=O)(=O)O)c2cccnc12,0,0,,0,1,1,0,,0,,0,0 +O=C(O)[C@H](O)c1ccccc1,0,0,0,0,,0,0,0,0,0,0,0 +Nc1ccc(/N=N/c2ccccc2)cc1,0,0,1,,1,0,,1,1,0,,0 +CN[C@@H]1C[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc21,0,0,1,,0,0,0,,0,,, +CN1[C@H]2CC[C@@H]1C[C@H](OC(=O)c1cc(Cl)cc(Cl)c1)C2,0,0,0,0,0,0,0,0,0,0,,0 +CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21,0,0,0,,0,0,,0,0,0,,0 +C#CCO,0,0,0,0,0,0,0,0,0,0,0,0 +Nc1ccccc1S(=O)(=O)O,0,0,0,0,0,0,0,0,0,0,,0 +CC(O)CC(C)(C)O,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)(C)CC(C)(C)N,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)CC(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CCCC(C)=O,0,0,0,0,0,0,0,0,0,0,0,0 +Nc1nc2ccccc2[nH]1,0,0,1,0,0,0,0,0,0,0,0,0 +Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl,0,,1,,,0,,1,0,0,1,1 +c1ccc(-c2ccccc2)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +CNC(=O)Nc1ccc(Cl)c(Cl)c1,0,0,1,0,0,,0,0,0,0,,0 +CC(=O)Nc1ccc(C)c(Cl)c1,0,0,0,0,,0,0,0,0,0,,0 +CCCCNC(=S)NCCCC,0,0,0,0,0,0,0,0,0,1,0,0 +CCCCNC(=O)NCCCC,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)N(c1ccccc1)C(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CCc1cccc(C)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)CCCCCCCOP(OCCCCCCCC(C)C)Oc1ccccc1,0,0,0,0,0,0,0,,0,0,0,0 +CCCCCCCC/C=C\CCCCCCCC(=O)OC(CO)CO,0,0,0,,0,0,0,0,0,1,0,0 +CCCCCCCCCCC=CC1CC(=O)OC1=O,0,0,0,0,0,0,0,1,0,0,0,0 +CC(C)C(Nc1ccc(C(F)(F)F)cc1Cl)C(=O)OC(C#N)c1cccc(Oc2ccccc2)c1,0,0,,0,0,,,0,0,,1, +CS(=O)(=O)NC(=O)c1cc(Oc2ccc(C(F)(F)F)cc2Cl)ccc1[N+](=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +CCOP(=S)(CC)Sc1ccccc1,0,0,0,1,,0,0,0,0,0,0,0 +CC/C=C\CCCCO,0,0,0,0,0,0,0,0,0,0,0,0 +Nc1ccccc1C(=O)Oc1ccc2ccccc2c1,,0,1,,1,0,0,1,1,0,1,0 +C=C[C@H]1CN2CCC1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12,0,0,1,0,0,0,0,,0,0,0,0 +CC(=O)CCC(C)=O,0,0,0,0,0,0,0,0,0,0,0,0 +N#CCCNCCC#N,0,0,0,0,0,0,0,0,0,0,0,0 +CCOc1ccc(N=Nc2ccc(C=Cc3ccc(N=Nc4ccc(OCC)cc4)cc3S(=O)(=O)[O-])c(S(=O)(=O)[O-])c2)cc1,0,0,0,0,0,0,0,,0,0,,0 +O=C1c2ccccc2C(=O)C1c1ccc2cc(S(=O)(=O)[O-])cc(S(=O)(=O)[O-])c2n1,0,0,,0,0,0,0,1,0,0,,0 +O=C(Nc1ccc2c(O)c(N=Nc3ccc(N=Nc4ccc(S(=O)(=O)[O-])cc4)cc3)c(S(=O)(=O)[O-])cc2c1)c1ccccc1,0,0,0,,,0,0,1,0,0,,0 +CSc1ccc2c(c1)C(N1CCN(C)CC1)Cc1ccccc1S2,0,0,0,,0,0,,0,0,0,,0 +COCCCC/C(=N\OCCN)c1ccc(C(F)(F)F)cc1,0,0,0,,0,0,,,0,0,,0 +Cc1ccccc1CCO,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1nc(C)c(C)nc1C,0,0,0,0,0,0,0,0,0,0,0,0 +CC1=CC(O)CC(C)(C)C1,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1cnc(C)c(C)n1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)COC(=O)c1ccccc1,0,0,0,0,,0,0,0,0,0,0,0 +C=C(C)[C@@H]1CC=C(C)CC1,0,0,0,0,,0,0,0,0,0,0,0 +O=[N+]([O-])[O-].O=[N+]([O-])[O-].[Ca+2],0,0,0,0,0,0,0,0,0,0,0,0 +Nc1ccc(N)c([N+](=O)[O-])c1,0,0,1,0,,0,0,1,0,0,1, +CC1COc2ccccc2N1,0,0,1,,0,0,0,0,0,0,,0 +O=C(O)c1cc(Cl)cc(Cl)c1O,0,0,0,0,0,0,0,0,0,0,,0 +CCCCCCCCCCCC(=O)NCCCN(C)C,0,,0,,,,,,0,,, 
+CC(C)CCCCCOC(=O)CCS,0,0,0,0,0,0,0,0,0,0,0,0 +O=[N+]([O-])c1cc([As](=O)(O)O)ccc1O,0,0,0,0,0,0,0,0,0,0,0,0 +CCC(COC(=O)CCS)(COC(=O)CCS)COC(=O)CCS,0,0,0,0,0,0,0,0,0,0,0,1 +C=CCOc1c(Br)cc(Br)cc1Br,0,0,1,0,0,0,0,0,0,0,,0 +F[B-](F)(F)F.[H+],0,0,0,0,0,0,0,,0,0,0,0 +CC(C)[C@H]1CC[C@H](C)C[C@@H]1O,0,0,0,0,0,0,0,0,0,0,0,0 +C(=C/c1ccccc1)\c1ccccc1,0,0,1,,1,0,0,1,0,0,,0 +Cc1ccc2c(ccc3ccccc32)c1,0,0,,,,0,0,0,0,0,,0 +Cn1c(=O)c2c(ncn2CC2OCCO2)n(C)c1=O,0,0,0,0,0,0,0,0,0,0,0,0 +C[C@H]1O[C@@H](n2cc(F)c(=O)[nH]c2=O)[C@H](O)[C@@H]1O,0,0,0,0,1,1,0,,0,0,0,1 +CN1CCc2cccc3c2[C@H]1Cc1ccc(O)c(O)c1-3,0,0,1,,,0,0,1,0,,,0 +COC(=O)C1=CCCN(C)C1,0,0,0,0,0,0,0,0,0,0,0,0 +COc1ccc(C2C(=O)c3ccccc3C2=O)cc1,,,,,,,,0,,0,, +Cc1ccc(C(=O)O)cc1[N+](=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +Cc1cc(C(=O)O)ccc1[N+](=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +CCCC(CCC)C(=O)O[C@@H]1C[C@@H]2CC[C@H](C1)[N+]2(C)C,0,0,0,0,0,0,0,,0,,0,0 +CCCCCC(C)O,0,0,0,0,0,0,0,0,0,0,0,0 +O=C([O-])c1ccccc1O,0,0,0,0,0,0,0,0,0,0,,0 +NC(=O)c1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +CCN1CCc2nc(N)oc2CC1,,,,,,,,0,,0,, +CC(C)(C)[C@]1(O)CCN2C[C@H]3c4ccccc4CCc4cccc(c43)[C@@H]2C1,0,0,0,,,0,0,,0,,,0 +O=C1C(N(CO)C(=O)NCO)N(CO)C(=O)N1CO,0,0,0,0,0,0,0,0,0,0,0,0 +O=c1[nH]c(=O)n([C@H]2C[C@H](O)[C@@H](CO)O2)cc1/C=C/Br,,0,0,0,0,0,0,,0,,0,0 +OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc32)CC1,0,0,0,0,0,0,,0,0,0,,0 +CC(C)NC[C@@H](O)COc1ccc(CC(N)=O)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +CCNC(=O)N1CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1,0,,0,,,0,,1,0,0,0,0 +Nc1ccc([N+](=O)[O-])cc1N,0,0,1,0,1,1,0,1,0,,1,0 +[I-].[K+],0,0,0,0,0,0,0,0,0,0,0,0 +O=C(C=Cc1ccc(O)c(O)c1)O[C@@H]1C[C@](O)(C(=O)O)C[C@@H](O)[C@H]1O,0,0,0,0,0,0,0,0,0,0,0,0 +Oc1nc(Cl)c(Cl)cc1Cl,0,,1,,0,0,,1,0,0,1,0 +C/C=C/C=C/C=O,0,0,0,0,0,0,0,1,0,0,0,0 +O=[N+]([O-])c1cc(C(F)(F)F)cc([N+](=O)[O-])c1Cl,1,,0,,,0,,1,0,0,,0 +C[Si](C)(C)N[Si](C)(C)C,0,0,0,0,0,1,0,0,0,0,0,0 +C=CC(=O)OCCCl,0,0,0,0,0,0,0,,0,0,0,0 +COCC(C)N(C(=O)CCl)c1c(C)csc1C,0,0,0,1,0,0,,1,0,0,,0 +CN(C)CCn1nnnc1SCC1=C(C(=O)O)N2C(=O)[C@@H](NC(=O)Cc3csc(N)n3)[C@H]2SC1,0,0,0,0,0,0,0,,0,0,0,0 +C/C(=N\NC(=O)Nc1cc(F)cc(F)c1)c1ncccc1C(=O)[O-],0,0,,0,0,0,0,0,0,0,0,0 +CC1COC(Cn2cncn2)(c2ccc(Oc3ccc(Cl)cc3)cc2Cl)O1,0,0,0,,,,,1,,0,, +CCN(CC)CCOC(=O)C(Cc1cccc2ccccc12)CC1CCCO1,0,0,0,0,0,0,0,0,0,0,0,0 +CCN[C@H]1C[C@H](C)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,0,0,0,0,0,0,0,0,0,0,0,0 +CCN[C@H]1CN(CCCOC)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)N(CCC(C(N)=O)(c1ccccc1)c1ccccn1)C(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CC[C@H]1[C@@H]2C[C@H]3[C@@H]4N(C)c5ccccc5[C@]45C[C@@H](C2[C@H]5O)N3[C@@H]1O,0,0,0,0,0,0,0,,0,,,0 +CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)C=C[C@]5(C)[C@H]4[C@@H](O)C[C@]3(C)[C@]2(C(=O)CO)O1,1,0,0,,0,,0,0,,0,,0 +CSC(=O)c1c(C(F)F)nc(C(F)(F)F)c(C(=O)SC)c1CC(C)C,0,0,0,,0,0,0,0,0,0,,0 +O=C(O)/C=C(\CC(=O)O)C(=O)O,0,0,0,0,,0,0,0,0,0,0,0 +CCCCCCCCCCCCCCCC(=O)O[C@@H]1CC(C)=C(/C=C/C(C)=C/C=C/C(C)=C/C=C\C=C(C)\C=C\C=C(C)\C=C\C2=C(C)C[C@@H](OC(=O)CCCCCCCCCCCCCCC)CC2(C)C)C(C)(C)C1,0,0,0,0,0,0,0,,0,,0,0 +O=C(CO)[C@@H](O)[C@H](O)[C@@H](O)CO,,,,,,,,0,,0,, +CC1=C(C(=O)Nc2ccccc2)S(=O)(=O)CCO1,0,0,0,,,0,,1,0,,,1 +CNCC(=O)c1ccc(O)c(O)c1,,,,,,,,0,,0,, +CC(C)(C)C1CCC(=O)CC1,0,0,0,,0,0,0,0,0,1,0,0 +CN(C)[C@@H]1C(O)=C(C(=O)NCN2CCCC2)C(=O)[C@@]2(O)C(O)=C3C(=O)c4c(O)cccc4[C@@](C)(O)C3C[C@@H]12,0,0,0,0,0,0,0,0,0,0,0,0 +CN1CCN=C(c2ccccc2)c2cc(Cl)ccc21,0,0,,0,0,0,0,,0,,0, +CN(C)CCc1c[nH]c2ccc(Cn3cncn3)cc12,0,0,0,0,0,0,0,,0,,0,0 +CCCCC(=O)[O-],0,0,0,0,0,0,0,,0,,0,0 +CCCCCCCCCCCCCC(=O)OC,0,0,0,0,0,0,0,0,0,0,0,0 +Nc1ccncc1N,,,,,,,,0,,0,, +CCCCCCC(C)O,0,0,0,0,0,0,0,0,0,0,0,0 +CCC[Si](OC)(OC)OC,0,0,0,0,0,0,0,0,0,0,0,0 
+CC1=C(CC=O)C(C)(C)CCC1,0,0,0,0,0,0,0,0,0,0,0,0 +NCCNCCNCCN,0,0,0,0,,0,0,0,0,0,0,0 +C[C@]12CC[C@H]3[C@@H](CC[C@@]45O[C@@H]4C(O)=C(C#N)C[C@]35C)[C@@H]1CC[C@@H]2O,1,1,0,,1,0,0,1,0,0,,1 +CCCC1COC(Cn2cncn2)(c2ccc(Cl)cc2Cl)O1,0,0,1,1,,0,0,1,0,0,0,0 +Cc1ccc(N)c(N)c1,0,0,1,0,1,1,0,1,1,1,,0 +CCCCCNCCCCC,0,0,0,0,0,0,0,0,0,0,0,0 +COCC(C)O,0,0,0,0,1,0,0,0,0,0,0,0 +c1ccc2c(c1)Oc1ccccc1S2,0,0,0,0,1,0,0,0,0,0,1,0 +CC1CN1,0,0,0,0,0,0,0,0,0,0,0,0 +CCc1cnc(C2=NC(C)(C(C)C)C(=O)N2)c(C(=O)O)c1,0,0,0,0,0,0,0,0,0,0,0,0 +NCC(=O)CCC(=O)O,,,,,,,,,,0,, +Clc1ccc(C(Cn2ccnc2)OCc2c(Cl)cccc2Cl)c(Cl)c1,0,,,1,0,,0,,0,1,0, +Clc1cnc(Oc2ccc(Oc3ncc(Cl)cc3Cl)cc2)c(Cl)c1,0,0,0,0,0,0,0,0,0,0,1,0 +COc1ccccc1OCCNCC(O)COc1cccc2[nH]c3ccccc3c12,0,0,1,0,,0,0,0,0,0,1,0 +ClCOCCl,0,0,0,0,1,0,0,0,0,0,0,0 +CC(O)CNCC(C)O,0,0,0,0,0,0,0,0,0,0,0,0 +C[C@H](CCC(=O)[O-])[C@H]1CC[C@H]2[C@H]3[C@H](C[C@H](O)[C@@]21C)[C@@]1(C)CC[C@@H](O)C[C@H]1C[C@H]3O,0,0,,0,0,0,0,0,0,0,0,0 +CC(=O)[C@H]1[C@H](C#N)C[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@@]21C,0,0,0,,,0,0,1,0,0,0,0 +O=[N+]([O-])c1ccc([As](=O)(O)O)cc1,0,0,0,0,0,0,0,,0,0,0,0 +CCOC(=O)C1OC1c1ccccc1,0,0,0,0,0,0,0,,0,0,0,0 +ONc1ccccc1,0,0,1,,1,0,0,,0,0,,0 +O=CC(=O)c1ccccc1,0,0,0,0,0,0,0,0,0,,0,0 +[Cu]I,0,0,0,0,,0,0,,0,1,0,0 +CCCCC(CC)CCC(CC(C)C)OS(=O)(=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +ClCc1ccc(Cl)cc1Cl,0,0,0,0,0,0,0,,0,0,, +O=C(O)CCCCCCCCC(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCCC(=O)OC,0,0,0,0,0,0,0,0,0,0,0,0 +CC(O)COCC(C)O,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1ccc(C(=O)C(=O)[O-])cc1C,,,,,,,,0,,0,, +O=C([O-])COc1nn(Cc2ccccc2)c2ccccc12,0,,0,0,0,0,0,0,0,0,0,0 +Cc1ncc[nH]1,0,0,1,0,0,0,0,0,0,0,0,0 +COc1ccc2sc(C(=O)Nc3nnn[n-]3)c(OC(C)C)c2c1,0,,0,1,,0,1,,0,0,, +Oc1ccc2c(c1)OC[C@@H](N1CCC(O)(c3ccc(F)cc3)CC1)[C@H]2O,0,0,1,,1,0,0,0,1,0,,0 +O=C(O)CCN(C1(C(=O)NO)CCCC1)S(=O)(=O)c1ccc(Oc2ccc(F)cc2)cc1,0,0,0,0,0,0,0,,0,0,,0 +O=C(NO)C1(NS(=O)(=O)c2ccc(Oc3ccc(F)cc3)cc2)CCOCC1,0,0,0,0,,0,0,,0,0,0,0 +Cc1nc(C)nc(N2C[C@H](C)N(c3ccnc([C@@H](C)O)n3)[C@H](C)C2)n1,0,0,0,,0,0,0,,0,0,0,0 +CC[C@H](C)[C@@H](C(=O)O)n1sc2ccccc2c1=O,0,0,0,0,0,0,0,1,0,0,,0 +Cc1cc(SC2=C(O)C[C@@](CCc3ccc(N)cc3)(C(C)C)OC2=O)c(C(C)(C)C)cc1CO,0,,0,0,0,0,,0,0,0,0,0 +CCn1nc(C)c2c1C(=O)NCC(c1ccc(O)cc1)=N2,0,0,,0,0,0,0,,0,0,0,0 +C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)COP(=O)([O-])[O-],1,1,0,,1,,0,,0,,0,0 +CN1C[C@H]2c3ccccc3Oc3ccc(Cl)cc3[C@@H]2C1,,,,,,,,0,,0,, +CO[C@H]1C[C@H](O[C@@H]2[C@@H](C)C(=O)O[C@H](C)[C@H](C)[C@H](OC(C)=O)[C@@H](C)C(=O)[C@@]3(CO3)C[C@H](C)[C@H](O[C@@H]3O[C@H](C)C[C@H](N(C)C)[C@H]3OC(C)=O)[C@H]2C)O[C@@H](C)[C@@H]1OC(C)=O,0,0,0,0,0,0,0,,0,,0,0 +CO[Si](C)(C)OC,0,0,0,0,0,0,0,0,0,0,0,0 +CC(O)(c1ccc(Cl)cc1)c1ccc(Cl)cc1,0,0,0,,0,0,,,0,0,1,0 +CN(C)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)cc1,0,0,1,1,,0,0,1,0,0,1,0 +CC(=O)c1ccccc1O,0,0,0,,0,0,0,0,0,0,0,0 +O=C(O)Cc1c(Cl)ccc(Cl)c1Cl,0,0,0,0,0,0,1,0,0,0,0,0 +O=C(O)c1cccc(Cl)n1,0,0,0,0,0,0,0,0,0,,0,0 +CCCCCCCCCC=O,0,0,0,0,,0,,0,0,0,0,0 +Cc1ccc(C(C)(C)C)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +BrCBr,0,0,0,0,0,0,0,0,0,0,0,0 +Nc1cc(Cl)cc(Cl)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCCCCC(=O)O,0,0,0,0,0,0,0,0,0,0,,0 +CC(C)(C)c1cc([N+](=O)[O-])cc(C(C)(C)C)c1O,0,,0,,,,0,,,,1, +O.O.O.O.O.O.O=[N+]([O-])[O-].O=[N+]([O-])[O-].[Mg+2],0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCCCCOS(=O)(=O)[O-],0,0,0,0,1,0,0,0,0,0,0,0 +O=Cc1ccc(C(=O)O)cc1,0,0,0,0,0,0,0,1,0,0,0,0 +CCC(Cl)CCl,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)(O)c1ccccc1,0,,0,,0,0,0,0,0,0,,0 +O=C1CCCN1,0,0,0,0,1,0,0,0,0,0,0,0 +ClCc1ccccc1Cl,0,,0,0,0,0,0,0,0,0,,0 +Cc1ccc([N+](=O)[O-])c([N+](=O)[O-])c1,0,0,,0,0,0,0,1,0,0,,0 +N#CC1(N=NC2(C#N)CCCCC2)CCCCC1,0,0,0,0,0,0,0,0,0,0,0,0 
+C=CC(=O)OCCOC(=O)C=C,0,,0,0,1,1,1,1,1,,0, +CCCC[P+](CCCC)(CCCC)CCCC,0,0,0,0,0,0,0,0,0,0,0, +N#CCc1cccc(C(F)(F)F)c1,0,0,0,0,0,0,0,0,0,,0,0 +COc1cccc(Br)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCCCNC,0,0,0,0,0,0,0,0,0,0,0,0 +CCC1OCC(COc2ccc(Oc3ccccc3)cc2)O1,0,,1,,1,0,0,,0,,,0 +CC1=C(C(=O)Nc2ccccc2)SCCO1,0,0,1,,1,0,0,0,1,0,0,0 +CCCCN(CCCC)SN(C)C(=O)Oc1cccc2c1OC(C)(C)C2,0,0,1,,1,0,,0,0,1,,0 +Cc1cc(OC(=O)N(C)C)nn1C(=O)N(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1ccc2c(Br)cc(Br)c(O)c2n1,0,0,1,0,,0,,,1,1,1, +O=c1c(O)c(-c2ccc(O)cc2)oc2cc(O)cc(O)c12,0,0,1,1,1,1,,0,0,0,1,0 +CC(O)COc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +O=P1(NCCCl)OCCCN1CCCl,0,0,0,0,0,0,0,0,0,0,0,0 +C=CC(=C)C,,,,,,0,0,0,0,0,0,0 +CC(C)O,0,,0,,0,0,0,0,0,,0,0 +CC(C)OC(=O)Nc1cccc(Cl)c1,0,0,,,0,0,0,0,1,0,,0 +CC(C)OC(=O)Nc1ccccc1,0,0,0,,1,0,0,0,0,0,0,0 +CC=Cc1ccc2c(c1)OCO2,0,0,0,0,0,0,0,0,0,0,0,0 +CCCC(CCC)C(=O)[O-],0,0,0,,0,0,0,0,0,0,0,0 +CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)Oc1cc(C)c(OC(C)=O)c2ccccc12,0,0,1,,,0,,,0,,,1 +CCN(Cc1ccc(Cl)nc1)/C(=C/[N+](=O)[O-])NC,0,0,0,0,0,0,0,0,0,0,0,0 +CC1CCC(C(C)C)C(OC(=O)c2ccccc2N)C1,0,0,0,0,1,1,0,0,0,,1, +O=C(c1ccccc1)c1cc(Cl)ccc1O,0,0,,1,,,0,,0,0,1,0 +OC[C@]1(O)OC[C@@H](O)[C@H](O)[C@@H]1O,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1ccc(C=C2C(=O)C3CCC2C3(C)C)cc1,0,0,0,0,0,,0,0,0,0,1,0 +CC(C)C[P+](C)(CC(C)C)CC(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +C=C1C[C@]23CC[C@@H]4[C@](C)(C(=O)O[C@@H]5O[C@H](CO)[C@@H](O)[C@H](O)[C@H]5O)CCC[C@]4(C)[C@@H]2C[C@@H](O[C@@H]2O[C@H](CO)[C@@H](O)[C@H](O[C@@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@H]4O)[C@H]2OC2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@@H]1C3,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCCC[P+](CCCCCCCC)(CCCCCCCC)CCCCCCCC,0,,,1,,,,,,,1, +CCCCC(CC)COC(=O)c1ccc(C(=O)OCC(CC)CCCC)c(C(=O)OCC(CC)CCCC)c1,0,0,0,0,1,0,0,0,0,0,0,0 +O=c1n(CCO)c(=O)n(CCO)c(=O)n1CCO,0,0,0,0,0,0,0,0,0,1,0,0 +Cc1cc(C)cc(OP(=O)(Oc2cc(C)cc(C)c2)Oc2cc(C)cc(C)c2)c1,0,0,1,0,0,0,0,,0,0,,0 +O=P(OC(CCl)CCl)(OC(CCl)CCl)OC(CCl)CCl,0,0,0,1,0,0,0,0,0,0,,0 +O=c1n(CC2CO2)c(=O)n(CC2CO2)c(=O)n1CC1CO1,0,0,0,,0,0,0,1,1,1,0,1 +Cc1cc(-c2ccc(N=Nc3c(S(=O)(=O)[O-])cc4cc(S(=O)(=O)[O-])cc(N)c4c3O)c(C)c2)ccc1N=Nc1c(S(=O)(=O)[O-])cc2cc(S(=O)(=O)[O-])cc(N)c2c1O,0,0,,,,0,0,,,,,0 +O=C(O)c1ccc(O)cc1O,0,0,0,0,0,0,0,0,0,0,0,0 +O=C1c2c(O)ccc([N+](=O)[O-])c2C(=O)c2c([N+](=O)[O-])ccc(O)c21,0,,,,,,,1,,,1, +CC1=CC(C)(C)Nc2ccccc21,0,0,0,0,0,0,0,0,0,0,1,0 +Cc1cc(=O)oc2cc(O)cc(O)c12,0,0,1,0,,0,,,0,,1, +CC(C)CNCC(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CNC1(c2ccccc2Cl)CCCCC1=O,0,0,0,0,0,0,0,,0,,0,0 +Cc1ccccc1OCC(O)CNCCOc1ccc(C(N)=O)cc1,0,0,0,0,0,0,0,,0,,0,0 +O=c1oc2cc(O)ccc2c2oc3cc(O)ccc3c12,0,0,1,,1,,,,1,,1,0 +COc1ccc(-c2coc3cc(O)cc(O)c3c2=O)cc1,0,,1,,1,1,,1,1,0,, +O=c1cc(-c2ccccc2)oc2cc(O)cc(O)c12,0,0,1,,1,1,1,1,0,,1, +O=c1cc(-c2ccc(O)cc2)oc2cc(O)cc(O)c12,0,0,1,,1,1,,1,1,0,1,1 +O=C(CCc1ccc(O)cc1)c1c(O)cc(O)cc1O,,,,,1,1,,1,0,0,1, +CCC(=O)N(c1ccccc1)C1CCN(CCc2ccccc2)CC1,1,1,0,0,0,0,0,,0,,0,0 +O=C(O)CCC(=O)c1ccc(-c2ccccc2)cc1,0,,,,0,0,0,0,0,0,,0 +CCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1cccc(Cl)c1Cl,0,,,,0,,0,1,0,,, +NC(=O)OCC(COC(N)=O)c1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +CCNC(C)Cc1cccc(C(F)(F)F)c1,0,0,0,0,0,0,0,,0,,,0 +COC(=O)c1ccc(C)cc1C1=NC(=O)C(C)(C(C)C)N1,0,0,0,0,0,0,0,0,0,0,0,0 +CCOP(=S)(NC(C)C)Oc1ccccc1C(=O)OC(C)C,0,0,1,1,0,0,0,,0,0,,0 +CSc1nc(NC2CC2)nc(NC(C)(C)C)n1,0,0,1,,0,0,0,,0,,1,0 +C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2C(=O)CO,1,1,0,,1,0,0,,0,0,0,0 +CN(C)[C@@H]1C(O)=C(C(N)=O)C(=O)[C@@]2(O)C(O)=C3C(=O)c4c(O)ccc(Cl)c4[C@@H](O)[C@H]3C[C@@H]12,,,,,,,,0,,0,, +CCC1(C)CC(=O)NC(=O)C1,0,0,0,0,0,0,0,,0,,0,0 
+O=C1NCN(c2ccccc2)C12CCN(CCCOc1ccc(F)cc1)CC2,0,0,0,,,0,0,1,0,0,1,0 +NC(=S)NNC(N)=S,0,0,0,0,0,0,0,0,0,0,0,0 +NC(=S)C(N)=S,0,0,0,0,0,0,0,,0,0,0,0 +CC1CN1P(=O)(N1CC1C)N1CC1C,0,0,0,0,0,0,,0,0,0,0,0 +O=C(Oc1ccccc1)Oc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +C[Hg]Cl,0,0,0,,0,,1,,1,1,,1 +S=c1[nH]cnc2[nH]cnc12,0,0,1,,0,0,,1,0,0,0,1 +[Hg+2],0,1,1,,,1,1,,,1,,1 +CCCCCCCCCCCCNC(=N)N,0,0,0,0,0,0,0,,0,0,,0 +CN(C)CCN(Cc1cccs1)c1ccccn1,0,0,0,0,0,0,0,0,0,0,0,0 +COc1nn(CSP(=S)(OC)OC)c(=O)s1,0,0,0,0,0,0,0,0,0,,0,0 +NC1=NCC2c3ccccc3Cc3ccccc3N12,0,0,0,0,0,0,0,0,0,0,0, +CC(=O)[C@H]1CC[C@H]2[C@@H]3CC[C@H]4C[C@](C)(O)CC[C@]4(C)[C@H]3CC[C@]12C,0,0,0,0,0,0,0,0,0,0,0,0 +O=C([O-])CCC/C=C\C[C@H]1[C@@H](O)C[C@@H](O)[C@@H]1/C=C/[C@@H](O)COc1cccc(Cl)c1,0,0,0,0,0,0,0,,0,,,0 +O=C(O)Cc1ccc(CCNS(=O)(=O)c2ccc(Cl)cc2)cc1,0,0,0,0,0,0,1,0,0,0,0,0 +NC(=O)c1cn(Cc2c(F)cccc2F)nn1,0,0,0,0,0,0,0,0,0,0,0,0 +COc1ccc(C=CC(=O)OCCC(C)C)cc1,0,0,,,,0,0,,,,0,0 +O=C(NC1CCN(CCc2c[nH]c3ccccc23)CC1)c1ccccc1,0,0,,0,0,0,0,0,0,0,0,0 +CCn1cc[n+](C)c1C.O=S(=O)([O-])C(F)(F)F,0,0,0,0,1,0,0,0,0,0,0,0 +Clc1ccc2cc3ccccc3cc2c1,0,,1,,,,,0,0,,,1 +CCCCn1cc[n+](C)c1.F[B-](F)(F)F,0,0,0,0,0,0,0,1,0,0,0,0 +F/C(COc1ccc2c(c1)[nH]c1ccccc12)=C1/CN2CCC1CC2,0,0,1,,0,0,,,0,,,0 +CC(C)Cc1ccc([C@@H](C)C(=O)NS(C)(=O)=O)cc1,0,0,0,0,0,0,0,,0,,0,0 +CCCCN(CCCC)C(=S)SSC(=S)N(CCCC)CCCC,0,0,0,,0,0,,,0,1,,0 +CCC[n+]1ccn(C)c1C.O=S(=O)([N-]S(=O)(=O)C(F)(F)F)C(F)(F)F,0,0,0,0,0,0,0,0,0,0,,0 +Brc1c2ccccc2cc2ccccc12,0,,0,,1,,1,,0,,,1 +CCO/C=C1\N=C(c2ccccc2)OC1=O,0,0,,0,0,0,0,0,0,0,0,0 +CNc1cc(OC)c(C(=O)N[C@H]2CCN(Cc3ccccc3)[C@H]2C)cc1Cl,0,0,0,0,0,0,0,,0,,0,0 +CCN1CCCC1CNC(=O)c1cc(S(=O)(=O)CC)c(N)cc1OC,0,0,0,0,0,0,0,0,0,0,0,0 +COc1cc2c(cc1OC)C1CC(=O)C(CC(C)C)CN1CC2,0,0,0,0,0,0,0,0,0,0,,0 +Cc1cc(C)cc(C(=O)OC2C[C@@H]3CC[C@H](C2)N3C)c1,0,0,0,0,0,0,0,,0,,0,0 +CC[N+]1(C)CCCC1.O=S(=O)([O-])C(F)(F)F,0,0,0,0,0,0,0,0,0,0,0,0 +COP(=O)(OC)SCn1c(=O)oc2cc(Cl)cnc21,0,0,1,,0,0,0,1,0,0,, +CNC(=O)/C=C(\C)OP(=O)(OC)OC,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1occc1SSc1ccoc1C,0,,0,,0,0,0,,0,1,0,1 +Cc1cc(C(F)(C(F)(F)F)C(F)(F)F)ccc1NC(=O)c1cccc(I)c1C(=O)NC(C)(C)CS(C)(=O)=O,0,0,0,0,,0,,,0,1,1,0 +CC=CC(=O)CC,0,0,0,0,0,0,0,0,0,0,0,0 +CC1OCCC1=O,0,0,0,0,0,0,0,0,0,0,0,0 +CC1CCCC(=O)C1=O,0,0,0,0,0,0,0,0,0,0,0,0 +CC1=C(O)C(=O)OC1C,0,0,0,0,0,0,0,,0,0,0,0 +CCCCCc1ccco1,0,0,0,,0,0,,,0,,1, +c1cnc2c(n1)CCCC2,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCc1ccc2cccc(S(=O)(=O)[O-])c2c1,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1cc(S(=O)(=O)[O-])ccc1/N=N/c1c(O)ccc2ccccc12,0,0,1,,0,0,0,,0,0,,0 +Cc1ccc(N=Nc2c(O)ccc(N=Nc3ccc(S(=O)(=O)[O-])cc3)c2O)c(C)c1,0,0,1,0,0,0,0,0,0,0,,0 +Nc1cnn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1,0,0,0,0,0,0,,0,0,0,,1 +CCNc1nc(Cl)nc(NC(C)(C)C)n1,0,0,,,1,1,0,0,0,0,,0 +NS(=O)(=O)c1cc2c(cc1Cl)N=CNS2(=O)=O,0,0,0,0,0,0,0,0,0,0,0,0 +Oc1c(Cl)cc(Cl)c2cccnc12,0,,1,,,,0,,1,1,1,1 +NC(=O)OCC(O)COc1ccc(Cl)cc1,0,0,,0,0,0,0,,0,,0,0 +CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21,0,0,0,0,,0,,,0,0,,1 +CCCCCCCCCCCCCCn1cc[n+](C)c1,0,,0,1,,,,1,0,,1, +O=[Cr](=O)([O-])O[Cr](=O)(=O)[O-],0,,0,,0,0,,1,0,0,,1 +O=P(Cl)(Cl)Cl,0,0,0,0,0,0,0,0,0,0,0,0 +CCN(Cc1cccc(S(=O)(=O)[O-])c1)c1ccc(C(=C2C=CC(=[N+](CC)Cc3cccc(S(=O)(=O)[O-])c3)C=C2)c2ccccc2)cc1,0,0,0,0,0,0,,1,0,0,0,0 +CC(C)COC(=O)COc1cc(Cl)c(Cl)cc1Cl,0,0,0,0,0,0,0,,0,0,,0 +O=C(OC[C@H]1O[C@@H](OC(=O)c2cc(O)c(O)c(OC(=O)c3cc(O)c(O)c(O)c3)c2)[C@H](OC(=O)c2cc(O)c(O)c(OC(=O)c3cc(O)c(O)c(O)c3)c2)[C@@H](OC(=O)c2cc(O)c(O)c(OC(=O)c3cc(O)c(O)c(O)c3)c2)[C@@H]1OC(=O)c1cc(O)c(O)c(OC(=O)c2cc(O)c(O)c(O)c2)c1)c1cc(O)c(O)c(OC(=O)c2cc(O)c(O)c(O)c2)c1,1,0,1,,0,0,,1,1,,1,1 +CN(C)CCOC(c1ccccc1)c1ccccc1,0,0,0,0,0,1,0,0,0,0,0,0 
+COC(=O)c1ccc(C)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +CN(C)CCCN1c2ccccc2C(C)(C)c2ccccc21,0,0,0,0,0,0,,,0,,,0 +COc1ccc([C@@H]2Sc3ccccc3N(CCN(C)C)C(=O)[C@@H]2OC(C)=O)cc1,0,0,0,0,0,,0,0,0,0,0, +CC(=O)C=Cc1ccccc1,0,0,0,0,1,0,0,0,0,0,0,0 +Cc1c[nH]c(=S)[nH]c1=O,0,0,0,0,0,0,0,0,0,0,0,0 +COc1ccc2cc1Oc1cc3c(cc1OC)CC[N+](C)(C)[C@H]3Cc1ccc(cc1)Oc1c(OC)c(OC)cc3c1[C@@H](C2)[N+](C)(C)CC3,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCC[C@H](O)/C=C/[C@H]1[C@H](O)CC(=O)[C@@H]1C/C=C\CCCC(=O)O,0,,0,0,,,0,0,0,0,0,0 +CCCCC[C@H](O)/C=C/[C@H]1[C@H](O)C[C@H](O)[C@@H]1C/C=C\CCCC(=O)O,0,0,0,0,1,,0,,0,,0,0 +CC12CCC(CC1)C(C)(C)O2,0,0,1,0,0,0,0,0,0,0,0,0 +C=COCC1CCC(CO)CC1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)/C=C/C1=C(C)CCCC1(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)NC1CCSC1=O,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)(C)CC(C)(C)c1ccc(O)c(Cc2ccc(Cl)cc2Cl)c1,0,,0,0,,,,0,0,,1, +CC1COc2ccccc2N1C(=O)C(Cl)Cl,0,0,1,,0,,,1,1,0,0,1 +CC(N)CN,0,0,0,,0,0,0,0,0,0,0,0 +CCC(C)O,0,0,0,0,,0,0,0,0,0,0,0 +CCCCC(CC)CNC(=N)NC(=N)NCCCCCCNC(=N)NC(=N)NCC(CC)CCCC,0,,,,,,,,,,, +CC(O)CN,0,0,0,0,0,0,0,0,0,0,0,0 +CO/N=C(\C(=O)N[C@@H]1C(=O)N2C(C(=O)[O-])=C(CSc3nc(=O)c([O-])nn3C)CS[C@H]12)c1csc(N)n1,0,0,,0,,0,0,0,0,0,0,0 +O=c1oc2cc(O)ccc2s1,0,0,0,0,0,0,0,,0,,0,0 +C=CCc1ccc(O)c(OC)c1,0,0,0,0,0,0,0,0,0,0,0,0 +COC(=O)[C@@H](N)CCCN/C(N)=N/[N+](=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +CC1(S(=O)(=O)[O-])CC(=O)c2ccccc2C1=O,0,0,1,,,,,,0,,,1 +Cc1nnc2n1-c1sc(CCC(=O)N3CCOCC3)cc1C(c1ccccc1Cl)=NC2,0,0,0,0,0,0,0,0,0,0,,0 +C[C@H](N[C@H](CCc1ccccc1)C(=O)O)C(=O)N1CCC[C@H]1C(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 +CCC(C)[C@H]1O[C@]2(C=C[C@@H]1C)C[C@@H]1C[C@@H](CC=C(C)[C@@H](O[C@H]3C[C@H](OC)[C@@H](O[C@H]4C[C@H](OC)[C@H](NC(C)=O)[C@H](C)O4)[C@H](C)O3)[C@@H](C)C=CC=C3CO[C@@H]4[C@H](O)C(C)=C[C@@H](C(=O)O1)[C@]34O)O2,0,0,0,,,0,,,0,,1, +COc1c(Br)cc(Br)c(C)c1Br,0,0,0,0,0,0,0,0,0,0,,0 +C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12.C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12,1,0,1,,1,0,0,,0,,0,0 +CN(C)CCc1c[nH]c2ccc(CS(=O)(=O)N3CCCC3)cc12,0,0,0,0,0,0,0,0,0,0,0,0 +O=C1/C(=C2\Nc3ccc(S(=O)(=O)O)cc3C2=O)Nc2ccc(S(=O)(=O)O)cc21,0,0,0,0,1,0,0,,0,,0,0 +CC(=O)O[Cr](O)OC(C)=O.CC(=O)O[Cr](O)OC(C)=O.CC(=O)O[Cr](OC(C)=O)OC(C)=O,0,0,0,0,0,0,0,,0,0,0,0 +CO[C@H]1CC(O[C@H]2C[C@H]([C@H]3O[C@](C)(O)[C@H](C)C[C@@H]3C)O[C@H]2[C@]2(C)CC[C@H]([C@]3(C)CC[C@]4(C[C@H](O)[C@@H](C)[C@@H]([C@@H](C)[C@@H]5O[C@](O)(CC(=O)[O-])[C@@H](C)[C@H](OC)[C@H]5OC)O4)O3)O2)O[C@@H](C)[C@@H]1OC,0,0,0,0,,0,0,,0,0,,1 +C=CC(=O)OCCn1c(=O)n(CCOC(=O)C=C)c(=O)n(CCOC(=O)C=C)c1=O,0,1,0,,,1,1,,1,,0, +C=C(C)C(=O)OCCNC(C)(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +NS(=O)(=O)c1ccccc1OC(F)(F)F,0,0,0,0,0,0,0,0,0,0,0,0 +O=C=NCC1CCCC(CN=C=O)C1,0,0,0,0,0,0,0,0,0,0,0,0 +C=C[C@H]1CN2CCC1C[C@@H]2[C@@H](O)c1ccnc2ccccc12,0,0,,,0,0,0,,0,,,0 +Cc1cc(N)c2cc(NC(=O)Nc3ccc4nc(C)cc(N)c4c3)ccc2n1,,,,,,,,,,0,, +O=C(O)c1ccccc1O.Oc1cccc2cccnc12,0,0,0,,1,1,,0,0,0,, +C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CC[C@@H]2OP(=O)(O)O,1,,0,0,1,1,0,,0,,0,0 +O=C(O)[C@@H](S)[C@H](S)C(=O)O,0,0,0,0,,0,,1,0,0,0, +CN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1,0,0,0,,0,0,,,,0,, +Cc1ncc(CO)c(CN)c1O,,,,,,,,0,,0,, +NS(=O)(=O)c1ccc(C(=O)O)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +O=C(CCS)OCC(COC(=O)CCS)(COC(=O)CCS)COC(=O)CCS,0,0,0,0,0,0,0,,0,0,,1 +C[C@@H]1NC(=O)[C@@H](N)CNC(=O)[C@H]([C@@H]2CCNC(N)=N2)NC(=O)/C(=C/NC(N)=O)NC(=O)[C@H](CNC(=O)C[C@@H](N)CCCN)NC1=O,,,,,,,,0,,0,, +OCCOCCN1CCN(C2=Nc3ccccc3Sc3ccccc32)CC1,0,0,0,0,0,0,0,0,0,0,,0 +Cc1cc(/C=C/c2ccc3cc(N(C)C)ccc3[n+]2C)c(C)n1-c1ccccc1.Cc1cc(/C=C/c2ccc3cc(N(C)C)ccc3[n+]2C)c(C)n1-c1ccccc1.O=C([O-])c1cc2ccccc2c(Cc2c(O)c(C(=O)[O-])cc3ccccc23)c1O,0,0,,,,0,0,,0,,1,1 
+COc1ccc(CN(CCN(C)C)c2ccccn2)cc1,0,0,0,0,0,0,0,0,0,0,1,0 +CN(C)C(=O)Oc1ccc[n+](C)c1,1,0,0,0,1,0,0,0,0,0,0,0 +Cc1ncc(CO)c(CO)c1O,0,0,0,0,0,0,0,0,0,0,0,0 +CCC1NC(=O)c2cc(S(N)(=O)=O)c(Cl)cc2N1,0,0,0,0,0,0,0,0,0,0,,0 +C=C[C@H]1CN2CCC1C[C@@H]2[C@@H](O)c1ccnc2ccc(OC)cc12,0,0,1,0,1,1,0,0,0,0,,0 +Brc1cc2ccccc2c2ccccc12,0,0,,0,0,0,0,0,0,0,,0 +CC(C)(N)CO,0,0,,0,0,0,0,0,0,0,0,0 +CC(C)(CO)CO,0,0,0,0,0,1,0,0,0,0,0,0 +O=S1(=O)CCCC1,0,0,0,0,0,0,0,0,0,0,0,0 +O=[N+]([O-])C(CO)(CO)CO,0,0,0,0,0,0,0,1,0,0,0,0 +OCC(CO)(CO)COCC(CO)(CO)CO,0,0,0,0,0,0,0,0,0,0,,0 +O=[N+]([O-])OCCN(CCO[N+](=O)[O-])CCO[N+](=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +NC(CO)(CO)CO,0,0,0,0,0,0,0,0,0,0,0,0 +O=C(Cl)c1cc(C(=O)Cl)cc(C(=O)Cl)c1,0,0,,,0,0,0,,0,1,0,1 +CO[Si](CCCS)(OC)OC,0,0,,0,0,0,0,0,0,0,0,0 +COc1cc2c3cc1Oc1cc(ccc1O)C[C@@H]1c4c(cc(OC)c(O)c4Oc4ccc(cc4)C[C@@H]3N(C)CC2)CC[N+]1(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +O=C(O[C@@H]1C[C@@H]2CC[C@H](C1)[N+]21CCCC1)C(O)(c1ccccc1)c1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +COc1cc(C(=O)NC2CCCNC2)cc(OC)c1OC,0,0,0,0,0,0,0,0,0,0,0,0 +C[N+](C)=CCl,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)c1cccnc1,0,0,0,0,0,0,0,0,0,0,0,0 +O=S1(=O)OC(c2ccc([O-])cc2)(c2ccc(O)cc2)c2ccccc21,0,0,0,,0,0,0,0,0,0,,0 +O=CN1CCOCC1,0,0,0,0,0,0,0,0,0,0,0,0 +COC(=O)CCC(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 +NCc1cccnc1,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCCCn1sc(Cl)c(Cl)c1=O,0,,0,,,0,0,,0,1,,1 +Cc1cc(O)cc(C)c1Cl,0,0,0,0,0,0,0,0,0,0,1,0 +O=[Zr](Cl)Cl,0,0,0,0,0,0,0,,0,0,0,0 +CSc1ccc2c(c1)N(CCC1CCCCN1C)c1ccccc1S2,0,0,0,,,0,,,,0,, +c1ccc2cnncc2c1,0,0,0,0,0,0,0,0,0,0,0,0 +COC(=O)c1ccc(CBr)cc1,0,0,1,,1,0,0,0,1,0,0,0 +CN1CCc2cc(Cl)c(O)cc2[C@H]2c3ccccc3CC[C@@H]21,0,0,0,0,0,0,0,0,0,0,,0 +O=P(O)(OCc1ccccc1)OCc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +S=C=NCc1ccccc1,0,0,,0,0,0,,,0,1,1,1 +Oc1ccc(Cl)cc1Cc1ccccc1,0,0,0,,0,0,0,1,0,0,1,0 +ClCc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +OCc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)OCc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +COCCc1ccc(OCC(O)CNC(C)C)cc1.COCCc1ccc(OCC(O)CNC(C)C)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +CN(C)C(=N)NC(=N)N,0,0,0,,0,0,0,0,0,0,0,0 +CCCC[Sn](CCCC)(OC(C)=O)OC(C)=O,0,,,,,0,,1,,,1,1 +C[NH+](C)CCC(c1ccccc1)c1cccc[nH+]1,0,0,0,0,1,0,0,0,0,0,0,0 +CCOc1ccc(N)cc1,0,0,1,1,0,0,0,,0,0,0,0 +CC(C)=CCC[C@H](C)CCO,0,0,0,0,0,0,0,0,0,0,0,0 +CCOc1cccc(N)c1,0,0,,0,0,0,0,0,0,0,0,0 +Nc1ccccc1C(=O)OCCc1ccccc1,1,0,1,,1,0,0,0,1,0,,0 +CC(C)CC(O)CC(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +C=C(C)C(=O)OCCOP(=O)(O)OCCOC(=O)C(=C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)OS(C)(=O)=O,,0,0,0,0,0,0,0,0,0,,0 +c1ccc2c(c1)OCC(CN1CCCCC1)O2,0,0,0,0,0,0,0,,0,,0,0 +C=CCN1CCCC1CNC(=O)c1cc(S(N)(=O)=O)cc(OC)c1OC,0,0,0,0,0,0,0,,0,,0,0 +C=C(C)OC(C)=O,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1cc(O)cc2c1O[C@](C)(CCC[C@H](C)CCC[C@H](C)CCCC(C)C)CC2,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1,0,0,0,0,0,0,0,,0,,0,0 +NC(=O)[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O,0,0,0,0,0,0,0,,0,,0,0 +O=C1CC[C@@H](C(=O)O)N1,0,0,0,0,0,0,0,0,0,0,0,0 +CN1C(=S)CN=C(c2ccccc2)c2cc(Cl)ccc21,0,0,0,,,0,0,0,,0,,0 +CC(C)(C)OC(=O)c1ncn2c1[C@@H]1CCCN1C(=O)c1c(Br)cccc1-2,0,0,,0,0,0,0,,0,0,0,0 +CCC(Cc1c(I)cc(I)c(O)c1I)C(=O)O,0,0,0,0,,,,,0,,,0 +CCOc1cc(NC(C)=O)ccc1C(=O)OC,0,0,,0,0,0,0,0,0,0,0,0 +CC(O)C#CC(C)O,0,,0,,0,0,1,,,,,0 +COc1ccc(N)cc1N,0,0,1,,0,,0,1,0,1,1,1 +CC1(C)[C@@H](O[C@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O[C@@H]2O[C@H](C(=O)[O-])[C@@H](O)[C@H](O)[C@H]2O)CC[C@@]2(C)[C@H]1CC[C@]1(C)[C@@H]2C(=O)C=C2[C@@H]3C[C@@](C)(C(=O)O)CC[C@]3(C)CC[C@]21C,0,0,0,0,0,0,0,0,0,0,0,0 +O=C1NC(=O)C(=O)C(=O)N1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)(C)c1cc(/C=C2\SC(=N)NC2=O)cc(C(C)(C)C)c1O,0,0,0,,,,,1,,,1, +OCCCC1CCCCC1,0,0,0,0,0,0,0,0,0,0,0,0 
+Cc1cc2c3c(c1)C(c1ccccc1)=N[C@@H](NC(=O)c1ccncc1)C(=O)N3CC2,0,0,0,0,0,0,1,,0,0,0,0 +CCc1cc(C2=C(C(=O)[O-])N(c3ccccc3C(F)(F)F)S(=O)(=O)c3ccccc32)cc2c1OCO2,0,0,0,0,0,0,,,0,0,1,0 +O=S(=O)([O-])c1ccc2c(/N=N\c3ccc(S(=O)(=O)[O-])c4ccccc34)c(O)c(S(=O)(=O)[O-])cc2c1,0,0,0,0,0,0,0,0,0,0,0,0 +O=C=Nc1ccc(Cl)cc1,0,0,,,,0,0,,,0,1,0 +CC(C)OC(=O)c1ccccc1C(=O)OC(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CO[C@H]1[C@H]([C@@]2(C)O[C@@H]2CC=C(C)C)[C@]2(CC[C@H]1OC(=O)/C=C/C=C/C=C/C=C/C(=O)O)CO2,0,0,0,1,0,0,0,1,0,0,0,0 +C1CCC2(CCCCO2)OC1,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCC(CC)COC(=O)c1ccccc1O,,,,,,,,0,,0,, +C[C@H]1O[C@H](O[C@@H]2[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)[C@H]2O)[C@@H](N)C[C@@H]1NC(=N)C(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 +CN(C)CCCN1c2ccccc2CCc2ccccc21,0,0,0,0,,0,,,0,0,,0 +CCCCOCCO,0,0,0,0,0,0,0,0,0,0,0,0 +[O-][n+]1ccccc1[S-],0,1,0,,,0,1,,1,1,1, +CCCN(CCC)C(=O)SCC,0,0,0,0,0,0,0,0,0,0,0,0 +O=S(=O)([O-])c1cccc2ccccc12,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)C1=CC2=CC[C@H]3[C@](C)(C(=O)[O-])CCC[C@]3(C)[C@H]2CC1,0,0,1,0,0,0,,,0,0,, +CN(C)c1ccc(C(=O)c2ccc(N(C)C)cc2)cc1,0,1,1,,,,0,0,1,0,1,1 +N#CCCC#N,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1ncc([N+](=O)[O-])n1CCO,0,0,0,0,0,0,0,0,0,0,0,0 +Nc1c(CC(=O)[O-])cccc1C(=O)c1ccccc1,0,,0,0,1,0,1,,0,,0,0 +C[N+]1(C)[C@H]2CC[C@@H]1C[C@H](OC(=O)C(CO)c1ccccc1)C2,1,0,0,0,1,0,0,0,0,0,0,0 +CC(Cl)(Cl)C(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 +CN[C@@H]1[C@H](O[C@H]2[C@H](O[C@@H]3[C@@H](NC(=N)N)[C@H](O)[C@@H](NC(=N)N)[C@H](O)[C@H]3O)O[C@@H](C)[C@]2(O)C=O)O[C@@H](CO)[C@H](O)[C@H]1O.CN[C@@H]1[C@H](O[C@H]2[C@H](O[C@@H]3[C@@H](NC(=N)N)[C@H](O)[C@@H](NC(=N)N)[C@H](O)[C@H]3O)O[C@@H](C)[C@]2(O)C=O)O[C@@H](CO)[C@H](O)[C@H]1O,0,0,0,0,0,0,0,,0,,0,0 +O=C(CCl)CCl,0,,0,,0,1,1,,1,0,,1 +CC(=O)C(Cl)Cl,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)(c1ccccc1)c1ccc(O)cc1,0,0,0,,1,1,0,1,0,0,1, +Cc1cc(O)c2c(O)c3c(O)cccc3cc2c1,0,0,1,0,0,0,1,1,0,0,1,0 +CCC(=O)[N-]S(=O)(=O)c1ccc(-c2c(-c3ccccc3)noc2C)cc1,0,0,0,,0,0,,,0,,0,0 +Cc1ccccc1N1CCN(CCc2nnc3n2CCCC3)CC1,0,0,0,0,1,1,0,0,0,0,0,0 +C=Cc1ccc(S(=O)(=O)[O-])cc1,0,0,0,0,0,0,,1,0,0,,0 +C[C@]12CC[C@@H]3c4ccc(OC(=O)N(CCCl)CCCl)cc4CC[C@H]3[C@@H]1CC[C@@H]2OP(=O)(O)O,1,0,0,0,1,1,,,0,,0,1 +CC1Cc2ccccc2N1NC(=O)c1ccc(Cl)c(S(N)(=O)=O)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)OC(C)C(=O)OCC[N+](C)(C)C.CC(=O)OC(C)C(=O)OCC[N+](C)(C)C.O=S(=O)([O-])c1cccc2c(S(=O)(=O)[O-])cccc12,0,0,0,0,1,0,0,,0,,0,0 +CC(=O)CC(=O)Nc1ccc2[nH]c(=O)[nH]c2c1,0,0,0,,0,0,0,0,0,0,0,0 +CCO[Si](C)(CCCOCC1CO1)OCC,0,0,0,0,0,0,0,0,0,0,0,0 +O=[N+]([O-])c1cc(C(F)(F)F)c(Cl)c([N+](=O)[O-])c1Cl,0,0,0,,,,,,,,, +CCCCOCCOCCOCCO,0,0,0,0,0,0,0,0,0,1,0,0 +CCCCCCCC/C=C/C(=O)[O-].CCCCCCCC/C=C/C(=O)[O-],,,,,,,,0,,0,, +Nc1cc(C(F)(F)F)ccc1S,,,,,,,,,,1,, +Cc1cccc(Cc2c[nH]cn2)c1C,0,,,,0,0,0,0,0,,0,0 +CCOC(=O)CC(=O)OCC,0,0,,0,0,0,0,0,0,0,0,0 +COc1ccc(CNCC(O)COc2ccc3[nH]c(=O)ccc3c2)cc1OC,0,0,0,0,0,0,0,,0,,0,0 +COC(=O)C1=C(C)NC(COC(N)=O)=C(C(=O)OC(C)C)C1c1cccc(Cl)c1Cl,0,1,,,0,,1,1,0,1,,1 +CCNC(=O)NCCCOc1cccc(CN2CCCCC2)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)SCC(CC(=O)c1ccc(C)cc1)C(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 +CCOC(=O)Cn1cccc1-c1nc(-c2ccc(OC)cc2)c(-c2ccc(OC)cc2)s1,0,0,0,0,0,0,0,1,0,,0, +O=C(CCCN1CCN(c2ccc(F)cc2)CC1)NC1c2ccccc2CSc2ccccc21,0,0,0,,0,0,,,0,0,,0 +CC(C)(C)NC[C@H](O)c1ccc(O)cc1Cl,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCC/C=C\C/C=C\CCCCCCCC(=O)NC(C)c1ccccc1,0,0,0,0,0,0,0,1,0,0,0,0 +CC(NN)c1ccccc1,0,0,,0,,0,0,0,0,0,0,0 +O=Cc1ccc(Cl)cc1,,,0,,0,0,0,0,0,0,,0 +CCN(C)C(=O)Oc1cccc([C@H](C)N(C)C)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1,0,,0,,,0,,,,,1,1 +CCN(CC)C(C)C(=O)c1ccccc1,0,0,,0,0,0,0,0,0,0,0,0 +CCN1CC(CCN2CCOCC2)C(c2ccccc2)(c2ccccc2)C1=O,0,0,0,0,0,0,0,,0,0,0,0 
+Cc1cccc(C(=O)O)c1[N+](=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +CCN(CC)CCOC(=O)C1(c2ccccc2)CCCC1.CCN(CC)CCOC(=O)C1(c2ccccc2)CCCC1.O=S(=O)(O)CCS(=O)(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 +Clc1ccccn1,0,0,0,0,0,0,0,0,0,0,0,0 +CCC(=O)/C=C/C1C(C)=CCCC1(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CC1CC(OC(=O)c2ccccc2O)CC(C)(C)C1,0,0,0,0,0,0,0,0,0,1,0,0 +CCCCCCCCCO,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCNCCCCCC,0,0,0,0,0,0,0,0,0,0,0,0 +CCN(CC)c1ccc(N)cc1,0,0,1,,,0,0,1,,1,1, +ClCCCCl,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCOC(C)=O,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCC(CC)COC(=O)CCCCCCCCC(=O)OCC(CC)CCCC,0,0,0,0,,0,0,0,0,0,0,0 +CCOC(C)=O,0,0,0,0,0,0,0,0,0,0,,0 +NCCNCCN,1,1,0,0,0,0,0,0,0,0,0,0 +CCOP(=O)(CC)OCC,0,0,0,0,0,0,0,0,0,0,,0 +Cc1c2oc3c(C)ccc(C(=O)N[C@@H]4C(=O)N[C@H](C(C)C)C(=O)N5CCC[C@H]5C(=O)N(C)CC(=O)N(C)[C@@H](C(C)C)C(=O)O[C@@H]4C)c3nc-2c(C(=O)N[C@@H]2C(=O)N[C@H](C(C)C)C(=O)N3CCC[C@H]3C(=O)N(C)CC(=O)N(C)[C@@H](C(C)C)C(=O)O[C@@H]2C)c(N)c1=O,0,,0,1,,0,,,0,0,,1 +NC(=O)CCCCC(N)=O,0,0,0,0,0,0,0,0,0,0,0,0 +CNC(=O)ON=CC(C)(C)SC,0,0,0,0,0,0,0,0,0,0,0,0 +C=CCOc1ccc(CC(=O)O)cc1Cl,1,0,0,0,1,1,1,,0,,0,0 +NN,0,0,1,0,0,0,0,0,0,0,0,0 +N[C@@H](Cc1cnc[nH]1)C(=O)O,0,0,,0,,0,0,,0,,0, +NNc1nc(-c2ccccc2)cs1,0,0,1,,0,0,0,1,0,1,1, +NNc1nc(-c2ccc(N)cc2)cs1,0,0,1,,0,0,,1,0,,1,0 +Cc1ccccc1CO[C@H]1C[C@]2(C(C)C)CC[C@@]1(C)O2,0,0,,,0,0,0,0,0,0,0,0 +NNc1ccc(C(=O)O)cc1,1,0,,,,0,0,0,0,0,0,0 +CCCCCCOc1ccccc1C(=O)O,0,0,0,0,,0,0,0,0,0,0,0 +O=C(OCc1ccccc1)C(=O)OCc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +CCCSc1ccc2[nH]c(NC(=O)OC)nc2c1,0,,1,,1,0,0,0,1,1,1,1 \ No newline at end of file diff --git a/chemprop-updated/tests/data/classification.npz b/chemprop-updated/tests/data/classification.npz new file mode 100644 index 0000000000000000000000000000000000000000..871ce27559c9f3b2b0e20b8b7e3c53849f6e0047 --- /dev/null +++ b/chemprop-updated/tests/data/classification.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843a6b271daacd34b8225c14e1b0f933b78e6ff8f0f7a9766de7a51bbdedb906 +size 267678 diff --git a/chemprop-updated/tests/data/classification/mol+mol.csv b/chemprop-updated/tests/data/classification/mol+mol.csv new file mode 100644 index 0000000000000000000000000000000000000000..c805a1326b48177c68838d177bfd3738ba853ebb --- /dev/null +++ b/chemprop-updated/tests/data/classification/mol+mol.csv @@ -0,0 +1,260 @@ +mol a smiles,mol b Smiles,synergy +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C(=O)O)ON=C(C1=CSC(=N1)N)C(=O)NC2C3N(C2=O)C(=C(CS3)C[N+]4=CC=CC=C4)C(=O)[O-],0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C)NCC(=O)NC1=CC(=C2CC3CC4C(C(=O)C(=C(C4(C(=O)C3=C(C2=C1O)O)O)O)C(=O)N)N(C)C)N(C)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CN1C(=NC(=O)C(=O)N1)SCC2=C(N3C(C(C3=O)NC(=O)C(=NOC)C4=CSC(=N4)N)SC2)C(=O)O,1 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=CC=CC=CC=CC=CC=CC(CC2C(C(CC(O2)(CC(CC(C(CCC(CC(CC(=O)OC(C(C1O)C)C)O)O)O)O)O)O)O)C(=O)O)OC3C(C(C(C(O3)C)O)N)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,0 +N=C(N)NCCC[C@H](NC(=O)[C@H](CCCNC(=N)N)NC(=O)[C@H](Cc1c[nH]c2ccccc12)NC(=O)[C@H](CCCNC(=N)N)NC(=O)[C@H](CCCNC(=N)N)NC(=O)[C@H](Cc1c[nH]c2ccccc12)NC(=O)[C@@H](N)Cc1ccccc1)C(=O)O[NH2],CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)NC(=O)C(CCN)NC(=O)O)C(C)O)CCN)CCN,C(C(C1C(=C(C(=O)O1)O)O)O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)CNS(=O)(=O)N)C(=O)O)C(C)O,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1NC(=O)C(CCN)O)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(C(C(C(O3)CN)O)O)O)N,1 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)CNS(=O)(=O)N)C(=O)O)C(C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N.COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,0 +C1=COC(=C1)CNCCS(=O)(=O)O,C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,0 +CC1=NC2=C(N1CC(C)OC3=CC=CC=N3)N=C(C=C2)C4=CC(=NC(=C4)N)N,[Na+].[Cl-],0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC1C(C(C(N(CC(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)C)O)(C)O,1 +CCOP(=O)(O)OP(=O)(O)O,CCN1C=C(C(=O)C2=C1N=C(C=C2)C)C(=O)O,1 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=CC=CC=CC=CC=CC=CC(CC2C(C(CC(O2)(CC(CC(C(CCC(CC(CC(=O)OC(C(C1O)C)C)O)O)O)O)O)O)O)C(=O)O)OC3C(C(C(C(O3)C)O)N)O,1 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,C1=CC(=CC=C1C(C(CO)NC(=O)C(Cl)Cl)O)[N+](=O)[O-],0 +CC(C)CC1C(=O)NC(C(=O)N2CCCC2C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N3CCCC3C(=O)NC(C(=O)NC(C(=O)N1)CCCN)C(C)C)CC4=CC=CC=C4)CC(C)C)CCCN)C(C)C)CC5=CC=CC=C5,[Ag],0 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)CNS(=O)(=O)N)C(=O)O)C(C)O,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2CCC3(C(C2(CCC1O)C)C(CC4C3(CC(C4=C(CCC=C(C)C)C(=O)O)OC(=O)C)C)O)C,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,1 +N=C(N)NCCC[C@H](NC(=O)[C@H](CCCNC(=N)N)NC(=O)[C@H](Cc1c[nH]c2ccccc12)NC(=O)[C@H](CCCNC(=N)N)NC(=O)[C@H](Cc1c[nH]c2ccccc12)NC(=O)[C@@H](N)Cc1ccccc1)C(=O)O[NH2],CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,0 +CCOP(=O)(O)OP(=O)(O)O,CC(=O)OCC1=C(N2C(C(C2=O)NC(=O)C(=NOC)C3=CSC(=N3)N)SC1)C(=O)O,0 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,1 +CC[C@H](C)[C@H](NC(=O)[C@H](CCCNC(=N)N)NC(=O)[C@H](Cc1c[nH]c2ccccc12)NC(=O)[C@@H](N)Cc1ccccc1)C(=O)N[C@@H](CCCNC(=N)N)C(=O)N[C@@H](CCCCN)C(=O)N[C@H](C(=O)N[C@@H](CCCNC(=N)N)C(=O)O)C(C)C[NH2],CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC(C)CC(C)CCCCCCCCC(=O)NC1CC(C(NC(=O)C2C(CCN2C(=O)C(NC(=O)C(NC(=O)C3CC(CN3C(=O)C(NC1=O)C(C)O)O)C(C(C4=CC=C(C=C4)O)O)O)C(CCN)O)O)NCCN)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC(C)(C)CC(C)(C)C1=CC=C(C=C1)OCCOCCO,1 +CC(C(=O)NC(CCC(=O)O)C(=O)N)NC(=O)COC1C(C(OC(C1O)CO)O)NC(=O)C,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,1 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,0 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC=C2C(=C1)C=CN2,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC(C)CC(C)CCCCCCCCC(=O)NC1CC(C(NC(=O)C2C(CCN2C(=O)C(NC(=O)C(NC(=O)C3CC(CN3C(=O)C(NC1=O)C(C)O)O)C(C(C4=CC=C(C=C4)O)O)O)C(CCN)O)O)NCCN)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,0 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,C1C(C(C(C(C1NC(=O)C(CCN)O)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(C(C(C(O3)CN)O)O)O)N,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC(=O)OCC1=C(N2C(C(C2=O)NC(=O)CC3=CC=CS3)SC1)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,N[C@@]([H])(Cc1ccccc1)C(=O)N[C@@]([H])([C@]([H])(CC)C)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(C)C)C(=O)O[NH2],1 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C(C=CC(=C41)Cl)O)O)O)O)C(=O)N)N(C)C)O,1 +O.O.O.O.O.O.O.O.[V].[V].[V],C1C(C(C(C(C1N)OC2C(C(C(C(O2)CN)O)O)O)O)OC3C(C(C(C(O3)CO)O)N)O)N,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1NC(=O)C(CCN)O)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(C(C(C(O3)CN)O)O)O)N,0 +N[C@@]([H])(Cc1ccc(O)cc1)C(=O)N[C@@]([H])(CO)C(=O)N1[C@@]([H])(CCC1)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])([C@]([H])(O)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])(Cc1ccccc1)C(=O)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=CC=C2)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)[O-].CC1(C(N2C(S1(=O)=O)CC2=O)C(=O)O)CN3C=CN=N3.[Na+],0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)NC(=O)N4CCNC4=O)C(=O)O)C,1 
+CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCCC1CC(N(C1)C)C(=O)NC(C2C(C(C(C(O2)SC)O)O)O)C(C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)N(C)C)C(=O)O)C(C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCCC=CC=O,1 +CCCOCCN1C2=C(C=CC(=N2)C3=CN=C(C=C3)OC)N=C(C1=O)NCCN4CCOCC4,N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)O,1 +CCCCCCCC(=O)NC(C(C)O)C(=O)NC(CC)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC2=CC=CC=C2)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 +C[As](=O)(O)[O-].C[As](=O)(O)[O-].[Ca+2],C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(=O)NC(CS)C(=O)O,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CC1C(C(C(O1)OC2C(C(C(C(C2O)O)N=C(N)N)O)N=C(N)N)OC3C(C(C(C(O3)CO)O)O)NC)(C=O)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C(O1)P(=O)(O)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C(C(=O)N1S(=O)(=O)O)NC(=O)C(=NOC(C)(C)C(=O)O)C2=CSC(=N2)N,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1C(C(=O)N1S(=O)(=O)O)NC(=O)C(=NOC(C)(C)C(=O)O)C2=CSC(=N2)N,1 +C1C(=O)N(C2=C(S1)C=CC(=C2)C(=O)NCC3=CC=CO3)CC4=C(C=CC=C4Cl)F,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,C1C(=C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CS3)C(=O)[O-])C[N+]4=CC=CC=C4,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC(=CC=C1C(=O)O)N,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,[Mg],0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C(O1)P(=O)(O)O,0 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)NC(=O)C(CCN)NC(=O)O)C(C)O)CCN)CCN,C1=CN=C(N=C1)NS(=O)(=O)C2=CC=C(C=C2)N,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCCCCCCCNCCCC(C)C1CCC2C1(C(CC3C2C(CC4C3(CCC(C4)OCCCN)C)OCCCN)OCCCN)C,1 +O.O.O.O.O.O.O.O.[V].[V].[V],CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C(O1)P(=O)(O)O,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C(C=CC(=C41)Cl)O)O)O)O)C(=O)N)N(C)C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C2C(C(=O)N1)N=C(N2)NC3C(C(C(C(O3)CO)OC(=O)N)O)NC(=O)CC(CCCN)N)O,0 +CC1=C(C=CC(=C1)CNCCS(=O)(=O)O)F,CCC(C)CC(C)CCCCCCCCC(=O)NC1CC(C(NC(=O)C2C(CCN2C(=O)C(NC(=O)C(NC(=O)C3CC(CN3C(=O)C(NC1=O)C(C)O)O)C(C(C4=CC=C(C=C4)O)O)O)C(CCN)O)O)NCCN)O,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,1 +CCOP(=O)(O)OP(=O)(O)O,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CSC=C3)C(=O)O)C(=O)O)C,1 +CC1=C(C(CC=C1)(C)C)C=CC(=CC=CC(=CCO)C)C,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CN)O)O)O)O)OC3C(C(C(C(O3)CO)O)N)O)N,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1C=C(C(=O)C2=C1N=C(C=C2)C)C(=O)O,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CCCC1CC(N(C1)C)C(=O)NC(C2C(C(C(C(O2)SC)O)O)O)C(C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N.COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=C(C=C2)O)C(=O)NC3C4N(C3=O)C(=C(CS4)CSC5=NN=NN5C)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=CC=C2)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=CC=C2)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1(=O)=O)CC2=O)C(=O)O)C,0 
+CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=NC(=O)NC(=C1F)N,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,C1=CC(=CC=C1C(C(CO)NC(=O)C(Cl)Cl)O)[N+](=O)[O-],1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C(=O)O)ON=C(C1=CSC(=N1)N)C(=O)NC2C3N(C2=O)C(=C(CS3)C[N+]4=CC=CC=C4)C(=O)[O-],1 +N[C@@]([H])(Cc1ccc(O)cc1)C(=O)N[C@@]([H])(CO)C(=O)N1[C@@]([H])(CCC1)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])([C@]([H])(O)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])(Cc1ccccc1)C(=O)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)O,CN1CCN(CC1)C2=NC3=CC=CC=C3C=C2CN4C5=NC=NC(=C5C(=N4)C6=CC7=C(C=C6)N=C(S7)N)N,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CCOC1=C(C2=CC=CC=C2C=C1)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CN=C(N=C1)[N-]S(=O)(=O)C2=CC=C(C=C2)N.[Na+],1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC(C)(C(=O)O)ON=C(C1=CSC(=N1)N)C(=O)NC2C3N(C2=O)C(=C(CS3)C[N+]4=CC=CC=C4)C(=O)[O-],1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1=C(C(=NO1)C2=CC=CC=C2Cl)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 +C[As](=O)(O)[O-].C[As](=O)(O)[O-].[Ca+2],C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,1 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC(=O)OCC1=C(N2C(C(C2=O)NC(=O)C(=NOC)C3=CSC(=N3)N)SC1)C(=O)O,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC(=O)OCC1=C(N2C(C(C2=O)NC(=O)CC3=CC=CS3)SC1)C(=O)O,1 +CCCCCCCCNCCCC(C)C1CCC2C1(C(CC3C2C(CC4C3(CCC(C4)OCCCN)C)OCCCN)OCCCN)C,CC1CO1.C1CO1,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(=O)NCC1CN(C(=O)O1)C2=CC(=C(C=C2)N3CCOCC3)F,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1COC2=C3N1C=C(C(=O)C3=CC(=C2N4CCN(CC4)C)F)C(=O)O,1 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C(CN(C=O)O)CP(=O)(O)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(C(=O)N1OS(=O)(=O)O)NC(=O)C(=NOCC2=CC(=O)C(=CN2O)O)C3=CSC(=N3)N)C,1 +CC1=C(C(CC=C1)(C)C)C=CC(=CC=CC(=CCO)C)C,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)OC)C)C)O)(C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,0 +C1=CC=C(C=C1)CC(C(=O)NC(CCCN=C(N)N)C(=O)NC2=CC3=CC=CC=C3C=C2)N,CC1COC2=C3N1C=C(C(=O)C3=CC(=C2N4CCN(CC4)C)F)C(=O)O,1 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,0 +P#P,CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,1 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,1 +C1=COC(=C1)CNCCS(=O)(=O)O,CCC(C)CC(C)CCCCCCCCC(=O)NC1CC(C(NC(=O)C2C(CCN2C(=O)C(NC(=O)C(NC(=O)C3CC(CN3C(=O)C(NC1=O)C(C)O)O)C(C(C4=CC=C(C=C4)O)O)O)C(CCN)O)O)NCCN)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1(=O)=O)CC2=O)C(=O)O)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,1 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C(CN(C=O)O)CP(=O)(O)O,1 +C[As](=O)(O)[O-].C[As](=O)(O)[O-].[Ca+2],CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,1 +C1=CC=C(C=C1)CCC(C(=CS(=O)(=O)OC2=CC=C(C=C2)[N+](=O)[O-])S)NC(=O)C(CC3=CC=CC=C3)NC(=O)OCC4=CC=CC=C4,C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O,1 +CCOP(=O)(O)OP(=O)(O)O,CC1C2CCC3(C(C2(CCC1O)C)C(CC4C3(CC(C4=C(CCC=C(C)C)C(=O)O)OC(=O)C)C)O)C,1 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCNC1CC(C(C(C1OC2C(C(C(CO2)(C)O)NC)O)O)OC3C(CC=C(O3)CN)N)N,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)NC(=O)N4CCNC4=O)C(=O)O)C,1 +N[C@@]([H])(Cc1ccc(O)cc1)C(=O)N[C@@]([H])(CO)C(=O)N1[C@@]([H])(CCC1)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])([C@]([H])(O)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])(Cc1ccccc1)C(=O)O,CC1=NN=C(S1)SCC2=C(N3C(C(C3=O)NC(=O)CN4C=NN=N4)SC2)C(=O)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1NC(=O)C(CCN)O)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(C(C(C(O3)CN)O)O)O)N,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CCCC1CC(N(C1)C)C(=O)NC(C2C(C(C(C(O2)SC)O)O)O)C(C)Cl,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C)NCC(=O)NC1=CC(=C2CC3CC4C(C(=O)C(=C(C4(C(=O)C3=C(C2=C1O)O)O)O)C(=O)N)N(C)C)N(C)C,0 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CC1COC2=C3N1C=C(C(=O)C3=CC(=C2N4CCN(CC4)C)F)C(=O)O,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CN1C(=NN=N1)SCC2=C(N3C(C(C3=O)(NC(=O)C(C4=CC=C(C=C4)O)C(=O)O)OC)OC2)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C(C(C(O1)OC2C(C(C(C(C2O)O)N=C(N)N)O)N=C(N)N)OC3C(C(C(C(O3)CO)O)O)NC)(C=O)O,1 +CCOP(=O)(O)OP(=O)(O)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CCCC1CC(N(C1)C)C(=O)NC(C2C(C(C(C(O2)SC)O)O)O)C(C)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,C1C(=C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CS3)C(=O)[O-])C[N+]4=CC=CC=C4,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC(=CC=C1C(C(CO)NC(=O)C(Cl)Cl)O)[N+](=O)[O-],1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=C(C=C2)O)C(=O)NC3C4N(C3=O)C(=C(CS4)CSC5=NN=NN5C)C(=O)O.CC1(C(N2C(S1(=O)=O)CC2=O)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CN(C)C1C2CC3CC4=C(C=CC(=C4C(=C3C(=O)C2(C(=C(C1=O)C(=O)N)O)O)O)O)N(C)C,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=CC=C2)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC(=CC=C1NC(=NC(=NCCCCCCN=C(N)N=C(N)NC2=CC=C(C=C2)Cl)N)N)Cl,1 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)N(C)C)C(=O)O)C(C)O,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCCCCCCCOC1=CC=C(C=C1)C2=CC=C(C=C2)C(=O)NC3CC(CNC(=O)C4C(C(CN4C(=O)C(NC(=O)C(NC(=O)C5CC(CN5C(=O)C(NC3=O)C(C)O)O)C(CC6=CC=CC=C6)O)CO)C)O)NCCN,1 +CCOP(=O)(O)OP(=O)(O)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CSC=C3)C(=O)O)C(=O)O)C,0 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC(=O)OCC1=C(N2C(C(C2=O)NC(=O)C(=NOC)C3=CSC(=N3)N)SC1)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)C(=O)O)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1=C(ON=C1C)NS(=O)(=O)C2=CC=C(C=C2)N,1 +CCOP(=O)(O)OP(=O)(O)O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,1 +CCCCCCCCCCCCCCCCCC(=O)OCC1C(C(C(C(O1)O)NC(=O)C)OC(C)C(=O)NC(C)C(=O)NC(CCC(=O)N)C(=O)O)O,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,1 +NCC(=O)N[C@@]([H])(Cc1ccccc1)C(=O)N[C@@]([H])(Cc1ccccc1)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(C)C)C(=O)N[C@@]([H])([C@]([H])(CC)C)C(=O)N1[C@@]([H])(CCC1)C(=O)NCC(=O)N[C@@]([H])([C@]([H])(CC)C)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CC1C(C(C(O1)OC2C(C(C(C(C2O)O)N=C(N)N)O)N=C(N)N)OC3C(C(C(C(O3)CO)O)O)NC)(C=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,0 +N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)O,N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)O,1 +N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)O,CN1CCN(CC1)C2=NC3=CC=CC=C3C=C2CN4C5=NC=NC(=C5C(=N4)C6=CC7=C(C=C6)N=C(S7)N)N,1 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CCC(=O)C1CC(=O)OC(CC2C(O2)C=CC(C(CC(C(C1OC)OC3C(C(C(C(O3)C)OC4CC(C(C(O4)C)OC(=O)CC)(C)O)N(C)C)O)CC=O)C)O)C,1 
+CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC(C)(C(=O)O)ON=C(C1=CSC(=N1)N)C(=O)NC2C3N(C2=O)C(=C(CS3)C[N+]4=CC=CC=C4)C(=O)[O-],1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C[As](=O)(O)[O-].C[As](=O)(O)[O-].[Ca+2],0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,[Ca],0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CC1C(C(C(O1)OC2C(C(C(C(C2O)O)N=C(N)N)O)N=C(N)N)OC3C(C(C(C(O3)CO)O)O)NC)(C=O)O,1 +CC1=C(C=CC(=C1)CNCCS(=O)(=O)O)F,C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O,1 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1C2CCC3(C(C2(CCC1O)C)C(CC4C3(CC(C4=C(CCC=C(C)C)C(=O)O)OC(=O)C)C)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC(C)CC(C)CCCCCCCCC(=O)NC1CC(C(NC(=O)C2C(CCN2C(=O)C(NC(=O)C(NC(=O)C3CC(CN3C(=O)C(NC1=O)C(C)O)O)C(C(C4=CC=C(C=C4)O)O)O)C(CCN)O)O)NCCN)O,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CN1C(=NN=N1)SCC2=C(N3C(C(C3=O)(NC(=O)C(C4=CC=C(C=C4)O)C(=O)O)OC)OC2)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1=NC=NC=C1F)C(CN2C=NC=N2)(C3=C(C=C(C=C3)F)F)O,0 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CN=C(N=C1)NS(=O)(=O)C2=CC=C(C=C2)N,1 +CCOP(=O)(O)OP(=O)(O)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1C2CCC3(C(C2(CCC1O)C)C(CC4C3(CC(C4=C(CCC=C(C)C)C(=O)O)OC(=O)C)C)O)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)C(=O)O)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C(O1)P(=O)(O)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=CC=C2)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 
+CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,C1=CC(=CC=C1C(C(CO)NC(=O)C(Cl)Cl)O)[N+](=O)[O-],1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1=NC=NC=C1F)C(CN2C=NC=N2)(C3=C(C=C(C=C3)F)F)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C(=O)O)ON=C(C1=CSC(=N1)N)C(=O)NC2C3N(C2=O)C(=C(CS3)C[N+]4=CC=CC=C4)C(=O)[O-],0 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C)NCC(=O)NC1=CC(=C2CC3CC4C(C(=O)C(=C(C4(C(=O)C3=C(C2=C1O)O)O)O)C(=O)N)N(C)C)N(C)C,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1C(C(=O)N1S(=O)(=O)O)NC(=O)C(=NOC(C)(C)C(=O)O)C2=CSC(=N2)N,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C,1 +N[C@@]([H])(Cc1ccc(O)cc1)C(=O)N[C@@]([H])(CO)C(=O)N1[C@@]([H])(CCC1)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])([C@]([H])(O)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])(Cc1ccccc1)C(=O)O,CC1COC2=C3N1C=C(C(=O)C3=CC(=C2N4CCN(CC4)C)F)C(=O)O,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CCOC1=C(C2=CC=CC=C2C=C1)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)N(C)C)C(=O)O)C(C)O,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CN1C(=NC(=O)C(=O)N1)SCC2=C(N3C(C(C3=O)NC(=O)C(=NOC)C4=CSC(=N4)N)SC2)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)NC4=CC=CC(=C4)C(=O)O)C(=O)O)C(C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,C1=CC(=CC=C1C(C(CO)NC(=O)C(Cl)Cl)O)[N+](=O)[O-],1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ga+3],1 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CN(C)C1C2CC3CC4=C(C=CC(=C4C(=C3C(=O)C2(C(=C(C1=O)C(=O)N)O)O)O)O)N(C)C,0 +CCC1(CC=NCCN1CC)C(=O)NCC2=CC=CC=C2,[Na+].[Cl-],0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1C=C(C(=O)C2=C1N=C(C=C2)C)C(=O)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=C(C=C2)O)C(=O)NC3C4N(C3=O)C(=C(CS4)CSC5=NN=NN5C)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CSC=C3)C(=O)O)C(=O)O)C,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N,1 +N[C@@]([H])(Cc1ccc(O)cc1)C(=O)N[C@@]([H])(CO)C(=O)N1[C@@]([H])(CCC1)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])([C@]([H])(O)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])(Cc1ccccc1)C(=O)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC(=CC=C1C(=O)O)N,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC1C(C(C(N(CC(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)C)O)(C)O,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CSC=C3)C(=O)O)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCCC=CC=O,0 +C1=CC=C(C=C1)CCC(C(=O)C(=O)S)NC(=O)C(CC2=CC=CC=C2)NC(=O)OCC3=CN=CC=C3,CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,1 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=CC=CC=CC=CC=CC=CC(CC2C(C(CC(O2)(CC(CC(C(CCC(CC(CC(=O)OC(C(C1O)C)C)O)O)O)O)O)O)O)C(=O)O)OC3C(C(C(C(O3)C)O)N)O,0 +CCCCCCCC(=O)NC(C(C)O)C(=O)NC(CC)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC2=CC=CC=C2)CC(C)C)CCN)CCN)C(C)O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)OC)C)C)O)(C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)NC4=CC=CC(=C4)C(=O)O)C(=O)O)C(C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1CC(=O)C2(C(O1)OC3C(C(C(C(C3O2)NC)O)NC)O)O,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,1 +CC(C)CC1C(=O)N2CCCC2C(=O)N1,C1CC2C(=O)NC(C(=O)N2C1)CC3=CC=CC=C3,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)N(C)C)C(=O)O)C(C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)C(=O)O)C(=O)O)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C)NCC(=O)NC1=CC(=C2CC3CC4C(C(=O)C(=C(C4(C(=O)C3=C(C2=C1O)O)O)O)C(=O)N)N(C)C)N(C)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1=C(C(=NO1)C2=CC=CC=C2Cl)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(=O)OCC1=C(N2C(C(C2=O)NC(=O)C(=NOC)C3=CSC(=N3)N)SC1)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C(CC(=O)O)C(=O)O,1 +C[As](=O)(O)[O-].C[As](=O)(O)[O-].[Ca+2],C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,0 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,0 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=CC=C2)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)[O-].CC1(C(N2C(S1(=O)=O)CC2=O)C(=O)O)CN3C=CN=N3.[Na+],1 +CCOP(=O)(O)OP(=O)(O)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 +C1=CC=C(C=C1)CCC(C(CS(=O)(=O)CC2=CC=CC=C2)S)NC(=O)C(CC3=CC=CC=C3)NC(=O)OCC4=CC=CC=C4,C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC(C)N1C(=O)N(C=N1)C2=CC=C(C=C2)N3CCN(CC3)C4=CC=C(C=C4)OCC5COC(O5)(CN6C=NC=N6)C7=C(C=C(C=C7)Cl)Cl,1 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CCC(=O)C1CC(=O)OC(CC2C(O2)C=CC(C(CC(C(C1OC)OC3C(C(C(C(O3)C)OC4CC(C(C(O4)C)OC(=O)CC)(C)O)N(C)C)O)CC=O)C)O)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1NC(=O)C(CCN)O)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(C(C(C(O3)CN)O)O)O)N,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=C(C=C2)O)C(=O)NC3C4N(C3=O)C(=C(CS4)CSC5=NN=NN5C)C(=O)O.CC1(C(N2C(S1(=O)=O)CC2=O)C(=O)O)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1=NC=NC=C1F)C(CN2C=NC=N2)(C3=C(C=C(C=C3)F)F)O,0 +CC(C)CC1C(=O)NC(C(=O)N2CCCC2C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N3CCCC3C(=O)NC(C(=O)NC(C(=O)N1)CCCN)C(C)C)CC4=CC=CC=C4)CC(C)C)CCCN)C(C)C)CC5=CC=CC=C5,[N+](=O)([O-])[O-].[Ag+],0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC=C2C(=C1)C=CN2,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)N(C)C)C(=O)O)C(C)O,1 +CCOP(=O)(O)OP(=O)(O)O,C1=CC(=CC=C1C(C(CO)NC(=O)C(Cl)Cl)O)[N+](=O)[O-],1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CN)O)O)O)O)OC3C(C(C(C(O3)CO)O)N)O)N,1 \ No newline at end of file diff --git 
a/chemprop-updated/tests/data/classification/mol.csv b/chemprop-updated/tests/data/classification/mol.csv new file mode 100644 index 0000000000000000000000000000000000000000..464ff6c0822218e00fdf02d551412d4c21372215 --- /dev/null +++ b/chemprop-updated/tests/data/classification/mol.csv @@ -0,0 +1,501 @@ +"smiles","NR-AhR","NR-ER","SR-ARE","SR-MMP" +"CCOc1ccc2nc(S(N)(=O)=O)sc2c1",1,,1,0 +"CCN1C(=O)NC(c2ccccc2)C1=O",0,0,,0 +"CC[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]3CC[C@@]21C",,,0, +"CCCN(CC)C(CC)C(=O)Nc1c(C)cccc1C",0,0,,0 +"CC(O)(P(=O)(O)O)P(=O)(O)O",0,0,0,0 +"CC(C)(C)OOC(C)(C)CCC(C)(C)OOC(C)(C)C",0,0,,0 +"O=S(=O)(Cl)c1ccccc1",0,0,0,0 +"O=C(O)Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1",0,1,1,0 +"OC[C@H](O)[C@@H](O)[C@H](O)CO",0,0,0, +"CCCCCCCC(=O)[O-].CCCCCCCC(=O)[O-].[Zn+2]",,,0, +"NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1",0,0,, +"O=c1[nH]c(=O)n([C@H]2C[C@H](O)[C@@H](CO)O2)cc1I",,0,0,0 +"CC(C)COC(=O)C(C)C",0,0,0,0 +"C=C(C)C(=O)OCCOC(=O)C(=C)C",0,0,0,0 +"Cl/C=C\C[N+]12CN3CN(CN(C3)C1)C2",0,0,1,0 +"O=C([O-])Cc1cccc2ccccc12",0,0,0,0 +"CCCCCCCCCCOCC(O)CN",0,0,, +"CCN(CC)C(=O)c1cccnc1",0,,,0 +"COc1cc(O)cc(O)c1",,,,0 +"CCOC(=O)c1cccnc1",,,0, +"CCOc1ccc(S(=O)(=O)O)c2cccnc12",,1,,0 +"O=C(O)[C@H](O)c1ccccc1",0,,0,0 +"Nc1ccc(/N=N/c2ccccc2)cc1",1,1,1, +"CN[C@@H]1C[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc21",1,0,, +"CN1[C@H]2CC[C@@H]1C[C@H](OC(=O)c1cc(Cl)cc(Cl)c1)C2",0,0,0, +"CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21",0,0,0, +"C#CCO",0,0,0,0 +"Nc1ccccc1S(=O)(=O)O",0,0,0, +"CC(O)CC(C)(C)O",0,0,0,0 +"CC(C)(C)CC(C)(C)N",0,0,0,0 +"CC(=O)CC(C)C",0,0,0,0 +"CCCC(C)=O",0,0,0,0 +"Nc1nc2ccccc2[nH]1",1,0,0,0 +"Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl",1,,1,1 +"c1ccc(-c2ccccc2)cc1",0,0,0,0 +"CNC(=O)Nc1ccc(Cl)c(Cl)c1",1,0,0, +"CC(=O)Nc1ccc(C)c(Cl)c1",0,,0, +"CCCCNC(=S)NCCCC",0,0,0,0 +"CCCCNC(=O)NCCCC",0,0,0,0 +"CC(C)N(c1ccccc1)C(C)C",0,0,0,0 +"CCc1cccc(C)c1",0,0,0,0 +"CC(C)CCCCCCCOP(OCCCCCCCC(C)C)Oc1ccccc1",0,0,,0 +"CCCCCCCC/C=C\CCCCCCCC(=O)OC(CO)CO",0,0,0,0 +"CCCCCCCCCCC=CC1CC(=O)OC1=O",0,0,1,0 +"CC(C)C(Nc1ccc(C(F)(F)F)cc1Cl)C(=O)OC(C#N)c1cccc(Oc2ccccc2)c1",,0,0,1 +"CS(=O)(=O)NC(=O)c1cc(Oc2ccc(C(F)(F)F)cc2Cl)ccc1[N+](=O)[O-]",0,0,0,0 +"CCOP(=S)(CC)Sc1ccccc1",0,,0,0 +"CC/C=C\CCCCO",0,0,0,0 +"Nc1ccccc1C(=O)Oc1ccc2ccccc2c1",1,1,1,1 +"C=C[C@H]1CN2CCC1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12",1,0,,0 +"CC(=O)CCC(C)=O",0,0,0,0 +"N#CCCNCCC#N",0,0,0,0 +"CCOc1ccc(N=Nc2ccc(C=Cc3ccc(N=Nc4ccc(OCC)cc4)cc3S(=O)(=O)[O-])c(S(=O)(=O)[O-])c2)cc1",0,0,, +"O=C1c2ccccc2C(=O)C1c1ccc2cc(S(=O)(=O)[O-])cc(S(=O)(=O)[O-])c2n1",,0,1, +"O=C(Nc1ccc2c(O)c(N=Nc3ccc(N=Nc4ccc(S(=O)(=O)[O-])cc4)cc3)c(S(=O)(=O)[O-])cc2c1)c1ccccc1",0,,1, +"CSc1ccc2c(c1)C(N1CCN(C)CC1)Cc1ccccc1S2",0,0,0, +"COCCCC/C(=N\OCCN)c1ccc(C(F)(F)F)cc1",0,0,, +"Cc1ccccc1CCO",0,0,0,0 +"Cc1nc(C)c(C)nc1C",0,0,0,0 +"CC1=CC(O)CC(C)(C)C1",0,0,0,0 +"Cc1cnc(C)c(C)n1",0,0,0,0 +"CC(C)COC(=O)c1ccccc1",0,,0,0 +"C=C(C)[C@@H]1CC=C(C)CC1",0,,0,0 +"O=[N+]([O-])[O-].O=[N+]([O-])[O-].[Ca+2]",0,0,0,0 +"Nc1ccc(N)c([N+](=O)[O-])c1",1,,1,1 +"CC1COc2ccccc2N1",1,0,0, +"O=C(O)c1cc(Cl)cc(Cl)c1O",0,0,0, +"CCCCCCCCCCCC(=O)NCCCN(C)C",0,,, +"CC(C)CCCCCOC(=O)CCS",0,0,0,0 +"O=[N+]([O-])c1cc([As](=O)(O)O)ccc1O",0,0,0,0 +"CCC(COC(=O)CCS)(COC(=O)CCS)COC(=O)CCS",0,0,0,0 +"C=CCOc1c(Br)cc(Br)cc1Br",1,0,0, +"F[B-](F)(F)F.[H+]",0,0,,0 +"CC(C)[C@H]1CC[C@H](C)C[C@@H]1O",0,0,0,0 +"C(=C/c1ccccc1)\c1ccccc1",1,1,1, +"Cc1ccc2c(ccc3ccccc32)c1",,,0, +"Cn1c(=O)c2c(ncn2CC2OCCO2)n(C)c1=O",0,0,0,0 +"C[C@H]1O[C@@H](n2cc(F)c(=O)[nH]c2=O)[C@H](O)[C@@H]1O",0,1,,0 
+"CN1CCc2cccc3c2[C@H]1Cc1ccc(O)c(O)c1-3",1,,1, +"COC(=O)C1=CCCN(C)C1",0,0,0,0 +"COc1ccc(C2C(=O)c3ccccc3C2=O)cc1",,,0, +"Cc1ccc(C(=O)O)cc1[N+](=O)[O-]",0,0,0,0 +"Cc1cc(C(=O)O)ccc1[N+](=O)[O-]",0,0,0,0 +"CCCC(CCC)C(=O)O[C@@H]1C[C@@H]2CC[C@H](C1)[N+]2(C)C",0,0,,0 +"CCCCCC(C)O",0,0,0,0 +"O=C([O-])c1ccccc1O",0,0,0, +"NC(=O)c1ccccc1",0,0,0,0 +"CCN1CCc2nc(N)oc2CC1",,,0, +"CC(C)(C)[C@]1(O)CCN2C[C@H]3c4ccccc4CCc4cccc(c43)[C@@H]2C1",0,,, +"O=C1C(N(CO)C(=O)NCO)N(CO)C(=O)N1CO",0,0,0,0 +"O=c1[nH]c(=O)n([C@H]2C[C@H](O)[C@@H](CO)O2)cc1/C=C/Br",0,0,,0 +"OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc32)CC1",0,0,0, +"CC(C)NC[C@@H](O)COc1ccc(CC(N)=O)cc1",0,0,0,0 +"CCNC(=O)N1CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1",0,,1,0 +"Nc1ccc([N+](=O)[O-])cc1N",1,1,1,1 +"[I-].[K+]",0,0,0,0 +"O=C(C=Cc1ccc(O)c(O)c1)O[C@@H]1C[C@](O)(C(=O)O)C[C@@H](O)[C@H]1O",0,0,0,0 +"Oc1nc(Cl)c(Cl)cc1Cl",1,0,1,1 +"C/C=C/C=C/C=O",0,0,1,0 +"O=[N+]([O-])c1cc(C(F)(F)F)cc([N+](=O)[O-])c1Cl",0,,1, +"C[Si](C)(C)N[Si](C)(C)C",0,0,0,0 +"C=CC(=O)OCCCl",0,0,,0 +"COCC(C)N(C(=O)CCl)c1c(C)csc1C",0,0,1, +"CN(C)CCn1nnnc1SCC1=C(C(=O)O)N2C(=O)[C@@H](NC(=O)Cc3csc(N)n3)[C@H]2SC1",0,0,,0 +"C/C(=N\NC(=O)Nc1cc(F)cc(F)c1)c1ncccc1C(=O)[O-]",,0,0,0 +"CC1COC(Cn2cncn2)(c2ccc(Oc3ccc(Cl)cc3)cc2Cl)O1",0,,1, +"CCN(CC)CCOC(=O)C(Cc1cccc2ccccc12)CC1CCCO1",0,0,0,0 +"CCN[C@H]1C[C@H](C)S(=O)(=O)c2sc(S(N)(=O)=O)cc21",0,0,0,0 +"CCN[C@H]1CN(CCCOC)S(=O)(=O)c2sc(S(N)(=O)=O)cc21",0,0,0,0 +"CC(C)N(CCC(C(N)=O)(c1ccccc1)c1ccccn1)C(C)C",0,0,0,0 +"CC[C@H]1[C@@H]2C[C@H]3[C@@H]4N(C)c5ccccc5[C@]45C[C@@H](C2[C@H]5O)N3[C@@H]1O",0,0,, +"CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)C=C[C@]5(C)[C@H]4[C@@H](O)C[C@]3(C)[C@]2(C(=O)CO)O1",0,0,0, +"CSC(=O)c1c(C(F)F)nc(C(F)(F)F)c(C(=O)SC)c1CC(C)C",0,0,0, +"O=C(O)/C=C(\CC(=O)O)C(=O)O",0,,0,0 +"CCCCCCCCCCCCCCCC(=O)O[C@@H]1CC(C)=C(/C=C/C(C)=C/C=C/C(C)=C/C=C\C=C(C)\C=C\C=C(C)\C=C\C2=C(C)C[C@@H](OC(=O)CCCCCCCCCCCCCCC)CC2(C)C)C(C)(C)C1",0,0,,0 +"O=C(CO)[C@@H](O)[C@H](O)[C@@H](O)CO",,,0, +"CC1=C(C(=O)Nc2ccccc2)S(=O)(=O)CCO1",0,,1, +"CNCC(=O)c1ccc(O)c(O)c1",,,0, +"CC(C)(C)C1CCC(=O)CC1",0,0,0,0 +"CN(C)[C@@H]1C(O)=C(C(=O)NCN2CCCC2)C(=O)[C@@]2(O)C(O)=C3C(=O)c4c(O)cccc4[C@@](C)(O)C3C[C@@H]12",0,0,0,0 +"CN1CCN=C(c2ccccc2)c2cc(Cl)ccc21",,0,,0 +"CN(C)CCc1c[nH]c2ccc(Cn3cncn3)cc12",0,0,,0 +"CCCCC(=O)[O-]",0,0,,0 +"CCCCCCCCCCCCCC(=O)OC",0,0,0,0 +"Nc1ccncc1N",,,0, +"CCCCCCC(C)O",0,0,0,0 +"CCC[Si](OC)(OC)OC",0,0,0,0 +"CC1=C(CC=O)C(C)(C)CCC1",0,0,0,0 +"NCCNCCNCCN",0,,0,0 +"C[C@]12CC[C@H]3[C@@H](CC[C@@]45O[C@@H]4C(O)=C(C#N)C[C@]35C)[C@@H]1CC[C@@H]2O",0,1,1, +"CCCC1COC(Cn2cncn2)(c2ccc(Cl)cc2Cl)O1",1,,1,0 +"Cc1ccc(N)c(N)c1",1,1,1, +"CCCCCNCCCCC",0,0,0,0 +"COCC(C)O",0,1,0,0 +"c1ccc2c(c1)Oc1ccccc1S2",0,1,0,1 +"CC1CN1",0,0,0,0 +"CCc1cnc(C2=NC(C)(C(C)C)C(=O)N2)c(C(=O)O)c1",0,0,0,0 +"NCC(=O)CCC(=O)O",,,, +"Clc1ccc(C(Cn2ccnc2)OCc2c(Cl)cccc2Cl)c(Cl)c1",,0,,0 +"Clc1cnc(Oc2ccc(Oc3ncc(Cl)cc3Cl)cc2)c(Cl)c1",0,0,0,1 +"COc1ccccc1OCCNCC(O)COc1cccc2[nH]c3ccccc3c12",1,,0,1 +"ClCOCCl",0,1,0,0 +"CC(O)CNCC(C)O",0,0,0,0 +"C[C@H](CCC(=O)[O-])[C@H]1CC[C@H]2[C@H]3[C@H](C[C@H](O)[C@@]21C)[C@@]1(C)CC[C@@H](O)C[C@H]1C[C@H]3O",,0,0,0 +"CC(=O)[C@H]1[C@H](C#N)C[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@@]21C",0,,1,0 +"O=[N+]([O-])c1ccc([As](=O)(O)O)cc1",0,0,,0 +"CCOC(=O)C1OC1c1ccccc1",0,0,,0 +"ONc1ccccc1",1,1,, +"O=CC(=O)c1ccccc1",0,0,0,0 +"[Cu]I",0,,,0 +"CCCCC(CC)CCC(CC(C)C)OS(=O)(=O)[O-]",0,0,0,0 +"ClCc1ccc(Cl)cc1Cl",0,0,, +"O=C(O)CCCCCCCCC(=O)O",0,0,0,0 +"CCCCCCCC(=O)OC",0,0,0,0 +"CC(O)COCC(C)O",0,0,0,0 +"Cc1ccc(C(=O)C(=O)[O-])cc1C",,,0, +"O=C([O-])COc1nn(Cc2ccccc2)c2ccccc12",0,0,0,0 +"Cc1ncc[nH]1",1,0,0,0 
+"COc1ccc2sc(C(=O)Nc3nnn[n-]3)c(OC(C)C)c2c1",0,,, +"Oc1ccc2c(c1)OC[C@@H](N1CCC(O)(c3ccc(F)cc3)CC1)[C@H]2O",1,1,0, +"O=C(O)CCN(C1(C(=O)NO)CCCC1)S(=O)(=O)c1ccc(Oc2ccc(F)cc2)cc1",0,0,, +"O=C(NO)C1(NS(=O)(=O)c2ccc(Oc3ccc(F)cc3)cc2)CCOCC1",0,,,0 +"Cc1nc(C)nc(N2C[C@H](C)N(c3ccnc([C@@H](C)O)n3)[C@H](C)C2)n1",0,0,,0 +"CC[C@H](C)[C@@H](C(=O)O)n1sc2ccccc2c1=O",0,0,1, +"Cc1cc(SC2=C(O)C[C@@](CCc3ccc(N)cc3)(C(C)C)OC2=O)c(C(C)(C)C)cc1CO",0,0,0,0 +"CCn1nc(C)c2c1C(=O)NCC(c1ccc(O)cc1)=N2",,0,,0 +"C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)COP(=O)([O-])[O-]",0,1,,0 +"CN1C[C@H]2c3ccccc3Oc3ccc(Cl)cc3[C@@H]2C1",,,0, +"CO[C@H]1C[C@H](O[C@@H]2[C@@H](C)C(=O)O[C@H](C)[C@H](C)[C@H](OC(C)=O)[C@@H](C)C(=O)[C@@]3(CO3)C[C@H](C)[C@H](O[C@@H]3O[C@H](C)C[C@H](N(C)C)[C@H]3OC(C)=O)[C@H]2C)O[C@@H](C)[C@@H]1OC(C)=O",0,0,,0 +"CO[Si](C)(C)OC",0,0,0,0 +"CC(O)(c1ccc(Cl)cc1)c1ccc(Cl)cc1",0,0,,1 +"CN(C)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)cc1",1,,1,1 +"CC(=O)c1ccccc1O",0,0,0,0 +"O=C(O)Cc1c(Cl)ccc(Cl)c1Cl",0,0,0,0 +"O=C(O)c1cccc(Cl)n1",0,0,0,0 +"CCCCCCCCCC=O",0,,0,0 +"Cc1ccc(C(C)(C)C)cc1",0,0,0,0 +"BrCBr",0,0,0,0 +"Nc1cc(Cl)cc(Cl)c1",0,0,0,0 +"CCCCCCCCCC(=O)O",0,0,0, +"CC(C)(C)c1cc([N+](=O)[O-])cc(C(C)(C)C)c1O",0,,,1 +"O.O.O.O.O.O.O=[N+]([O-])[O-].O=[N+]([O-])[O-].[Mg+2]",0,0,0,0 +"CCCCCCCCCOS(=O)(=O)[O-]",0,1,0,0 +"O=Cc1ccc(C(=O)O)cc1",0,0,1,0 +"CCC(Cl)CCl",0,0,0,0 +"CC(C)(O)c1ccccc1",0,0,0, +"O=C1CCCN1",0,1,0,0 +"ClCc1ccccc1Cl",0,0,0, +"Cc1ccc([N+](=O)[O-])c([N+](=O)[O-])c1",,0,1, +"N#CC1(N=NC2(C#N)CCCCC2)CCCCC1",0,0,0,0 +"C=CC(=O)OCCOC(=O)C=C",0,1,1,0 +"CCCC[P+](CCCC)(CCCC)CCCC",0,0,0,0 +"N#CCc1cccc(C(F)(F)F)c1",0,0,0,0 +"COc1cccc(Br)c1",0,0,0,0 +"CCCCCCCCNC",0,0,0,0 +"CCC1OCC(COc2ccc(Oc3ccccc3)cc2)O1",1,1,, +"CC1=C(C(=O)Nc2ccccc2)SCCO1",1,1,0,0 +"CCCCN(CCCC)SN(C)C(=O)Oc1cccc2c1OC(C)(C)C2",1,1,0, +"Cc1cc(OC(=O)N(C)C)nn1C(=O)N(C)C",0,0,0,0 +"Cc1ccc2c(Br)cc(Br)c(O)c2n1",1,,,1 +"O=c1c(O)c(-c2ccc(O)cc2)oc2cc(O)cc(O)c12",1,1,0,1 +"CC(O)COc1ccccc1",0,0,0,0 +"O=P1(NCCCl)OCCCN1CCCl",0,0,0,0 +"C=CC(=C)C",,,0,0 +"CC(C)O",0,0,0,0 +"CC(C)OC(=O)Nc1cccc(Cl)c1",,0,0, +"CC(C)OC(=O)Nc1ccccc1",0,1,0,0 +"CC=Cc1ccc2c(c1)OCO2",0,0,0,0 +"CCCC(CCC)C(=O)[O-]",0,0,0,0 +"CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23",0,0,0,0 +"CC(=O)Oc1cc(C)c(OC(C)=O)c2ccccc12",1,,, +"CCN(Cc1ccc(Cl)nc1)/C(=C/[N+](=O)[O-])NC",0,0,0,0 +"CC1CCC(C(C)C)C(OC(=O)c2ccccc2N)C1",0,1,0,1 +"O=C(c1ccccc1)c1cc(Cl)ccc1O",,,,1 +"OC[C@]1(O)OC[C@@H](O)[C@H](O)[C@@H]1O",0,0,0,0 +"Cc1ccc(C=C2C(=O)C3CCC2C3(C)C)cc1",0,0,0,1 +"CC(C)C[P+](C)(CC(C)C)CC(C)C",0,0,0,0 +"C=C1C[C@]23CC[C@@H]4[C@](C)(C(=O)O[C@@H]5O[C@H](CO)[C@@H](O)[C@H](O)[C@H]5O)CCC[C@]4(C)[C@@H]2C[C@@H](O[C@@H]2O[C@H](CO)[C@@H](O)[C@H](O[C@@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@H]4O)[C@H]2OC2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@@H]1C3",0,0,0,0 +"CCCCCCCC[P+](CCCCCCCC)(CCCCCCCC)CCCCCCCC",,,,1 +"CCCCC(CC)COC(=O)c1ccc(C(=O)OCC(CC)CCCC)c(C(=O)OCC(CC)CCCC)c1",0,1,0,0 +"O=c1n(CCO)c(=O)n(CCO)c(=O)n1CCO",0,0,0,0 +"Cc1cc(C)cc(OP(=O)(Oc2cc(C)cc(C)c2)Oc2cc(C)cc(C)c2)c1",1,0,, +"O=P(OC(CCl)CCl)(OC(CCl)CCl)OC(CCl)CCl",0,0,0, +"O=c1n(CC2CO2)c(=O)n(CC2CO2)c(=O)n1CC1CO1",0,0,1,0 +"Cc1cc(-c2ccc(N=Nc3c(S(=O)(=O)[O-])cc4cc(S(=O)(=O)[O-])cc(N)c4c3O)c(C)c2)ccc1N=Nc1c(S(=O)(=O)[O-])cc2cc(S(=O)(=O)[O-])cc(N)c2c1O",,,, +"O=C(O)c1ccc(O)cc1O",0,0,0,0 +"O=C1c2c(O)ccc([N+](=O)[O-])c2C(=O)c2c([N+](=O)[O-])ccc(O)c21",,,1,1 +"CC1=CC(C)(C)Nc2ccccc21",0,0,0,1 +"Cc1cc(=O)oc2cc(O)cc(O)c12",1,,,1 +"CC(C)CNCC(C)C",0,0,0,0 +"CNC1(c2ccccc2Cl)CCCCC1=O",0,0,,0 +"Cc1ccccc1OCC(O)CNCCOc1ccc(C(N)=O)cc1",0,0,,0 +"O=c1oc2cc(O)ccc2c2oc3cc(O)ccc3c12",1,1,,1 
+"COc1ccc(-c2coc3cc(O)cc(O)c3c2=O)cc1",1,1,1, +"O=c1cc(-c2ccccc2)oc2cc(O)cc(O)c12",1,1,1,1 +"O=c1cc(-c2ccc(O)cc2)oc2cc(O)cc(O)c12",1,1,1,1 +"O=C(CCc1ccc(O)cc1)c1c(O)cc(O)cc1O",,1,1,1 +"CCC(=O)N(c1ccccc1)C1CCN(CCc2ccccc2)CC1",0,0,,0 +"O=C(O)CCC(=O)c1ccc(-c2ccccc2)cc1",,0,0, +"CCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1cccc(Cl)c1Cl",,0,1, +"NC(=O)OCC(COC(N)=O)c1ccccc1",0,0,0,0 +"CCNC(C)Cc1cccc(C(F)(F)F)c1",0,0,, +"COC(=O)c1ccc(C)cc1C1=NC(=O)C(C)(C(C)C)N1",0,0,0,0 +"CCOP(=S)(NC(C)C)Oc1ccccc1C(=O)OC(C)C",1,0,, +"CSc1nc(NC2CC2)nc(NC(C)(C)C)n1",1,0,,1 +"C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2C(=O)CO",0,1,,0 +"CN(C)[C@@H]1C(O)=C(C(N)=O)C(=O)[C@@]2(O)C(O)=C3C(=O)c4c(O)ccc(Cl)c4[C@@H](O)[C@H]3C[C@@H]12",,,0, +"CCC1(C)CC(=O)NC(=O)C1",0,0,,0 +"O=C1NCN(c2ccccc2)C12CCN(CCCOc1ccc(F)cc1)CC2",0,,1,1 +"NC(=S)NNC(N)=S",0,0,0,0 +"NC(=S)C(N)=S",0,0,,0 +"CC1CN1P(=O)(N1CC1C)N1CC1C",0,0,0,0 +"O=C(Oc1ccccc1)Oc1ccccc1",0,0,0,0 +"C[Hg]Cl",0,0,, +"S=c1[nH]cnc2[nH]cnc12",1,0,1,0 +"[Hg+2]",1,,, +"CCCCCCCCCCCCNC(=N)N",0,0,, +"CN(C)CCN(Cc1cccs1)c1ccccn1",0,0,0,0 +"COc1nn(CSP(=S)(OC)OC)c(=O)s1",0,0,0,0 +"NC1=NCC2c3ccccc3Cc3ccccc3N12",0,0,0,0 +"CC(=O)[C@H]1CC[C@H]2[C@@H]3CC[C@H]4C[C@](C)(O)CC[C@]4(C)[C@H]3CC[C@]12C",0,0,0,0 +"O=C([O-])CCC/C=C\C[C@H]1[C@@H](O)C[C@@H](O)[C@@H]1/C=C/[C@@H](O)COc1cccc(Cl)c1",0,0,, +"O=C(O)Cc1ccc(CCNS(=O)(=O)c2ccc(Cl)cc2)cc1",0,0,0,0 +"NC(=O)c1cn(Cc2c(F)cccc2F)nn1",0,0,0,0 +"COc1ccc(C=CC(=O)OCCC(C)C)cc1",,,,0 +"O=C(NC1CCN(CCc2c[nH]c3ccccc23)CC1)c1ccccc1",,0,0,0 +"CCn1cc[n+](C)c1C.O=S(=O)([O-])C(F)(F)F",0,1,0,0 +"Clc1ccc2cc3ccccc3cc2c1",1,,0, +"CCCCn1cc[n+](C)c1.F[B-](F)(F)F",0,0,1,0 +"F/C(COc1ccc2c(c1)[nH]c1ccccc12)=C1/CN2CCC1CC2",1,0,, +"CC(C)Cc1ccc([C@@H](C)C(=O)NS(C)(=O)=O)cc1",0,0,,0 +"CCCCN(CCCC)C(=S)SSC(=S)N(CCCC)CCCC",0,0,, +"CCC[n+]1ccn(C)c1C.O=S(=O)([N-]S(=O)(=O)C(F)(F)F)C(F)(F)F",0,0,0, +"Brc1c2ccccc2cc2ccccc12",0,1,, +"CCO/C=C1\N=C(c2ccccc2)OC1=O",,0,0,0 +"CNc1cc(OC)c(C(=O)N[C@H]2CCN(Cc3ccccc3)[C@H]2C)cc1Cl",0,0,,0 +"CCN1CCCC1CNC(=O)c1cc(S(=O)(=O)CC)c(N)cc1OC",0,0,0,0 +"COc1cc2c(cc1OC)C1CC(=O)C(CC(C)C)CN1CC2",0,0,0, +"Cc1cc(C)cc(C(=O)OC2C[C@@H]3CC[C@H](C2)N3C)c1",0,0,,0 +"CC[N+]1(C)CCCC1.O=S(=O)([O-])C(F)(F)F",0,0,0,0 +"COP(=O)(OC)SCn1c(=O)oc2cc(Cl)cnc21",1,0,1, +"CNC(=O)/C=C(\C)OP(=O)(OC)OC",0,0,0,0 +"Cc1occc1SSc1ccoc1C",0,0,,0 +"Cc1cc(C(F)(C(F)(F)F)C(F)(F)F)ccc1NC(=O)c1cccc(I)c1C(=O)NC(C)(C)CS(C)(=O)=O",0,,,1 +"CC=CC(=O)CC",0,0,0,0 +"CC1OCCC1=O",0,0,0,0 +"CC1CCCC(=O)C1=O",0,0,0,0 +"CC1=C(O)C(=O)OC1C",0,0,,0 +"CCCCCc1ccco1",0,0,,1 +"c1cnc2c(n1)CCCC2",0,0,0,0 +"CCCCc1ccc2cccc(S(=O)(=O)[O-])c2c1",0,0,0,0 +"Cc1cc(S(=O)(=O)[O-])ccc1/N=N/c1c(O)ccc2ccccc12",1,0,, +"Cc1ccc(N=Nc2c(O)ccc(N=Nc3ccc(S(=O)(=O)[O-])cc3)c2O)c(C)c1",1,0,0, +"Nc1cnn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1",0,0,0, +"CCNc1nc(Cl)nc(NC(C)(C)C)n1",,1,0, +"NS(=O)(=O)c1cc2c(cc1Cl)N=CNS2(=O)=O",0,0,0,0 +"Oc1c(Cl)cc(Cl)c2cccnc12",1,,,1 +"NC(=O)OCC(O)COc1ccc(Cl)cc1",,0,,0 +"CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21",0,,, +"CCCCCCCCCCCCCCn1cc[n+](C)c1",0,,1,1 +"O=[Cr](=O)([O-])O[Cr](=O)(=O)[O-]",0,0,1, +"O=P(Cl)(Cl)Cl",0,0,0,0 +"CCN(Cc1cccc(S(=O)(=O)[O-])c1)c1ccc(C(=C2C=CC(=[N+](CC)Cc3cccc(S(=O)(=O)[O-])c3)C=C2)c2ccccc2)cc1",0,0,1,0 +"CC(C)COC(=O)COc1cc(Cl)c(Cl)cc1Cl",0,0,, +"O=C(OC[C@H]1O[C@@H](OC(=O)c2cc(O)c(O)c(OC(=O)c3cc(O)c(O)c(O)c3)c2)[C@H](OC(=O)c2cc(O)c(O)c(OC(=O)c3cc(O)c(O)c(O)c3)c2)[C@@H](OC(=O)c2cc(O)c(O)c(OC(=O)c3cc(O)c(O)c(O)c3)c2)[C@@H]1OC(=O)c1cc(O)c(O)c(OC(=O)c2cc(O)c(O)c(O)c2)c1)c1cc(O)c(O)c(OC(=O)c2cc(O)c(O)c(O)c2)c1",1,0,1,1 +"CN(C)CCOC(c1ccccc1)c1ccccc1",0,0,0,0 +"COC(=O)c1ccc(C)cc1",0,0,0,0 
+"CN(C)CCCN1c2ccccc2C(C)(C)c2ccccc21",0,0,, +"COc1ccc([C@@H]2Sc3ccccc3N(CCN(C)C)C(=O)[C@@H]2OC(C)=O)cc1",0,0,0,0 +"CC(=O)C=Cc1ccccc1",0,1,0,0 +"Cc1c[nH]c(=S)[nH]c1=O",0,0,0,0 +"COc1ccc2cc1Oc1cc3c(cc1OC)CC[N+](C)(C)[C@H]3Cc1ccc(cc1)Oc1c(OC)c(OC)cc3c1[C@@H](C2)[N+](C)(C)CC3",0,0,0,0 +"CCCCC[C@H](O)/C=C/[C@H]1[C@H](O)CC(=O)[C@@H]1C/C=C\CCCC(=O)O",0,,0,0 +"CCCCC[C@H](O)/C=C/[C@H]1[C@H](O)C[C@H](O)[C@@H]1C/C=C\CCCC(=O)O",0,1,,0 +"CC12CCC(CC1)C(C)(C)O2",1,0,0,0 +"C=COCC1CCC(CO)CC1",0,0,0,0 +"CC(=O)/C=C/C1=C(C)CCCC1(C)C",0,0,0,0 +"CC(=O)NC1CCSC1=O",0,0,0,0 +"CC(C)(C)CC(C)(C)c1ccc(O)c(Cc2ccc(Cl)cc2Cl)c1",0,,0,1 +"CC1COc2ccccc2N1C(=O)C(Cl)Cl",1,0,1,0 +"CC(N)CN",0,0,0,0 +"CCC(C)O",0,,0,0 +"CCCCC(CC)CNC(=N)NC(=N)NCCCCCCNC(=N)NC(=N)NCC(CC)CCCC",,,, +"CC(O)CN",0,0,0,0 +"CO/N=C(\C(=O)N[C@@H]1C(=O)N2C(C(=O)[O-])=C(CSc3nc(=O)c([O-])nn3C)CS[C@H]12)c1csc(N)n1",,,0,0 +"O=c1oc2cc(O)ccc2s1",0,0,,0 +"C=CCc1ccc(O)c(OC)c1",0,0,0,0 +"COC(=O)[C@@H](N)CCCN/C(N)=N/[N+](=O)[O-]",0,0,0,0 +"CC1(S(=O)(=O)[O-])CC(=O)c2ccccc2C1=O",1,,, +"Cc1nnc2n1-c1sc(CCC(=O)N3CCOCC3)cc1C(c1ccccc1Cl)=NC2",0,0,0, +"C[C@H](N[C@H](CCc1ccccc1)C(=O)O)C(=O)N1CCC[C@H]1C(=O)O",0,0,0,0 +"CCC(C)[C@H]1O[C@]2(C=C[C@@H]1C)C[C@@H]1C[C@@H](CC=C(C)[C@@H](O[C@H]3C[C@H](OC)[C@@H](O[C@H]4C[C@H](OC)[C@H](NC(C)=O)[C@H](C)O4)[C@H](C)O3)[C@@H](C)C=CC=C3CO[C@@H]4[C@H](O)C(C)=C[C@@H](C(=O)O1)[C@]34O)O2",0,,,1 +"COc1c(Br)cc(Br)c(C)c1Br",0,0,0, +"C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12.C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12",1,1,,0 +"CN(C)CCc1c[nH]c2ccc(CS(=O)(=O)N3CCCC3)cc12",0,0,0,0 +"O=C1/C(=C2\Nc3ccc(S(=O)(=O)O)cc3C2=O)Nc2ccc(S(=O)(=O)O)cc21",0,1,,0 +"CC(=O)O[Cr](O)OC(C)=O.CC(=O)O[Cr](O)OC(C)=O.CC(=O)O[Cr](OC(C)=O)OC(C)=O",0,0,,0 +"CO[C@H]1CC(O[C@H]2C[C@H]([C@H]3O[C@](C)(O)[C@H](C)C[C@@H]3C)O[C@H]2[C@]2(C)CC[C@H]([C@]3(C)CC[C@]4(C[C@H](O)[C@@H](C)[C@@H]([C@@H](C)[C@@H]5O[C@](O)(CC(=O)[O-])[C@@H](C)[C@H](OC)[C@H]5OC)O4)O3)O2)O[C@@H](C)[C@@H]1OC",0,,, +"C=CC(=O)OCCn1c(=O)n(CCOC(=O)C=C)c(=O)n(CCOC(=O)C=C)c1=O",0,,,0 +"C=C(C)C(=O)OCCNC(C)(C)C",0,0,0,0 +"NS(=O)(=O)c1ccccc1OC(F)(F)F",0,0,0,0 +"O=C=NCC1CCCC(CN=C=O)C1",0,0,0,0 +"C=C[C@H]1CN2CCC1C[C@@H]2[C@@H](O)c1ccnc2ccccc12",,0,, +"Cc1cc(N)c2cc(NC(=O)Nc3ccc4nc(C)cc(N)c4c3)ccc2n1",,,, +"O=C(O)c1ccccc1O.Oc1cccc2cccnc12",0,1,0, +"C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CC[C@@H]2OP(=O)(O)O",0,1,,0 +"O=C(O)[C@@H](S)[C@H](S)C(=O)O",0,,1,0 +"CN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1",0,0,, +"Cc1ncc(CO)c(CN)c1O",,,0, +"NS(=O)(=O)c1ccc(C(=O)O)cc1",0,0,0,0 +"O=C(CCS)OCC(COC(=O)CCS)(COC(=O)CCS)COC(=O)CCS",0,0,, +"C[C@@H]1NC(=O)[C@@H](N)CNC(=O)[C@H]([C@@H]2CCNC(N)=N2)NC(=O)/C(=C/NC(N)=O)NC(=O)[C@H](CNC(=O)C[C@@H](N)CCCN)NC1=O",,,0, +"OCCOCCN1CCN(C2=Nc3ccccc3Sc3ccccc32)CC1",0,0,0, +"Cc1cc(/C=C/c2ccc3cc(N(C)C)ccc3[n+]2C)c(C)n1-c1ccccc1.Cc1cc(/C=C/c2ccc3cc(N(C)C)ccc3[n+]2C)c(C)n1-c1ccccc1.O=C([O-])c1cc2ccccc2c(Cc2c(O)c(C(=O)[O-])cc3ccccc23)c1O",,,,1 +"COc1ccc(CN(CCN(C)C)c2ccccn2)cc1",0,0,0,1 +"CN(C)C(=O)Oc1ccc[n+](C)c1",0,1,0,0 +"Cc1ncc(CO)c(CO)c1O",0,0,0,0 +"CCC1NC(=O)c2cc(S(N)(=O)=O)c(Cl)cc2N1",0,0,0, +"C=C[C@H]1CN2CCC1C[C@@H]2[C@@H](O)c1ccnc2ccc(OC)cc12",1,1,0, +"Brc1cc2ccccc2c2ccccc12",,0,0, +"CC(C)(N)CO",,0,0,0 +"CC(C)(CO)CO",0,0,0,0 +"O=S1(=O)CCCC1",0,0,0,0 +"O=[N+]([O-])C(CO)(CO)CO",0,0,1,0 +"OCC(CO)(CO)COCC(CO)(CO)CO",0,0,0, +"O=[N+]([O-])OCCN(CCO[N+](=O)[O-])CCO[N+](=O)[O-]",0,0,0,0 +"NC(CO)(CO)CO",0,0,0,0 +"O=C(Cl)c1cc(C(=O)Cl)cc(C(=O)Cl)c1",,0,,0 +"CO[Si](CCCS)(OC)OC",,0,0,0 +"COc1cc2c3cc1Oc1cc(ccc1O)C[C@@H]1c4c(cc(OC)c(O)c4Oc4ccc(cc4)C[C@@H]3N(C)CC2)CC[N+]1(C)C",0,0,0,0 
+"O=C(O[C@@H]1C[C@@H]2CC[C@H](C1)[N+]21CCCC1)C(O)(c1ccccc1)c1ccccc1",0,0,0,0 +"COc1cc(C(=O)NC2CCCNC2)cc(OC)c1OC",0,0,0,0 +"C[N+](C)=CCl",0,0,0,0 +"CC(=O)c1cccnc1",0,0,0,0 +"O=S1(=O)OC(c2ccc([O-])cc2)(c2ccc(O)cc2)c2ccccc21",0,0,0, +"O=CN1CCOCC1",0,0,0,0 +"COC(=O)CCC(=O)O",0,0,0,0 +"NCc1cccnc1",0,0,0,0 +"CCCCCCCCn1sc(Cl)c(Cl)c1=O",0,,, +"Cc1cc(O)cc(C)c1Cl",0,0,0,1 +"O=[Zr](Cl)Cl",0,0,,0 +"CSc1ccc2c(c1)N(CCC1CCCCN1C)c1ccccc1S2",0,,, +"c1ccc2cnncc2c1",0,0,0,0 +"COC(=O)c1ccc(CBr)cc1",1,1,0,0 +"CN1CCc2cc(Cl)c(O)cc2[C@H]2c3ccccc3CC[C@@H]21",0,0,0, +"O=P(O)(OCc1ccccc1)OCc1ccccc1",0,0,0,0 +"S=C=NCc1ccccc1",,0,,1 +"Oc1ccc(Cl)cc1Cc1ccccc1",0,0,1,1 +"ClCc1ccccc1",0,0,0,0 +"OCc1ccccc1",0,0,0,0 +"CC(=O)OCc1ccccc1",0,0,0,0 +"COCCc1ccc(OCC(O)CNC(C)C)cc1.COCCc1ccc(OCC(O)CNC(C)C)cc1",0,0,0,0 +"CN(C)C(=N)NC(=N)N",0,0,0,0 +"CCCC[Sn](CCCC)(OC(C)=O)OC(C)=O",,,1,1 +"C[NH+](C)CCC(c1ccccc1)c1cccc[nH+]1",0,1,0,0 +"CCOc1ccc(N)cc1",1,0,,0 +"CC(C)=CCC[C@H](C)CCO",0,0,0,0 +"CCOc1cccc(N)c1",,0,0,0 +"Nc1ccccc1C(=O)OCCc1ccccc1",1,1,0, +"CC(C)CC(O)CC(C)C",0,0,0,0 +"C=C(C)C(=O)OCCOP(=O)(O)OCCOC(=O)C(=C)C",0,0,0,0 +"CC(C)OS(C)(=O)=O",0,0,0, +"c1ccc2c(c1)OCC(CN1CCCCC1)O2",0,0,,0 +"C=CCN1CCCC1CNC(=O)c1cc(S(N)(=O)=O)cc(OC)c1OC",0,0,,0 +"C=C(C)OC(C)=O",0,0,0,0 +"Cc1cc(O)cc2c1O[C@](C)(CCC[C@H](C)CCC[C@H](C)CCCC(C)C)CC2",0,0,0,0 +"Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1",0,0,,0 +"NC(=O)[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O",0,0,,0 +"O=C1CC[C@@H](C(=O)O)N1",0,0,0,0 +"CN1C(=S)CN=C(c2ccccc2)c2cc(Cl)ccc21",0,,0, +"CC(C)(C)OC(=O)c1ncn2c1[C@@H]1CCCN1C(=O)c1c(Br)cccc1-2",,0,,0 +"CCC(Cc1c(I)cc(I)c(O)c1I)C(=O)O",0,,, +"CCOc1cc(NC(C)=O)ccc1C(=O)OC",,0,0,0 +"CC(O)C#CC(C)O",0,0,, +"COc1ccc(N)cc1N",1,0,1,1 +"CC1(C)[C@@H](O[C@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O[C@@H]2O[C@H](C(=O)[O-])[C@@H](O)[C@H](O)[C@H]2O)CC[C@@]2(C)[C@H]1CC[C@]1(C)[C@@H]2C(=O)C=C2[C@@H]3C[C@@](C)(C(=O)O)CC[C@]3(C)CC[C@]21C",0,0,0,0 +"O=C1NC(=O)C(=O)C(=O)N1",0,0,0,0 +"CC(C)(C)c1cc(/C=C2\SC(=N)NC2=O)cc(C(C)(C)C)c1O",0,,1,1 +"OCCCC1CCCCC1",0,0,0,0 +"Cc1cc2c3c(c1)C(c1ccccc1)=N[C@@H](NC(=O)c1ccncc1)C(=O)N3CC2",0,0,,0 +"CCc1cc(C2=C(C(=O)[O-])N(c3ccccc3C(F)(F)F)S(=O)(=O)c3ccccc32)cc2c1OCO2",0,0,,1 +"O=S(=O)([O-])c1ccc2c(/N=N\c3ccc(S(=O)(=O)[O-])c4ccccc34)c(O)c(S(=O)(=O)[O-])cc2c1",0,0,0,0 +"O=C=Nc1ccc(Cl)cc1",,,,1 +"CC(C)OC(=O)c1ccccc1C(=O)OC(C)C",0,0,0,0 +"CO[C@H]1[C@H]([C@@]2(C)O[C@@H]2CC=C(C)C)[C@]2(CC[C@H]1OC(=O)/C=C/C=C/C=C/C=C/C(=O)O)CO2",0,0,1,0 +"C1CCC2(CCCCO2)OC1",0,0,0,0 +"CCCCC(CC)COC(=O)c1ccccc1O",,,0, +"C[C@H]1O[C@H](O[C@@H]2[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)[C@H]2O)[C@@H](N)C[C@@H]1NC(=N)C(=O)O",0,0,0,0 +"CN(C)CCCN1c2ccccc2CCc2ccccc21",0,,, +"CCCCOCCO",0,0,0,0 +"[O-][n+]1ccccc1[S-]",0,,,1 +"CCCN(CCC)C(=O)SCC",0,0,0,0 +"O=S(=O)([O-])c1cccc2ccccc12",0,0,0,0 +"CC(C)C1=CC2=CC[C@H]3[C@](C)(C(=O)[O-])CCC[C@]3(C)[C@H]2CC1",1,0,, +"CN(C)c1ccc(C(=O)c2ccc(N(C)C)cc2)cc1",1,,0,1 +"N#CCCC#N",0,0,0,0 +"Cc1ncc([N+](=O)[O-])n1CCO",0,0,0,0 +"Nc1c(CC(=O)[O-])cccc1C(=O)c1ccccc1",0,1,,0 +"C[N+]1(C)[C@H]2CC[C@@H]1C[C@H](OC(=O)C(CO)c1ccccc1)C2",0,1,0,0 +"CC(Cl)(Cl)C(=O)O",0,0,0,0 +"CN[C@@H]1[C@H](O[C@H]2[C@H](O[C@@H]3[C@@H](NC(=N)N)[C@H](O)[C@@H](NC(=N)N)[C@H](O)[C@H]3O)O[C@@H](C)[C@]2(O)C=O)O[C@@H](CO)[C@H](O)[C@H]1O.CN[C@@H]1[C@H](O[C@H]2[C@H](O[C@@H]3[C@@H](NC(=N)N)[C@H](O)[C@@H](NC(=N)N)[C@H](O)[C@H]3O)O[C@@H](C)[C@]2(O)C=O)O[C@@H](CO)[C@H](O)[C@H]1O",0,0,,0 +"O=C(CCl)CCl",0,0,, +"CC(=O)C(Cl)Cl",0,0,0,0 +"CC(C)(c1ccccc1)c1ccc(O)cc1",0,1,1,1 +"Cc1cc(O)c2c(O)c3c(O)cccc3cc2c1",1,0,1,1 +"CCC(=O)[N-]S(=O)(=O)c1ccc(-c2c(-c3ccccc3)noc2C)cc1",0,0,,0 
+"Cc1ccccc1N1CCN(CCc2nnc3n2CCCC3)CC1",0,1,0,0 +"C=Cc1ccc(S(=O)(=O)[O-])cc1",0,0,1, +"C[C@]12CC[C@@H]3c4ccc(OC(=O)N(CCCl)CCCl)cc4CC[C@H]3[C@@H]1CC[C@@H]2OP(=O)(O)O",0,1,,0 +"CC1Cc2ccccc2N1NC(=O)c1ccc(Cl)c(S(N)(=O)=O)c1",0,0,0,0 +"CC(=O)OC(C)C(=O)OCC[N+](C)(C)C.CC(=O)OC(C)C(=O)OCC[N+](C)(C)C.O=S(=O)([O-])c1cccc2c(S(=O)(=O)[O-])cccc12",0,1,,0 +"CC(=O)CC(=O)Nc1ccc2[nH]c(=O)[nH]c2c1",0,0,0,0 +"CCO[Si](C)(CCCOCC1CO1)OCC",0,0,0,0 +"O=[N+]([O-])c1cc(C(F)(F)F)c(Cl)c([N+](=O)[O-])c1Cl",0,,, +"CCCCOCCOCCOCCO",0,0,0,0 +"CCCCCCCC/C=C/C(=O)[O-].CCCCCCCC/C=C/C(=O)[O-]",,,0, +"Nc1cc(C(F)(F)F)ccc1S",,,, +"Cc1cccc(Cc2c[nH]cn2)c1C",,0,0,0 +"CCOC(=O)CC(=O)OCC",,0,0,0 +"COc1ccc(CNCC(O)COc2ccc3[nH]c(=O)ccc3c2)cc1OC",0,0,,0 +"COC(=O)C1=C(C)NC(COC(N)=O)=C(C(=O)OC(C)C)C1c1cccc(Cl)c1Cl",,0,1, +"CCNC(=O)NCCCOc1cccc(CN2CCCCC2)c1",0,0,0,0 +"CC(=O)SCC(CC(=O)c1ccc(C)cc1)C(=O)O",0,0,0,0 +"CCOC(=O)Cn1cccc1-c1nc(-c2ccc(OC)cc2)c(-c2ccc(OC)cc2)s1",0,0,1,0 +"O=C(CCCN1CCN(c2ccc(F)cc2)CC1)NC1c2ccccc2CSc2ccccc21",0,0,, +"CC(C)(C)NC[C@H](O)c1ccc(O)cc1Cl",0,0,0,0 +"CCCCC/C=C\C/C=C\CCCCCCCC(=O)NC(C)c1ccccc1",0,0,1,0 +"CC(NN)c1ccccc1",,,0,0 +"O=Cc1ccc(Cl)cc1",0,0,0, +"CCN(C)C(=O)Oc1cccc([C@H](C)N(C)C)c1",0,0,0,0 +"CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1",0,,,1 +"CCN(CC)C(C)C(=O)c1ccccc1",,0,0,0 +"CCN1CC(CCN2CCOCC2)C(c2ccccc2)(c2ccccc2)C1=O",0,0,,0 +"Cc1cccc(C(=O)O)c1[N+](=O)[O-]",0,0,0,0 +"CCN(CC)CCOC(=O)C1(c2ccccc2)CCCC1.CCN(CC)CCOC(=O)C1(c2ccccc2)CCCC1.O=S(=O)(O)CCS(=O)(=O)O",0,0,0,0 +"Clc1ccccn1",0,0,0,0 +"CCC(=O)/C=C/C1C(C)=CCCC1(C)C",0,0,0,0 +"CC1CC(OC(=O)c2ccccc2O)CC(C)(C)C1",0,0,0,0 +"CCCCCCCCCO",0,0,0,0 +"CCCCCCNCCCCCC",0,0,0,0 +"CCN(CC)c1ccc(N)cc1",1,,1,1 +"ClCCCCl",0,0,0,0 +"CCCCCCOC(C)=O",0,0,0,0 +"CCCCC(CC)COC(=O)CCCCCCCCC(=O)OCC(CC)CCCC",0,,0,0 +"CCOC(C)=O",0,0,0, +"NCCNCCN",0,0,0,0 +"CCOP(=O)(CC)OCC",0,0,0, +"Cc1c2oc3c(C)ccc(C(=O)N[C@@H]4C(=O)N[C@H](C(C)C)C(=O)N5CCC[C@H]5C(=O)N(C)CC(=O)N(C)[C@@H](C(C)C)C(=O)O[C@@H]4C)c3nc-2c(C(=O)N[C@@H]2C(=O)N[C@H](C(C)C)C(=O)N3CCC[C@H]3C(=O)N(C)CC(=O)N(C)[C@@H](C(C)C)C(=O)O[C@@H]2C)c(N)c1=O",0,,, +"NC(=O)CCCCC(N)=O",0,0,0,0 +"CNC(=O)ON=CC(C)(C)SC",0,0,0,0 +"C=CCOc1ccc(CC(=O)O)cc1Cl",0,1,,0 +"NN",1,0,0,0 +"N[C@@H](Cc1cnc[nH]1)C(=O)O",,,,0 +"NNc1nc(-c2ccccc2)cs1",1,0,1,1 +"NNc1nc(-c2ccc(N)cc2)cs1",1,0,1,1 +"Cc1ccccc1CO[C@H]1C[C@]2(C(C)C)CC[C@@]1(C)O2",,0,0,0 +"NNc1ccc(C(=O)O)cc1",,,0,0 +"CCCCCCOc1ccccc1C(=O)O",0,,0,0 +"O=C(OCc1ccccc1)C(=O)OCc1ccccc1",0,0,0,0 +"CCCSc1ccc2[nH]c(NC(=O)OC)nc2c1",1,1,0,1 diff --git a/chemprop-updated/tests/data/classification/mol_multiclass.csv b/chemprop-updated/tests/data/classification/mol_multiclass.csv new file mode 100644 index 0000000000000000000000000000000000000000..bc14f7fd74c37ebffa5eefab411859e46efe99ac --- /dev/null +++ b/chemprop-updated/tests/data/classification/mol_multiclass.csv @@ -0,0 +1,500 @@ +smiles,activity +CCC1=[O+][Cu-3]2([O+]=C(CC)C1)[O+]=C(CC)CC(CC)=[O+]2,0 +C(=Cc1ccccc1)C1=[O+][Cu-3]2([O+]=C(C=Cc3ccccc3)CC(c3ccccc3)=[O+]2)[O+]=C(c2ccccc2)C1,0 +CC(=O)N1c2ccccc2Sc2c1ccc1ccccc21,0 +Nc1ccc(C=Cc2ccc(N)cc2S(=O)(=O)O)c(S(=O)(=O)O)c1,0 +O=S(=O)(O)CCS(=O)(=O)O,0 +CCOP(=O)(Nc1cccc(Cl)c1)OCC,0 +O=C(O)c1ccccc1O,0 +CC1=C2C(=COC(C)C2C)C(O)=C(C(=O)O)C1=O,0 +O=[N+]([O-])c1ccc(SSc2ccc([N+](=O)[O-])cc2[N+](=O)[O-])c([N+](=O)[O-])c1,0 +O=[N+]([O-])c1ccccc1SSc1ccccc1[N+](=O)[O-],0 +CC(C)(CCC(=O)O)CCC(=O)O,0 +O=C(O)Cc1ccc(SSc2ccc(CC(=O)O)cc2)cc1,1 +O=C(O)c1ccccc1SSc1ccccc1C(=O)O,0 +CCCCCCCCCCCC(=O)Nc1ccc(SSc2ccc(NC(=O)CCCCCCCCCCC)cc2)cc1,0 +Sc1cccc2c(S)cccc12,0 +CCOP(N)(=O)c1ccccc1,0 +NNP(=S)(NN)c1ccccc1,1 
+O=P(Nc1ccccc1)(Nc1ccccc1)Nc1ccccc1,0 +O=C1C(O)=C(CCCc2ccc(Oc3ccccc3)cc2)C(=O)c2ccccc21,0 +CC(C)N(C(C)C)P(=O)(OP(=O)(c1ccc([N+](=O)[O-])cc1)N(C(C)C)C(C)C)c1ccc([N+](=O)[O-])cc1,0 +c1ccc2c(c1)Sc1ccccc1S2,0 +CC(C)CCS(=O)(=O)O,0 +Cc1ccccc1NC(=N)Nc1ccccc1C,0 +CCCNP(=S)(NCCC)NCCC,0 +CCCCCCCCCCCCNP(=S)(NCCCCCCCCCCCC)NCCCCCCCCCCCC,0 +O=C1OC(=O)c2c1ccc1ccccc21,0 +S=P(NC1CCCCC1)(NC1CCCCC1)NC1CCCCC1,0 +Clc1ccnc2c1ccc1c(Cl)ccnc12,0 +O=C(OOC(=O)c1ccccc1)c1ccccc1,0 +c1ccc2nsnc2c1,0 +S=C1NCCS1,0 +CN(C)C1=[S+][Zn-2]2(S1)SC(N(C)C)=[S+]2,0 +CN(Cc1cnc2nc(N)nc(N)c2n1)c1ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc1,0 +[N-]=[N+]=CC(=O)OCC(N)C(=O)O,0 +Nc1nc(O)c2nn[nH]c2n1,0 +CS(=O)(=O)OCCCCOS(C)(=O)=O,0 +Nc1nc(S)c2nc[nH]c2n1,0 +Sc1ncnc2[nH]cnc12,0 +COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1C(NC(C)=O)CC2,0 +CN(CCCl)CCCl,0 +CS(C)=O,0 +CCCCOB(OCCCC)OCCCC,0 +CCCCCOB(OCCCCC)OCCCCC,0 +CC1CC(C)(C)OB(OC(C)CC(C)(C)OB2OC(C)CC(C)(C)O2)O1,0 +c1ccn2nnnc2c1,0 +c1ccn2nncc2c1,0 +Clc1ccc(Cl)c(SSc2cc(Cl)ccc2Cl)c1,0 +CN(C)c1ccc(SSc2ccc(N(C)C)cc2)cc1,0 +Brc1ccc(SSc2ccc(Br)cc2)cc1,0 +Cc1ccc(SSc2ccc(C)cc2)cc1,0 +COc1ccc(SSc2ccc(OC)cc2)cc1,0 +NC1(C(=O)O)CCCC1,0 +CC(C)(Br)C(=O)C(Br)Br,0 +CCOC(=S)SCCSC,0 +CCOCC(C)(CO)CC(C)CO,0 +O=C(O)C1CC1,0 +O=C(O)C1(O)CC(O)C(O)C(O)C1,0 +Nc1c(Cl)cc(Cl)cc1C(=O)O,0 +CCCCCOC(=S)S,0 +O=C(O)c1ccc([N+](=O)[O-])cc1S(=O)(=O)O,0 +NC(=O)c1cc(O)c(O)c(O)c1,0 +C1C[S+]2CC[S+]1CC2,0 +Nc1cc(Cl)c(S(=O)(=O)O)cc1Cl,0 +CC12CCC(C(Br)C1=O)C2(C)CS(=O)(=O)O,0 +CCC(C)(C(=O)O)C(=O)O,0 +CC(C)C(C(=O)O)C(=O)O,0 +CCOC(=O)C(=O)C1CCCCC1=O,0 +CCOC(=O)CNS(=O)(=O)c1ccccc1,0 +CCN(CC)C(C)(O)CN,0 +Cc1cccc2c(=O)c3ccccc3oc12,0 +CCCCCCCCCCCC(=O)OCCOCCOCCOCCOCCOCCOCCOCCOCCO,0 +C1CN[Co-4]23(N1)(NCCN2)NCCN3,0 +CC(C)OC(=S)SSC(=S)OC(C)C,0 +O=C(Nc1ccccc1)OCC1OCOC(COC(=O)Nc2ccccc2)C1OC(=O)Nc1ccccc1,0 +OCC1OCOC2COCOC12,0 +CC(=O)OC1C(OC(C)=O)C(OC(C)=O)C2(CO2)C(OC(C)=O)C1OC(C)=O,0 +CCN(CC)C(=O)N1CCN(C)CC1,0 +CC(=O)OC1COC(c2ccccc2)OC1C1OC(c2ccccc2)OCC1OC(C)=O,0 +Oc1ncnc2[nH]ncc12,0 +O=C1O[Cu-5]2(O)(O)(OC1=O)OC(=O)C(=O)O2,0 +O=Nc1ccc(O)c(N=O)c1O,1 +Oc1ccc(Nc2ccccc2)cc1,0 +CCCCCCc1ccc(O)cc1O,0 +CCCCCCCC[N+]12CN3CN(CN(C3)C1)C2,0 +CC(C)(O)O.CC1(O)C(O)C(O)C1(O)CO,0 +OC1COCOC1C(O)C1OCOCC1O,0 +CN(C)C(=S)SSC(=S)N(C)C,0 +O=[N+]([O-])c1ccc(C=Cc2ccc([N+](=O)[O-])cc2S(=O)(=O)O)c(S(=O)(=O)O)c1,0 +CCc1cc[n+]([Mn](SC#N)(SC#N)([n+]2ccc(CC)cc2)([n+]2ccc(CC)cc2)[n+]2ccc(CC)cc2)cc1,0 +N=c1[nH][nH]c(=N)[nH]1,0 +O=S(=O)(O)CCO,0 +O=C1CSC(=S)N1,0 +C1CCNCC1.S=C(S)N1CCCCC1,0 +C1SCSCS1,0 +CCC(CC)(C(=O)O)C(=O)O,0 +N#CC(=Cc1ccccc1)c1ccccc1,0 +N#CNC(=N)N,0 +O=C1C(O)=C(CCCC2CCC3CCCCC3C2)C(=O)c2ccccc21,0 +O=[N+]([O-])c1cc([As](=O)(O)O)ccc1O,0 +O=C(O)c1ccccc1S,0 +CCOC(=O)C(C(=O)OCC)C(C(=O)OCC)C(=O)OCC,0 +C=C(C)CS(=O)(=O)O,0 +CCSc1ccc(N=[N+]([O-])c2ccc(SCC)c(Cl)c2)cc1Cl,0 +COC1C(OC(N)=O)C(O)C(Oc2ccc3c(O)c(NC(=O)c4ccc(O)c(CC=C(C)C)c4)c(=O)oc3c2C)OC1(C)C,0 +O=C1C(=Cc2ccccc2)CCCC1=Cc1ccccc1,0 +CCCCCCC(O)CCCCCCCCCCC(=O)OCC(COC(=O)CCCCCCCCCCC(O)CCCCCC)OC(=O)CCCCCCCCCCC(O)CCCCCC,0 +O=S(=O)(O)CO,0 +CCN(CC)CCCCCCNc1cc(OC)cc2c(C)ccnc12,0 +CCCC(O)CNCc1ccc(N(C)C)cc1,0 +N=C1NC(=O)C(c2ccccc2)S1,0 +Cc1cc(SCC(=O)c2ccccc2[N+](=O)[O-])cc(C)[o+]1,0 +CC12CCC(CC1)C(C)(C)NC(=N)S2,0 +CCSC(SCC)C(O)C(O)C(O)C(O)C(O)C(O)CO,0 +O=C(O)C(O)C(O)C(O)C(O)C(O)C(O)CO,0 +Nc1ccc(S(=O)(=O)Nc2ccccc2)cc1,0 +ClP1(Cl)=NP(Cl)(Cl)=NP(Cl)(Cl)=N1,0 +CCN(CC)CC.O=C(Nc1ccc(Cl)cc1)P(=O)(O)c1ccccc1,0 +O=S(=O)(O)CCCCBr,0 +c1ccc(SSc2ccccc2)cc1,0 +O=S(=O)(O)CC(S(=O)(=O)O)S(=O)(=O)O,0 +O=S1(=O)CCCc2c1ccc1ccccc21,0 +CCOC(=O)C(CCCCS(=O)(=O)O)C(=O)OCC,0 +O=S1(=O)CCc2ccccc2C(Br)C1,0 +CCCCCCCCS(=O)(=O)O,0 +CCOCCCCS(=O)(=O)O,0 +CCOC(C)CCCS(=O)(=O)O,0 
+O=S(=O)(O)CCCCCO,0 +O=C(O)CN(CCN(CC(=O)O)CC(=O)O)CC(=O)O,0 +CC(=O)N1C(=O)C(=O)c2ccccc21,0 +O=C1NC2NC(=O)NC2N1,0 +O=C1c2ccccc2C(=O)N1Cc1ccccc1,0 +CN(C)c1ccc(N=O)cc1,0 +O=S1(=O)CS(=O)(=O)CS(=O)(=O)C1,0 +ClC(Cl)(Cl)C1OCOC(C(Cl)(Cl)Cl)OCO1,0 +O=C1c2c(O)cc(O)cc2OC(c2ccc(O)c(O)c2)C1O,0 +O=C(CCc1ccc(O)cc1)c1c(O)cc(O)cc1OC1OC(CO)C(O)C(O)C1O,0 +N=C1NC(=O)CS1,0 +O=C1C[N+]23CC[N+]45CC(=O)O[Ni-4]24(O1)(OC(=O)C3)OC(=O)C5,0 +CC(=O)c1cc2c(cc1C(C)C)CCC1C(C)(C#N)CCCC21C,0 +COC(=O)C1(C)CCCC2(C)c3cc(Br)c(C(C)C)cc3CCC12,0 +COC(=O)C1(C)CCCC2(C)c3ccccc3C(OO)CC12,0 +COC(=O)C1(C)CCCC2(C)c3ccc(C(C)C)cc3C(=O)CC12,0 +C=C(C)c1ccc2c(c1)C(=O)CC1C(C)(C#N)CCCC21C,0 +CC(C)c1ccc2c(c1)CCC1C2(C)CCCC1(C)C(O)c1ccccc1,0 +CC(=O)OC(C)(C)c1ccc2c(c1)C(=O)CC1C(C)(C#N)CCCC21C,0 +CC(O)c1ccc2c(c1)C(O)CC1C(C)(CO)CCCC21C,0 +CC(C)C1(Cl)CCC2C3(C)CCCC(C)(C(=O)O)C3CC(Cl)C2(Cl)C1Cl,0 +COC(=O)C1(C)CCCC2(C)c3ccc(C(C)C)cc3C(=O)C(Br)C12,0 +CNC=O,0 +Cc1c2oc3c(C)ccc(C(=O)NC4C(=O)NC(C(C)C)C(=O)N5CCCC5C(=O)N(C)CC(=O)N(C)C(C(C)C)C(=O)OC4C)c3nc-2c(C(=O)NC2C(=O)NC(C(C)C)C(=O)N3CCCC3C(=O)N(C)CC(=O)N(C)C(C(C)C)C(=O)OC2C)c(N)c1=O,0 +COc1ccc(CC(N)C(=O)NC2C(CO)OC(n3cnc4c(N(C)C)ncnc43)C2O)cc1,0 +O=[As]O,0 +CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1,0 +CCc1nc(N)nc(N)c1-c1ccc(Cl)c(Cl)c1,0 +O=C(NC(CO)C(O)c1ccc([N+](=O)[O-])cc1)C(Cl)Cl,0 +CC1(C)NC(=N)NC(=N)N1c1ccc(Cl)c(Cl)c1,0 +CC(C)(CO)C(O)C(=O)NCCS(=O)(=O)O,0 +O=C(O)CCCc1ccc(N(CCCl)CCCl)cc1,0 +CN(C)c1ccc(C(=C2C=CC(=[N+](C)C)C=C2)c2ccc(N(C)C)cc2)cc1,0 +CC[N+](C)(C)c1ccc(C(=C2C=CC(=[N+](C)C)C=C2)c2ccc(N(C)C)cc2)cc1,0 +CNC1CCc2cc(OC)c(OC)c(OC)c2-c2ccc(OC)c(=O)cc21,0 +Nc1cc([As]=[As]c2ccc(O)c(N)c2)ccc1O,0 +Oc1nc(O)c2nnoc2n1,0 +N=C1NCCS1,0 +O=Nc1ccc(O)cc1,0 +NNC(=O)C(CC(C(=O)NN)C(=O)NN)C(=O)NN,0 +O=C(O)Cc1cc(=O)[nH]n(-c2ccccc2)c1=O,0 +NC(=O)C(=O)NN=Cc1ccc([N+](=O)[O-])o1,0 +C[N+]1=Cc2ccccc2O[Cu-3]12Oc1ccccc1C=[N+]2C,0 +O=S1OCCO1,0 +CCCCS(=O)(=O)O,0 +COc1ccc2c(ccc(=O)n2C)c1,0 +CC(CN1CCCCC1)SSC(C)CN1CCCCC1,0 +Cn1c(SSc2ccc(-c3cccnc3)n2C)ccc1-c1cccnc1,0 +O=C1N(CO)C2C(N1CO)N(CO)C(=O)N2CO,0 +CC(=O)OC1CCC2(C)C3=CCC4(C)C(C(C)=O)CCC4C34C=CC2(C1)C1C(=O)OC(=O)C14,0 +CNC(=O)C(C)C1C(=O)C(=C(O)C=CC(C)=CC(C)C2OC3(C)OC(C=CC34CO4)C2C)C(=O)N1C1CCC(O)C(C)O1,0 +CCCCCC(O)C1C(=O)OC(C)C(O)C=CC=CC=CC=CC=C(C)C(O)CC(O)CC(O)CC(O)CC(O)CC(O)CC1O,0 +CCC(C(=O)O)c1ccc([N+](=O)[O-])cc1.COc1cc2c(cc1OC)C13CCN4CC5=CCOC6CC(=O)N2C1C6C5CC43,0 +CCC(C)OC(=O)c1ccccc1C(=O)O.COc1cc2c(cc1OC)C13CCN4CC5=CCOC6CC(=O)N2C1C6C5C4C3,0 +O=c1ssc(=Nc2ccccc2)n1-c1ccccc1,0 +CCCN=c1ssc(=O)n1CCC,0 +CCCCN=c1ssc(=O)n1CCCC,0 +O=c1ssc(=Nc2ccc(Cl)cc2)n1-c1ccc(Cl)cc1,0 +CCCCCCCN=c1ssc(=O)n1CCCCCCC,0 +NS(=O)(=O)c1cc(O)nc(O)n1,0 +CS(=O)(=O)c1cc(O)nc(O)n1,0 +CCS(=O)(=O)c1cc(O)nc(O)n1,0 +O=S(=O)(Cc1ccccc1)c1cc(O)nc(O)n1,0 +Cc1oc(C)c2c1C(=O)c1ccccc1C2=O,0 +Nc1nc(O)c2c(n1)NCC(CNc1ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc1)N2C=O,0 +CCOC(=O)Nc1ccc(C(=O)C=Cc2ccc(N(CC)CC)cc2)cc1,0 +CC1NC(C)SC(C)S1,0 +N#CNC(=N)NC(=O)C=CC=Cc1ccc(Cl)cc1,0 +O=C(O)C=CC(=O)NNC(=O)C=CC(=O)O,0 +CC1C(=O)N2C(=O)C(C)C(C)C(=O)N2C(=O)C1C,0 +O=C(O)CCC(=O)NNC(=O)CCC(=O)O,0 +Nc1ccc(-c2ccc(N)c(S(=O)(=O)O)c2)cc1S(=O)(=O)O,0 +CCC(CS(=O)(=O)O)[N+](=O)[O-],0 +Oc1nnc(O)c2ncccc12,0 +Clc1nc(Cl)nc(Nc2ccccc2Cl)n1,0 +O=Nc1ccc(O)c2ncccc12,0 +Oc1ccc(Cl)cc1C(c1cc(Cl)ccc1O)C(Cl)(Cl)Cl,1 +CCCCCCCCCCCCCCCCCC(=O)OCC(O)CO,0 +c1ccc2c(c1)SC1=[S+][Cu-3]3([S+]=C4Sc5ccccc5N43)N12,0 +Oc1c(Cl)cc(Cl)c2cccnc12,0 +CC(=O)Nc1c2sscc-2n(C)c1=O,0 +[O-][N+]1=Cc2ccc[o+]2[Cu-3]12[N+]([O-])=Cc1ccc[o+]12,0 +O=C1O[Cu-3]2(Nc3ccccc31)Nc1ccccc1C(=O)O2,0 +O=C(Oc1cccc2cccnc12)c1ccccc1,0 +CCN(CC)C(=S)S[Se](SC(=S)N(CC)CC)(SC(=S)N(CC)CC)SC(=S)N(CC)CC,0 +C1COCCN1.S=C(S)N1CCOCC1,0 
+CC1=NNC(=O)C1,0 +O=c1ccc2cc(Cl)ccc2o1,0 +CN(C)c1ccc(C=Cc2ccnc3ccccc23)cc1,0 +CN(C)c1ccc(C=Cc2ccc3ccccc3[n+]2C)cc1,0 +CN(C)c1ccc(C=Cc2cc[n+](C)c3ccccc23)cc1,0 +c1cnc2c(c1)ccc1cccnc12,0 +Cc1ccc2ccc3ccc(C)nc3c2n1,0 +O=S(=O)(O)c1ccc2c(N=Nc3ccc(S(=O)(=O)O)c4ccccc34)c(O)c(S(=O)(=O)O)cc2c1,0 +O=C1OC(=O)C2C1C(c1ccccc1)N1C3C(=O)OC(=O)C3C(c3ccccc3)N21,0 +CCN(Cc1cccc(S(=O)(=O)O)c1)c1ccc(C(=C2C=CC(=[N+](CC)Cc3cccc(S(=O)(=O)O)c3)C=C2)c2ccccc2S(=O)(=O)O)cc1,0 +COc1ccc(C=CC(=O)O)cc1OC,0 +Cc1ccc(S(=O)(=O)Nc2cccc3c(O)ncnc23)cc1,0 +CCN(CCO)CCCC(C)Nc1ccnc2cc(Cl)ccc12.O=S(=O)(O)O,0 +c1ccc(C2=NC3(CCCCC3)N=C2c2ccccc2)cc1,0 +CSc1nc(N)nc(N)n1,0 +Nc1nc(N)nc(-c2ccccc2-c2nc(N)nc(N)n2)n1,0 +C1CN2CN1CN1CCN(C1)C2,0 +N=C(N)Nc1nnn[nH]1,0 +O=[N+]([O-])c1cc(S(=O)(=O)c2ccc(Cl)c([N+](=O)[O-])c2)ccc1Cl,0 +O=C1N(Cl)C2(c3ccccc3)N(Cl)C(=O)N(Cl)C2(c2ccccc2)N1Cl,0 +Nc1nc(N)nc(SCCOCCSc2nc(N)nc(N)n2)n1,0 +NNC(=O)c1ccccc1SSc1ccccc1C(=O)NN,1 +O=C(NN=Cc1ccc(Cl)cc1Cl)c1ccccc1SSc1ccccc1C(=O)NN=Cc1ccc(Cl)cc1Cl,1 +CCCC1CC=CC=CC=CC=CC(OC2OC(C)C(O)C(N)C2O)CC(O)C(C(=O)O)C(O)CC(=O)CC(O)CCCC(=O)CC(O)C(CC)C(=O)O1.O=S(=O)(O)O,0 +O=c1c2ccccc2oc2cc([N+](=O)[O-])cc([N+](=O)[O-])c12,0 +CC12CCC(C(=O)OC1=O)C2(C)C,0 +C[Si](C)(CCC(=O)O)O[Si](C)(C)CCC(=O)O,0 +O=[N+]([O-])c1ccc(SSc2ccc([N+](=O)[O-])cc2)cc1,0 +COc1ccc(C2c3cc(OC)c(OC)cc3CC3C(=O)OCC32)cc1OC,0 +O=S(=O)(O)C(Br)(Br)Br,0 +O=C(O)c1ccc(S(=O)(=O)O)o1,0 +S=c1[nH][nH]c(=S)s1,1 +OCC(S)CS,0 +O=C(O)C(O)(O)C(O)(O)C(=O)O,0 +CC1=[O+][V-]2(=O)([O+]=C(C)C1)[O+]=C(C)CC(C)=[O+]2,0 +CC1=[O+][Zr]234([O+]=C(C)C1)([O+]=C(C)CC(C)=[O+]2)([O+]=C(C)CC(C)=[O+]3)[O+]=C(C)CC(C)=[O+]4,0 +CC(=O)Oc1ccc2c(c1)Oc1cc(OC(C)=O)ccc1C21OC(=O)c2ccccc21,0 +N=c1[nH]ncs1,0 +CN1CSC(=S)N(C)C1,0 +CCCCC(CC)COS(=O)(=O)O,0 +O=C1CSC(=O)N1c1ccccc1,0 +O=C(O)C1=NN(c2ccc(S(=O)(=O)O)cc2)C(=O)C1N=Nc1ccc(S(=O)(=O)O)cc1,0 +Oc1ccc(O)c([PH](c2ccccc2)(c2ccccc2)c2ccccc2)c1,0 +CN(C)C(=S)SC(=S)N(C)C,0 +O=Nc1ccc(N=O)cc1,0 +O=c1oc(=O)c2cc3c(=O)oc(=O)c3cc12,0 +S=C(SSSSC(=S)N1CCCCC1)N1CCCCC1,0 +CC(C)S(=O)(=O)O,0 +CCN(CC)C(=S)S,0 +O=C1NS(=O)(=O)c2ccccc21,0 +O=C1c2ccc(O)c(O)c2C(=O)c2c(O)ccc(O)c21,0 +OCC1OC(n2cnc3c(Cl)ncnc32)C(O)C1O,0 +Oc1ncc(S)c(O)n1,0 +Cc1nc(N)c2cnn(C)c2n1,0 +Cc1n[nH]c2c(N(C)C)ncnc12,0 +Cc1cc(O)cc(C)c1Cl,0 +CC(C)=C1C=C2CCC3C(C)(C(=O)O)CCCC3(C)C2CC1,0 +CCN(CC)c1ccc(C(=C2C=CC(=[N+](CC)CC)C=C2)c2ccccc2)cc1.O=S(=O)(O)O,0 +Cc1cc(-c2ccc(N=Nc3cc(S(=O)(=O)O)c4ccccc4c3N)c(C)c2)ccc1N=Nc1cc(S(=O)(=O)O)c2ccccc2c1N,2 +O=Nc1ccc2ccccc2c1O,0 +c1ccc(N2N=C3N(c4ccccc4)C2N3c2ccccc2)cc1,0 +CN(C)c1ccc(C=C2SC(=S)NC2=O)cc1,0 +Cc1ccc2nc(-c3ccc(N=NNc4ccc(-c5nc6ccc(C)c(S(=O)(=O)O)c6s5)cc4)cc3)sc2c1S(=O)(=O)O,0 +O=C(N=Nc1ccccc1)NNc1ccccc1.O=C(NNc1ccccc1)NNc1ccccc1,0 +Cc1cc(O)cc(O)c1N=Nc1ccc([N+](=O)[O-])cc1,0 +Nc1ccc(N=Nc2ccc(C=Cc3ccc(N=Nc4ccc(N)c5ccccc45)cc3S(=O)(=O)O)c(S(=O)(=O)O)c2)c2ccccc12,0 +Nc1ccc2ccccc2c1N=Nc1ccc(C=Cc2ccc(N=Nc3c(N)ccc4ccccc34)cc2S(=O)(=O)O)c(S(=O)(=O)O)c1,1 +O=C(O)CC1OCC=C2CN3CCC45C6=CC(=O)C(=O)C([N+](=O)[O-])=C6NC4C1C2CC35,0 +O=[N+]([O-])c1ccc([As](=O)(O)O)cc1,0 +CC(C)c1cccc(C(C)C)c1O,0 +CCCCCCCCCCCCCCC(C(=O)O)S(=O)(=O)O,0 +O=c1c(-c2ccc(O)cc2)coc2cc(OC3OC(CO)C(O)C(O)C3O)cc(O)c12,0 +Cc1c(O)ccc2c(O)c(NC(=O)c3ccc4c(c3)CCC(C)(C)O4)c(=O)oc12,0 +C1CN2CCOB(O1)OCC2,0 +CCO[Si](C)(OCC)OCC,0 +CCCCOC(=S)SSC(=S)OCCCC,0 +O=c1c2ccccc2c2ccc3c4ccc5c(=O)c6ccccc6c6ccc(c7ccc1c2c73)c4c56,0 +CC[Sn](Cl)(CC)CC,0 +Cc1cc(=O)oc2cc(O)cc(O)c12,0 +COC(=O)c1ccccc1SSc1ccccc1C(=O)OC,0 +COc1ccc2c(c1OC)C(=O)OC2C1c2c(cc3c(c2OC)OCO3)CCN1C,0 +O=[N+]([O-])c1ccc(N=Nc2ccc(O)c3c(O)cc(S(=O)(=O)O)cc23)cc1S(=O)(=O)O,0 +CCCC(C)SP(=S)(SC(C)CCC)SC(C)CCC,0 
+CCN(CC)CCCC(C)Nc1cc(-c2ccccc2)nc2ccc(OC)cc12.O=P(O)(O)O,0 +CCCCCCCCCCCCCCCCCCN(C)C,0 +CC1OC(OC2C(COc3cc(O)c4c(c3)OC(c3ccc(O)cc3)CC4=O)OOC(CO)C2O)C(O)C(O)C1O,0 +O=Nc1ccc(O)cc1O,0 +CCCCCCOP(OCCCCCC)OCCCCCC,0 +CC(=NN=C(C)C1CC(CC(=O)O)C1(C)C)C1CC(CC(=O)O)C1(C)C,0 +CCCCCCCCCCCCCCCC[N+](C)(C)CCN(Cc1ccc(OC)cc1)c1ncccn1,0 +CCCCCCCCCCCCCCCCC(C(=O)O)S(=O)(=O)O,0 +Nc1ccc2c(c1)N[Cu-3]1(Nc3cc(N)ccc3O1)O2,0 +C1CCc2nnnn2CC1,0 +CC1=[O+][Cu-3]2([O+]=C(C)CC(Nc3ccccc3)=[O+]2)[O+]=C(Nc2ccccc2)C1,0 +O=[N+]([O-])c1ccc(S(=O)(=O)NN2CCOCC2)cc1,0 +O=[N+]([O-])c1ccc(S(=O)(=O)NN=Cc2ccco2)cc1,0 +O=[N+]([O-])c1ccc(S(=O)(=O)NN=C2CCCCC2)cc1,0 +CC(=O)Nc1ccc(S(=O)(=O)NN=Cc2ccco2)cc1,0 +CC(C=Cc1ccccc1)=NNS(=O)(=O)c1ccc([N+](=O)[O-])cc1,0 +CC1CCCN1c1ccnc2cc(Cl)ccc12,0 +CN(C)C(=O)N1CC[N+](C)([O-])CC1,0 +Cc1ccc2oc(=O)ccc2c1,0 +COc1cc(C)cc2c1OC(c1ccccc1)CC2=O,0 +CC(=O)c1c(O)c(C)c(O)c2c1OC1=CC(=O)C(C(C)=O)C(=O)C12C,0 +C=C1C(=O)OC(CCCCCCCCCCCCC)C1C(=O)O,0 +CC(CCC(C)C(=O)O)C(=O)O,0 +O=C(Nc1ccccc1)Nc1ccccn1,0 +CC1CCCC2(C1)OCC(O)CO2,0 +O=c1c2cc3c(=O)n(O)c(=O)c3cc2c(=O)n1O,0 +Nc1ccc(S(=O)(=O)c2ccc(N)cc2)cc1,0 +CC(=NNc1ccc([N+](=O)[O-])cc1[N+](=O)[O-])c1ccccc1,0 +CCCCC(CC)CO[Si](OCC(CC)CCCC)(OCC(CC)CCCC)OCC(CC)CCCC,0 +O=C(O)C1C2OC3C(OC(=O)C31)C2Br,0 +Cc1c(C(=O)O)c(O)cc2c1C(=O)c1c(O)c(OC3OC(CO)C(O)C(O)C3O)c(O)c(O)c1C2=O,1 +CC1=CC(=C(c2cc(C)c(O)c(Br)c2)c2ccccc2S(=O)(=O)O)C=C(Br)C1=O,0 +O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1,0 +O=C(O)c1cc(I)cc(I)c1O,0 +NC(CO)(CO)CO,0 +C=CCc1cc(-c2cc(CC=C)c(O)c(CN(CC)CC)c2)cc(CN(CC)CC)c1O,0 +S=P(N1CC1)(N1CC1)N1CC1,0 +CC(CCN1CCN(CCC(C)CC(C)(C)C)S1(=O)=O)CC(C)(C)C,0 +O=C1NCCN1N=Cc1ccc([N+](=O)[O-])o1,0 +CCCSP(SCCC)SCCC,0 +CCCCOP(=S)(OCCCC)OCCCC,0 +COP(=O)(OC)OC=C(Cl)Cl,0 +CC(O)=C[PH](c1ccccc1)(c1ccccc1)c1ccccc1,0 +CC(=O)C[PH](c1ccccc1)(c1ccccc1)c1ccccc1,0 +OC(=C[PH](c1ccccc1)(c1ccccc1)c1ccccc1)c1ccccc1,0 +O=C(C[PH](c1ccccc1)(c1ccccc1)c1ccccc1)c1ccccc1,0 +O=C1CSC(=O)N1,0 +S=C1SCCS1,0 +OCC1(CO)CCCC(CO)(CO)C1O,0 +CCCCCCC(=O)C(O)C(O)C(=O)CCCCCCCC(=O)O,0 +Nc1cc(Cl)ccc1S,0 +O=C1c2ccccc2C(=O)c2c1ccc(O)c2O,0 +CCOc1ccc(N=Nc2ccc(N)cc2N)cc1,0 +O=[N+]([O-])c1ccc2c(N=Nc3ccc4ccccc4c3O)c(O)cc(S(=O)(=O)O)c2c1,0 +Cc1cc(C2(c3ccc(O)c(C)c3)OS(=O)(=O)c3ccccc32)ccc1O,0 +O=C1c2ccccc2C(=O)c2c(Nc3cccc4c3C(=O)c3ccccc3C4=O)cccc21,0 +O=S(=O)(O)c1cc2ccc1ccc1ccc(cc1S(=O)(=O)O)nnc1ccc(ccc3ccc(cc3S(=O)(=O)O)nn2)c(S(=O)(=O)O)c1,2 +Nc1c(N=Nc2ccc(-c3ccc(N=Nc4cc(S(=O)(=O)O)c5ccccc5c4O)cc3)cc2)cc(S(=O)(=O)O)c2ccccc12,2 +N=C(N)N.O=S(=O)(O)O,0 +CC[N+]12CN3CN(CN(C3)C1)C2,0 +O=C1C=C2CC3(O)COc4c(ccc(O)c4O)C3=C2C=C1O,0 +O=C(O)CN(CCN(CC(=O)O)CC(=O)O)CCN(CC(=O)O)CC(=O)O,0 +O=C1C[N+]23CC[N+]45CC(=O)O[Cu-5]24(O1)(OC(=O)C3)OC(=O)C5,0 +OCCCN(CCO)CCN(CCCO)CCCO,0 +Cc1nc(N)nc(N)c1-c1ccc(Cl)c(Cl)c1,0 +O=S(c1ccccc1O)S(=O)c1ccccc1O,1 +COc1c2c(cc3c1OCO3)C13C=CC(OC)CC1N(CC3)C2.[O-][Cl+3]([O-])([O-])O,0 +CC1=C2C(=O)C3C(CC=C4CC(O)CCC43C)C2CCC12OC1CC(C)CNC1C2C,0 +CC1OC(OC2C=C3CCC4C(CCC5(C)C(c6ccc(=O)oc6)CCC45O)C3(C)CC2)C(O)C(O)C1O,0 +COC1CC(OC2CCC3(C=O)C4CCC5(C)C(C6=CC(=O)OC6)CCC5(O)C4CCC3(O)C2)OC(C)C1O,0 +CC1OC(OC2C=C3CCC4C(CCC5(C)C(c6ccc(=O)oc6)CCC45O)C3(C)CC2)C(O)C(OC2OC(CO)C(O)C(O)C2O)C1O,0 +CCC(C)C(=O)OC1C(O)C2C(CN3CC(C)CCC3C2(C)O)C2CC34OC5(O)C(OC(=O)C(C)(O)CC)CCC3(C)C5C(OC(C)=O)C(OC(C)=O)C4C21O,0 +Cc1cc(O)n2cnnc2n1,0 +Nc1c2ccccc2nc2ccccc12,0 +O=C1C2=C(C(=O)c3ccccc31)C(O)(S(=O)(=O)O)C(O)(S(=O)(=O)O)C1N=CC=CC21,0 +CC(=O)C1C(=O)OC2C3C=CC(C3)C2C1=O,0 +CC(C)(O)S(=O)(=O)O,0 +CC(=O)Nc1ccccc1,0 +[O-][N+]1=c2c([nH]c3ccccc23)=[O+][Cu-3]12[O+]=c1[nH]c3ccccc3c1=[N+]2[O-],0 +C[n+]1ccccc1C=NO,0 +COc1ccc(C(=O)c2ccccc2)c(O)c1,0 +CCCCCCCCOC(=O)CC(C(=O)OCCCCCCCC)S(=O)(=O)O,0 
+CCCCCCCCCCCCCOC(=O)CC(C(=O)OCCCCCCCCCCCCC)S(=O)(=O)O,0 +O=C1c2ccccc2C(=O)c2c1cc(S(=O)(=O)O)c(O)c2O,0 +O=C(O)c1cc(N=Nc2cccc([N+](=O)[O-])c2)ccc1O,0 +O=C1C=CC(=C(c2ccc(O)cc2)c2ccc(O)cc2)C=C1,0 +O=S(=O)(O)c1ccc(N=Nc2cc(S(=O)(=O)O)c3ccccc3c2O)c2ccccc12,0 +O=S(=O)(O)c1ccc(N=Nc2ccc(N=Nc3cccc4ccccc34)c(S(=O)(=O)O)c2)cc1,0 +O=S1(=O)OC(c2cc(Cl)c(O)c(Br)c2)(c2cc(Cl)c(O)c(Br)c2)c2ccccc21,1 +Cc1c(C2(c3cc(Br)c(O)c(Br)c3C)OS(=O)(=O)c3ccccc32)cc(Br)c(O)c1Br,0 +O=S1(=O)OC(c2cc(Br)c(O)c(Br)c2)(c2cc(Br)c(O)c(Br)c2)c2ccccc21,0 +Cc1c(C2(c3cc(C(C)C)c(O)c(Br)c3C)OS(=O)(=O)c3ccccc32)cc(C(C)C)c(O)c1Br,1 +Nc1c(N=Nc2ccc([N+](=O)[O-])cc2)c(S(=O)(=O)O)cc2cc(S(=O)(=O)O)c(N=Nc3ccccc3)c(O)c12,0 +O=S1(=O)OC(c2ccc(O)c(Cl)c2)(c2ccc(O)c(Cl)c2)c2ccccc21,0 +O=S(=O)(O)c1cccc2c(N=Nc3ccc(N=Nc4ccc(Nc5ccccc5)c5c(S(=O)(=O)O)cccc45)c4ccccc34)cccc12,0 +O=S(=O)(O)c1cc(S(=O)(=O)O)c2c(N=Nc3ccc(N=Nc4ccccc4)cc3)c(O)ccc2c1,0 +N=C(N)NN.O=C(O)O,0 +N=C(N)NCCCC(N)C(=O)O,0 +CN(C)P(=O)(N(C)C)N(C)C,0 +O=C1C=C2C(=CCOC2O)O1,0 +O=C(O)C=CCCCCCCCCC(=O)O,0 +CC(=O)C(C)(CCC(=O)O)CCC(=O)O,0 +N#CNC(=N)NC#N,0 +N#CN(CS(=O)(=O)O)C(=N)N,0 +N=c1nc2[nH][nH]c(=N)n2c(=N)[nH]1,0 +Nc1ccccc1SSc1ccccc1N,0 +CC(C)(C)CC(C)(C)SSc1n[nH]c(=S)s1,0 +CC(CN(C)C)Sc1nnc(SC(C)CN(C)C)s1,0 +C=Cc1ccc(CC)cn1,0 +O=C(O)c1cc(O)ccc1O,0 +Oc1ccccc1C1SC(c2ccccc2O)SC(c2ccccc2O)S1,0 +O=S(=O)(O)C(C(I)I)S(=O)(=O)O,0 +O=S(=O)(O)CS(=O)(=O)O,0 +O=C1C=C(O)C(=O)c2ccccc21,0 +O=C1C(=C2Nc3ccc(S(=O)(=O)O)cc3C2=O)Nc2ccc(S(=O)(=O)O)cc21,0 +Oc1ccc2c(c1)OCC1(O)Cc3cc(O)c(O)cc3C21,0 +Oc1cc2c(cc1O)C1c3ccc(O)c(O)c3OCC1(O)C2,0 +CC[n+]1c2ccc(C)cc2nc2c3ccccc3c(N)cc21,0 +Cc1cc(C(=C2C=CC(=[N+](C)C)C=C2)c2ccc([N+](C)(C)C)cc2)ccc1N(C)C,0 +CCN(CC)c1ccc(C(=C2C=CC(=[N+](CC)CC)C=C2)c2ccc(N(CC)CC)cc2)cc1,0 +O=C1NC(=O)C(c2ccccc2)(c2ccccc2)N1,0 +O=c1ccc2ccccc2o1,0 +CN(C)P(=O)(OP(=O)(N(C)C)N(C)C)N(C)C,0 +CCOP(=S)(OCC)Oc1cc(C)nc(C(C)C)n1,0 +CCOP(=S)(OCC)Oc1ccc2c(C)c(Cl)c(=O)oc2c1,0 +C=CS(=O)(=O)O,0 +Ic1nc(-c2ccccc2)[nH]c1I,0 +CC1(C)SSC(C)(C)SS1,0 +CCCC(C(=O)OCC(C)N1CCCC1(C)C)C1CCCC1,0 +O=C(OCCN1CCCC12CCCCC2)C(c1ccccc1)C1CCCC1,0 +CC(=O)C1CCC2C1(C)CC1OC13C21C=CC2(CC(O)CCC23C)C2C(=O)OC(=O)C21,0 +CC12CCC(=O)C=C1CCC1C2C(O)CC2(C)C1CCC2(O)C(=O)COC(=O)CCC(=O)O,0 +C[N+]1(CCC(C(N)=O)(c2ccccc2)c2ccccc2)CCCC12CCCCC2,0 +COC1C(OC(=O)C=CC=CC=CC=CC(=O)O)CCC2(CO2)C1C1(C)OC1CC=C(C)C,2 +CN(C)C1C(O)=C(C(N)=O)C(=O)C2(O)C(O)=C3C(=O)c4c(O)cccc4C(C)(O)C3C(O)C12,0 +O=c1c(O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12,0 +CC1OC(OCC2OC(Oc3c(-c4ccc(O)c(O)c4)oc4cc(O)cc(O)c4c3=O)C(O)C(O)C2O)C(O)C(O)C1O,0 +CC1OC(Oc2c(-c3ccc(O)c(O)c3)oc3cc(O)cc(O)c3c2=O)C(O)C(O)C1O,0 +CC1OC(OCC2OC(Oc3c(-c4ccc(O)cc4)oc4cc(OC5OC(C)C(O)C(O)C5O)cc(O)c4c3=O)C(O)C(O)C2O)C(O)C(O)C1O,1 +CN(C)c1ccc2nc3c(ccc4ccccc43)[o+]c2c1,0 +CCN(CC)CCCOP(=O)(OCCCN(CC)CC)OCCCN(CC)CC,0 +Oc1nnc(O)c2c1[nH]c(=S)n2-c1ccccc1,0 +Cc1cc(=O)oc2cc(O)ccc12,0 +Cc1ncc2c(n1)NC(=O)NC2,0 +O=C(O)CCCCCCCCCCCC(=O)O,0 +O=S(=O)(O)c1ccc(NN(c2c(O)ccc3ccccc23)S(=O)(=O)O)cc1,0 +Cc1cc(O)ccc1C1(c2ccc(O)cc2C)OS(=O)(=O)c2ccccc21,0 +COc1cc(-c2ccc(N=Nc3ccc4c(S(=O)(=O)O)cc(S(=O)(=O)O)c(N)c4c3O)c(OC)c2)ccc1N=Nc1ccc2c(S(=O)(=O)O)cc(S(=O)(=O)O)c(N)c2c1O,2 +CCN(Cc1cccc(S(=O)(=O)O)c1)c1ccc(C(=C2C=CC(=[N+](CC)Cc3cccc(S(=O)(=O)O)c3)C=C2)c2ccc(S(=O)(=O)O)cc2)cc1,0 +NNC(=O)c1ccncc1,0 +COC1C(O)CCC2(CO2)C1C1(C)OC1CC=C(C)C,0 +COc1ccc2c(c1)CCCN2CCCCCCCCCCN(CCC(C)C)CCC(C)C,0 +CC12CCC(=O)C=C1CCC1C2C(=O)CC2(C)C1CCC2(O)C(=O)CO,0 +CC(=O)C1CCC2C3CCC4=CC(=O)CCC4(C)C3CCC12C,0 +C1CN1c1nc(N2CC2)nc(N2CC2)n1,0 +CC(=O)C1CCC2C1(C)CC=C1C23C=CC2(CC(O)CCC12C)C(C(=O)O)C3C(=O)O,0 +Cc1cc(S(=O)(=O)O)ccc1N=Nc1ccc(N=Nc2cc(S(=O)(=O)O)c3ccccc3c2O)c(C)c1,0 
+CC12CCC3C(CC=C4CC(O)CCC43C)C1CCC2=O,0 +CC(=O)OCC[N+](C)(C)C.CCCCC(CC)COC(=O)CC(C(=O)OCC(CC)CCCC)S(=O)(=O)O,0 +Oc1nnc(O)c2[nH]cnc12,0 +COc1ccc2nc(C)cc(NCCCCCCCCCNc3cc(C)nc4ccc(OC)cc34)c2c1,0 +CCC(N)CS(=O)(=O)O,0 +Cc1cc(N=Nc2ccc([N+](=O)[O-])cc2)c(NCC(O)C(O)C(O)CO)cc1C,0 +N#CC1(c2ccccc2)CCOCC1,0 +CC1=CC(C)=Nc2ccccc2N1,0 +COc1cc2c(cc1OC)NC(C)=CC(C)=N2,0 +COc1ccc2c(c1OC)CC1COCC21,0 +COc1cc([N+](=O)[O-])c([N+](=O)[O-])c([N+](=O)[O-])c1OC,0 +O=C1CCc2ccccc2O1,0 +CC1=CC(C)=Nc2cc(Cl)ccc2N1,0 +CCOC=C1C(=O)N(C(C)=O)c2cc(OC)c(OC)cc21,0 +C1=C(c2ccccc2)Nc2ccccc2N=C1c1ccccc1,0 +COc1ccc2c(c1OC)C(=O)OC2c1c2c(c(OC)c3c1OCO3)C(C1OC(=O)c3c1ccc(OC)c3OC)N(C)CC2,0 +CN1BN(C)BN(C)B1,0 +O=c1oc2ccc([N+](=O)[O-])cc2cc1Br,0 +Cc1ccc(C=C2CCOC2=O)cc1,0 +S=P(N1CCOCC1)(N1CC1)N1CC1,0 +O=S(=O)(O)c1ccc(N=Nc2ccc(O)cc2O)cc1,0 +Cc1ccc(N=Nc2c(O)c(S(=O)(=O)O)cc3cc(S(=O)(=O)O)ccc23)c(C)c1,0 +O=S1(=O)OC(c2ccc(O)cc2)(c2ccc(O)cc2)c2ccccc21,0 +N=C1C=CC(=C(c2ccc(N)cc2)c2ccc(N)cc2)C=C1,0 +Cc1ccc(N=Nc2ccc(O)c(N=Nc3ccc(S(=O)(=O)O)cc3)c2O)c(C)c1,0 +Cc1cc(C2(c3cc(C)c(O)cc3C)OS(=O)(=O)c3ccccc32)c(C)cc1O,0 +CCCCCCCCCCCCCCCCCC(=O)O.CCN(CC)c1ccc2c(-c3ccccc3C(=O)O)c3ccc(N(CC)CC)cc3[o+]c2c1,0 +CNCCS(=O)(=O)O,0 +CSP(=S)(SC)SC(C)c1ccc(C)cc1,0 +CSSC(SC)SC,0 +OC1CN=C2C=CC=CN2C1,0 +CC(=O)Oc1ccc2c(oc(=O)c3nc(C)oc32)c1C,0 +N#CCCN1C(=O)CCC2(CCC(=O)N(CCC#N)C2=O)C1=O,0 +CSc1ccc(C=NC(=N)SN)cc1C,0 +O=c1oc2c(ccc3c(O)cccc32)c2c1CCCC2,0 +Nc1nc(N)nc(Nc2ccc([As](=O)(O)O)cc2)n1,0 +CC(=O)C(CCC(=O)O)(CCC(=O)O)C(C)C,0 +Cc1cnc2cc(Cl)ccc2c1NCCN(CC(C)C)CC(C)C.O=P(O)(O)O,0 +CCCCN(CCCC)CCCCNc1c(C)cnc2cc(Cl)ccc12.O=P(O)(O)O,0 +Cc1ccc2oc3ccccc3c(=O)c2c1,0 +O=C1OS(=O)(=O)c2ccccc21,0 +Cc1cc2nc3ccc(N(C)C)cc3[s+]c2cc1N,0 +O=S1(=O)OC(c2cc(Br)c(O)c(Br)c2)(c2cc(Br)c(O)c(Br)c2)c2c(Br)c(Br)c(Br)c(Br)c21,0 +Cc1cc(O)c(C(C)C)cc1C1(c2cc(C(C)C)c(O)cc2C)OS(=O)(=O)c2ccccc21,0 +CC1(C)Nc2cccc3c(N=Nc4ccc(N=Nc5ccccc5)c5ccccc45)ccc(c23)N1,0 +Nc1c(S(=O)(=O)O)cc2cc(S(=O)(=O)O)ccc2c1N=Nc1ccc(-c2ccc(N=Nc3c(N)c(S(=O)(=O)O)cc4cc(S(=O)(=O)O)ccc34)c(S(=O)(=O)O)c2)cc1,1 +Cc1cc(-c2ccc(N=Nc3c(S(=O)(=O)O)cc4cc(S(=O)(=O)O)cc(N)c4c3O)c(C)c2)ccc1N=Nc1c(S(=O)(=O)O)cc2cc(S(=O)(=O)O)cc(N)c2c1O,1 diff --git a/chemprop-updated/tests/data/classification/test.npz b/chemprop-updated/tests/data/classification/test.npz new file mode 100644 index 0000000000000000000000000000000000000000..2388779431d95786f12c0d54a13424568b9ffcd7 --- /dev/null +++ b/chemprop-updated/tests/data/classification/test.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a55cb4edcb2d9ddfda244d363ff0adaf6eed4b39f1212b43ee58ba4c47866b6a +size 7368 diff --git a/chemprop-updated/tests/data/classification/test_true.csv b/chemprop-updated/tests/data/classification/test_true.csv new file mode 100644 index 0000000000000000000000000000000000000000..521510a318ab22fb477fb9d305fbba248227d954 --- /dev/null +++ b/chemprop-updated/tests/data/classification/test_true.csv @@ -0,0 +1,11 @@ +smiles,NR-AR,NR-AR-LBD,NR-AhR,NR-Aromatase,NR-ER,NR-ER-LBD,NR-PPAR-gamma,SR-ARE,SR-ATAD5,SR-HSE,SR-MMP,SR-p53 +CCc1cccc(C)c1N(C(=O)CCl)[C@@H](C)COC,0,0,0,,0,0,,1,0,0,,0 +O=C(O)c1ccccc1C(=O)Nc1cccc2ccccc12,0,0,1,,0,0,0,1,0,0,,0 +CCC(=O)OC1(c2ccccc2)CCN(C)CC1,0,,0,0,0,0,0,0,0,0,0,0 +COc1cc(-c2ccc(=O)[nH]n2)ccc1OC(F)F,0,0,,0,,0,0,0,1,0,0,0 +CCOc1ccc([N+](=O)[O-])cc1,0,0,0,,1,1,,,0,,1, +CCCCN(CCCC)CCCOC(=O)c1ccc(N)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCOc1cc(C(=O)NCCN(CC)CC)c2ccccc2n1,0,0,0,0,0,0,0,0,0,0,,0 +CC(C)(c1cc(Br)c(OCC(Br)CBr)c(Br)c1)c1cc(Br)c(OCC(Br)CBr)c(Br)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)c1ccc(C(C)C)cc1,0,0,0,0,0,0,0,0,0,0,0,0 
+COc1c2occc2cc2ccc(=O)oc12,0,0,1,0,0,1,0,0,0,0,0,0 diff --git a/chemprop-updated/tests/data/example_model_v1_4.pt b/chemprop-updated/tests/data/example_model_v1_4.pt new file mode 100644 index 0000000000000000000000000000000000000000..75d086a86bb59f04ba495624133dcc0e1c4cf694 --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v1_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff25a9e3bb73ef6e659fd030f9c025050ae19a073daaa2cf5d5b50bcdb1bd05a +size 44070866 diff --git a/chemprop-updated/tests/data/example_model_v1_regression_mol.pt b/chemprop-updated/tests/data/example_model_v1_regression_mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a95ed0312789058ceb78af262ee29eac84a5911 --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v1_regression_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dad80688a828c46861df5c8b1100c4d4ea030eec28aab906577c8dd70f1fb4b +size 1428085 diff --git a/chemprop-updated/tests/data/example_model_v1_regression_mol_prediction.csv b/chemprop-updated/tests/data/example_model_v1_regression_mol_prediction.csv new file mode 100644 index 0000000000000000000000000000000000000000..78a3c6358385125640d9fd6f79d0b77b6754b57f --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v1_regression_mol_prediction.csv @@ -0,0 +1,51 @@ +smiles,logSolubility +C/C1CCC(\C)CC1,-2.4685160026205875 +Cc1ccc(OP(=O)(Oc2cccc(C)c2)Oc3ccccc3C)cc1,-3.690997102347433 +c1c(Br)ccc2ccccc12,-4.400622334163299 +CCOc1ccc(cc1)C(C)(C)COCc3cccc(Oc2ccccc2)c3,-3.648697421001764 +CCC1(C(=O)NC(=O)NC1=O)C2=CCCCC2,-3.3428182733716643 +C(Cc1ccccc1)c2ccccc2,-3.503486854056 +Cc1cccc(N)c1,-3.5574339504454997 +CCN(CC(C)=C)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O,-3.516950400153098 +CC(C)O,-1.7148572271406979 +CCCCCCCCO,-2.239583430830765 +CN(C)C(=O)SCCCCOc1ccccc1,-2.915142669267167 +CC2Nc1cc(Cl)c(cc1C(=O)N2c3ccccc3C)S(N)(=O)=O,-4.213283027865243 +CCCCCCC#C,-2.273294986429117 +COP(=S)(OC)Oc1cc(Cl)c(I)cc1Cl,-3.607098153202899 +CC(C)CCOC(=O)C,-2.0786930741810843 +CCN(CC)c1ccccc1,-3.343242983008425 +O=N(=O)c1cc(Cl)c(Cl)cc1,-3.7967865273571073 +ClC1=C(Cl)C2(Cl)C3C4CC(C=C4)C3C1(Cl)C2(Cl)Cl,-4.632335644884678 +CC(=O)Nc1ccc(F)cc1,-3.5089342880842556 +CC1(C)C(C=C(Br)Br)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2,-3.640108243303718 +O=C1NC(=O)NC(=O)C1(CC)c1ccccc1,-3.764417879682843 +c1ccncc1,-2.8943703242072205 +OC1CCCCCC1,-2.25571404330129 +CCCCCCCCCCCCCCCCO,-2.35840188440722 +COC(=O)C1=C(C)NC(=C(C1c2ccccc2N(=O)=O)C(=O)OC)C,-3.6143835926716297 +Nc1nccs1,-3.0722337202225756 +CCCC(C)C,-2.022391309844789 +Cc1cccc(C)c1,-3.5775675448351674 +Clc2ccc(Oc1ccc(cc1)N(=O)=O)c(Cl)c2,-3.9504066050044386 +O=C1NC(=O)C(N1)(c2ccccc2)c3ccccc3,-4.051156505350688 +Oc1ccc(Cl)cc1C(=O)Nc2ccc(cc2Cl)N(=O)=O,-4.043370213576292 +CC(C)(C)Cc1ccccc1,-3.1206711627922026 +CCOC(=O)c1cncn1C(C)c2ccccc2,-3.6367735594944337 +OCC1OC(CO)(OC2OC(COC3OC(CO)C(O)C(O)C3O)C(O)C(O)C2O)C(O)C1O,-2.397844343815426 +ClCC#N,-2.244219687088453 +CCCCCCCCC(=O)C,-2.2648864967385514 +CCCOC(=O)C,-2.071270881117651 +OC3N=C(c1ccccc1)c2cc(Cl)ccc2NC3=O,-4.090765668009631 +CCOc2ccc1nc(sc1c2)S(N)(=O)=O,-3.7640808841730014 +CC(C)C(C)O,-1.9086673917669121 +Oc2ccc1ncccc1c2,-4.176121940189839 +Cc3nnc4CN=C(c1ccccc1Cl)c2cc(Cl)ccc2n34,-4.591402014379124 +COc1ccc(cc1)C(c2ccc(OC)cc2)C(Cl)(Cl)Cl,-3.806374501332118 +CCSCc1ccccc1OC(=O)NC,-3.2378846102796097 +Clc1ccc(c(Cl)c1)c2c(Cl)ccc(Cl)c2Cl,-4.711614117032452 +Clc1ccc(cc1)c2ccc(Cl)cc2,-4.262654293284488 +CCCC1CCCC1,-2.47728326938902 
+CCCC(O)CC,-2.0445744136508104 +CCCCCCCC#C,-2.291813588932748 +ClC1=C(Cl)C(Cl)(C(=C1Cl)Cl)C2(Cl)C(=C(Cl)C(=C2Cl)Cl)Cl,-4.421838323973119 diff --git a/chemprop-updated/tests/data/example_model_v2_classification_dirichlet_mol.pt b/chemprop-updated/tests/data/example_model_v2_classification_dirichlet_mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..3dbf9fd9446d785d9964d062cecb8e8b3563e22e --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_classification_dirichlet_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff5fab340eabe94c131880eb3292dc8737f4a2de0ebf7c492b43bf07aa935fc +size 1287822 diff --git a/chemprop-updated/tests/data/example_model_v2_classification_mol.pt b/chemprop-updated/tests/data/example_model_v2_classification_mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..56135039a2a0cf411e318c667d7a2ea2dcf75ec6 --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_classification_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6d13a6094d197f543d32e502991fc5af4b37d92dfe03c8b6eef11c9b5c61ed +size 1290660 diff --git a/chemprop-updated/tests/data/example_model_v2_classification_mol_multiclass.pt b/chemprop-updated/tests/data/example_model_v2_classification_mol_multiclass.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcc416f7dde670e4f64775d819d07da32c03b32c --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_classification_mol_multiclass.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:845c7960c3c0245b8f7de946c2f90d4e7704860a53001cc31be3d031cfeb70af +size 1289770 diff --git a/chemprop-updated/tests/data/example_model_v2_classification_mol_with_metrics.ckpt b/chemprop-updated/tests/data/example_model_v2_classification_mol_with_metrics.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..33b7c505a5772490792be48426958184668f2c03 --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_classification_mol_with_metrics.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ad46d783130249352d3da372a250ba2baca8a655f69c47ea7d6c9f183af8c43 +size 3858271 diff --git a/chemprop-updated/tests/data/example_model_v2_multiclass_dirichlet_mol.pt b/chemprop-updated/tests/data/example_model_v2_multiclass_dirichlet_mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..de7e938459c5b6e38f1607e3563f4aec95b86576 --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_multiclass_dirichlet_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2301dd7918851f19022938716c15501c9397e0b2f629753c0b7c03349851b21b +size 1281870 diff --git a/chemprop-updated/tests/data/example_model_v2_regression_evidential_mol.pt b/chemprop-updated/tests/data/example_model_v2_regression_evidential_mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc414e4ccb58c8953d276d03e1d1ebb82eb01b2f --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_regression_evidential_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c056822e6e83801b7f4c89693df5404123a105142ece80737096be19a065f06 +size 1291106 diff --git a/chemprop-updated/tests/data/example_model_v2_regression_mol+mol.ckpt b/chemprop-updated/tests/data/example_model_v2_regression_mol+mol.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..d5e0d81f7e695b1e2f8a9a1e6259e141a2d54004 --- /dev/null +++ 
b/chemprop-updated/tests/data/example_model_v2_regression_mol+mol.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0314c3a1c8971ee55e6d5cf7fb90b867b861ea1950087701d7d1761a7a96314 +size 7680454 diff --git a/chemprop-updated/tests/data/example_model_v2_regression_mol+mol.pt b/chemprop-updated/tests/data/example_model_v2_regression_mol+mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..c300fa6d16acafce7ea16bcda008ee8c22967d84 --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_regression_mol+mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8a0a992770e486982f5ff7b24425886a6b7ffba92d975fc0840185d97b03da4 +size 2565818 diff --git a/chemprop-updated/tests/data/example_model_v2_regression_mol.ckpt b/chemprop-updated/tests/data/example_model_v2_regression_mol.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..09d85938196dcc30fb6beb0b21fd6faaa79343cc --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_regression_mol.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec996c623cea37b6690f13c6e41560937eb1768c8b180a41e97b03526b12fd1b +size 3852134 diff --git a/chemprop-updated/tests/data/example_model_v2_regression_mol.pt b/chemprop-updated/tests/data/example_model_v2_regression_mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..037a89f604600f1e706f698d14ece71b9a107054 --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_regression_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a222b920c9152d67f6e1970881221079ab8a321d2db57759921da51b5b4a825 +size 1287750 diff --git a/chemprop-updated/tests/data/example_model_v2_regression_mol_multitask.pt b/chemprop-updated/tests/data/example_model_v2_regression_mol_multitask.pt new file mode 100644 index 0000000000000000000000000000000000000000..78054c01c3c344e490cb7132765412e1e6a41d0b --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_regression_mol_multitask.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d053982d9a9e524d9b30aa2df76f38f07d0752663515fc997d4824305b5b332 +size 1301582 diff --git a/chemprop-updated/tests/data/example_model_v2_regression_mol_with_metrics.ckpt b/chemprop-updated/tests/data/example_model_v2_regression_mol_with_metrics.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..ebc80590f655051f92fee02cc88af098c0ed9924 --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_regression_mol_with_metrics.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18ac4e7f54c39128cbde9d3f04312af57db6b1ffe2e9f6154aceeca42964c81d +size 3852975 diff --git a/chemprop-updated/tests/data/example_model_v2_regression_mve_mol.pt b/chemprop-updated/tests/data/example_model_v2_regression_mve_mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c33dd3511e45334955dacb55b88284d04781cc9 --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_regression_mve_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b95ee7e06ff51b29868badb2f548bb24d458f351a979a0393321d003fa1ad15 +size 1288470 diff --git a/chemprop-updated/tests/data/example_model_v2_regression_rxn+mol.pt b/chemprop-updated/tests/data/example_model_v2_regression_rxn+mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..172524ba7e3d88c3590a3171cb01b436a305a7ee --- /dev/null +++ 
b/chemprop-updated/tests/data/example_model_v2_regression_rxn+mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10908249c6ed837dab533ddb9b1613a229198f6210bdf8dc9b0ad9dbba4bce33 +size 2664186 diff --git a/chemprop-updated/tests/data/example_model_v2_regression_rxn.ckpt b/chemprop-updated/tests/data/example_model_v2_regression_rxn.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..5af92b627ef62d17bf6922e0d3cc078bf1f4d772 --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_regression_rxn.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63289026b497e50517ba7b89aeb892bd0f6a0a2aaeea8053fd2a23c5c7eec892 +size 4147238 diff --git a/chemprop-updated/tests/data/example_model_v2_regression_rxn.pt b/chemprop-updated/tests/data/example_model_v2_regression_rxn.pt new file mode 100644 index 0000000000000000000000000000000000000000..13fdc0051ec9e03f9b21faeccb6e068ec9474eb9 --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_regression_rxn.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa792943e8575599b1d3af31667bcd8ec8961a04a6deb11d9ffe0e9cbcce278 +size 1386118 diff --git a/chemprop-updated/tests/data/example_model_v2_trained_on_cuda.pt b/chemprop-updated/tests/data/example_model_v2_trained_on_cuda.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0bcc7be413683e858d6eaf9b07c4345ba2291bf --- /dev/null +++ b/chemprop-updated/tests/data/example_model_v2_trained_on_cuda.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1db40c0b5fae241ec8697cd066f0e3fa7384086db8f207f144229fb6be44241 +size 1280828 diff --git a/chemprop-updated/tests/data/regression.csv b/chemprop-updated/tests/data/regression.csv new file mode 100644 index 0000000000000000000000000000000000000000..1231d3626b5f6d59b5a61d715cb793c58e2e8a45 --- /dev/null +++ b/chemprop-updated/tests/data/regression.csv @@ -0,0 +1,501 @@ +smiles,logSolubility +OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O,-0.77 +Cc1occc1C(=O)Nc2ccccc2,-3.3 +CC(C)=CCCC(C)=CC(=O),-2.06 +c1ccc2c(c1)ccc3c2ccc4c5ccccc5ccc43,-7.87 +c1ccsc1,-1.33 +c2ccc1scnc1c2,-1.5 +Clc1cc(Cl)c(c(Cl)c1)c2c(Cl)cccc2Cl,-7.32 +CC12CCC3C(CCc4cc(O)ccc34)C2CCC1O,-5.03 +ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl,-6.29 +COc5cc4OCC3Oc2c1CC(Oc1ccc2C(=O)C3c4cc5OC)C(C)=C,-4.42 +O=C1CCCN1,1.07 +Clc1ccc2ccccc2c1,-4.14 +CCCC=C,-2.68 +CCC1(C(=O)NCNC1=O)c2ccccc2,-2.64 +CCCCCCCCCCCCCC,-7.96 +CC(C)Cl,-1.41 +CCC(C)CO,-0.47 +N#Cc1ccccc1,-1 +CCOP(=S)(OCC)Oc1cc(C)nc(n1)C(C)C,-3.64 +CCCCCCCCCC(C)O,-2.94 +Clc1ccc(c(Cl)c1)c2c(Cl)ccc(Cl)c2Cl,-7.43 +O=c2[nH]c1CCCc1c(=O)n2C3CCCCC3,-4.593999999999999 +CCOP(=S)(OCC)SCSCC,-4.11 +CCOc1ccc(NC(=O)C)cc1,-2.35 +CCN(CC)c1c(cc(c(N)c1N(=O)=O)C(F)(F)F)N(=O)=O,-5.47 +CCCCCCCO,-1.81 +Cn1c(=O)n(C)c2nc[nH]c2c1=O,-1.39 +CCCCC1(CC)C(=O)NC(=O)NC1=O,-1.661 +ClC(Cl)=C(c1ccc(Cl)cc1)c2ccc(Cl)cc2,-6.9 +CCCCCCCC(=O)OC,-3.17 +CCc1ccc(CC)cc1,-3.75 +CCOP(=S)(OCC)SCSC(C)(C)C,-4.755 +COC(=O)Nc1cccc(OC(=O)Nc2cccc(C)c2)c1,-4.805 +ClC(=C)Cl,-1.64 +Cc1cccc2c1Cc3ccccc32,-5.22 +CCCCC=O,-0.85 +N(c1ccccc1)c2ccccc2,-3.5039999999999996 +CN(C)C(=O)SCCCCOc1ccccc1,-3.927 +CCCOP(=S)(OCCC)SCC(=O)N1CCCCC1C,-4.15 +CCCCCCCI,-4.81 +c1c(Cl)cccc1c2ccccc2,-4.88 +OCCCC=C,-0.15 +O=C2NC(=O)C1(CCC1)C(=O)N2,-1.655 +CC(C)C1CCC(C)CC1O,-2.53 +CC(C)OC=O,-0.63 +CCCCCC(C)O,-1.55 +CC(=O)Nc1ccc(Br)cc1,-3.083 +c1ccccc1n2ncc(N)c(Br)c2(=O),-3.127 +COC(=O)C1=C(C)NC(=C(C1c2ccccc2N(=O)=O)C(=O)OC)C,-4.76 +c2c(C)cc1nc(C)ccc1c2,-1.94 +CCCCCCC#C,-3.66 
+CCC1(C(=O)NC(=O)NC1=O)C2=CCCCC2,-2.17 +c1ccc2c(c1)ccc3c4ccccc4ccc23,-8.057 +CCC(C)n1c(=O)[nH]c(C)c(Br)c1=O,-2.523 +Clc1cccc(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl,-8.6 +Cc1ccccc1O,-0.62 +CC(C)CCC(C)(C)C,-5.05 +Cc1ccc(C)c2ccccc12,-4.14 +Cc1cc2c3ccccc3ccc2c4ccccc14,-6.57 +CCCC(=O)C,-0.19 +Clc1cc(Cl)c(Cl)c(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl,-9.15 +CCCOC(=O)CC,-0.82 +CC34CC(O)C1(F)C(CCC2=CC(=O)C=CC12C)C3CC(O)C4(O)C(=O)CO,-3.68 +Nc1ccc(O)cc1,-0.8 +O=C(Cn1ccnc1N(=O)=O)NCc2ccccc2,-2.81 +OC4=C(C1CCC(CC1)c2ccc(Cl)cc2)C(=O)c3ccccc3C4=O,-5.931 +CCNc1nc(Cl)nc(n1)N(CC)CC,-4.06 +NC(=O)c1cnccn1,-0.667 +CCC(Br)(CC)C(=O)NC(N)=O,-2.68 +Clc1ccccc1c2ccccc2Cl,-5.27 +O=C2CN(N=Cc1ccc(o1)N(=O)=O)C(=O)N2,-3.38 +Clc2ccc(Oc1ccc(cc1)N(=O)=O)c(Cl)c2,-5.46 +CC1(C)C2CCC1(C)C(=O)C2,-1.96 +O=C1NC(=O)NC(=O)C1(CC=C)c1ccccc1,-2.369 +CCCCC(=O)OCC,-2.25 +CC(C)CCOC(=O)C,-1.92 +O=C1N(COC(=O)CCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-5.886 +Clc1cccc(c1)c2cc(Cl)ccc2Cl,-6.01 +CCCBr,-1.73 +CCCC1COC(Cn2cncn2)(O1)c3ccc(Cl)cc3Cl,-3.4930000000000003 +COP(=S)(OC)SCC(=O)N(C)C=O,-1.995 +Cc1ncnc2nccnc12,-0.466 +NC(=S)N,0.32 +Cc1ccc(C)cc1,-2.77 +CCc1ccccc1CC,-3.28 +ClC(Cl)(Cl)C(Cl)(Cl)Cl,-3.67 +CC(C)C(C(=O)OC(C#N)c1cccc(Oc2ccccc2)c1)c3ccc(OC(F)F)cc3,-6.876 +CCCN(=O)=O,-0.8 +CC(C)C1CCC(C)CC1=O,-2.35 +CCN2c1cc(Cl)ccc1NC(=O)c3cccnc23,-5.36 +O=N(=O)c1c(Cl)c(Cl)ccc1,-3.48 +CCCC(C)C1(CC=C)C(=O)NC(=S)NC1=O,-3.46 +c1ccc2c(c1)c3cccc4cccc2c34,-6 +CCCOC(C)C,-1.34 +Cc1cc(C)c2ccccc2c1,-4.29 +CCC(=C(CC)c1ccc(O)cc1)c2ccc(O)cc2,-4.07 +c1(C#N)c(Cl)c(C#N)c(Cl)c(Cl)c(Cl)1,-5.64 +Clc1ccc(Cl)c(c1)c2ccc(Cl)c(Cl)c2,-7.25 +C1OC1c2ccccc2,-1.6 +CC(C)c1ccccc1,-3.27 +CC12CCC3C(CCC4=CC(=O)CCC34C)C2CCC1C(=O)CO,-3.45 +c2(Cl)c(Cl)c(Cl)c1nccnc1c2(Cl),-5.43 +C1OC(O)C(O)C(O)C1O,0.39 +ClCCl,-0.63 +CCc1cccc2ccccc12,-4.17 +COC=O,0.58 +Oc1ccccc1N(=O)=O,-1.74 +Cc1c[nH]c(=O)[nH]c1=O,-1.506 +CC(C)C,-2.55 +OCC1OC(C(O)C1O)n2cnc3c(O)ncnc23,-1.23 +Oc1c(I)cc(C#N)cc1I,-3.61 +Oc1ccc(Cl)cc1C(=O)Nc2ccc(cc2Cl)N(=O)=O,-4.7 +CCCCC,-3.18 +c1ccccc1O,0 +Nc3ccc2cc1ccccc1cc2c3,-5.17 +Cn1cnc2n(C)c(=O)[nH]c(=O)c12,-2.523 +c1ccc2cnccc2c1,-1.45 +COP(=S)(OC)SCC(=O)N(C(C)C)c1ccc(Cl)cc1,-4.4319999999999995 +CCCCCCc1ccccc1,-5.21 +Clc1ccccc1c2ccccc2,-4.54 +CCCC(=C)C,-3.03 +CC(C)C(C)C(C)C,-4.8 +Clc1cc(Cl)c(Cl)c(Cl)c1Cl,-5.65 +Oc1cccc(c1)N(=O)=O,-1.01 +CCCCCCCCC=C,-5.51 +CC(=O)OCC(COC(=O)C)OC(=O)C,-0.6 +CCCCc1c(C)nc(nc1O)N(C)C,-2.24 +CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2ccc(F)c(Oc3ccccc3)c2,-7.337000000000001 +c1ccncc1,0.76 +CCCCCCCBr,-4.43 +Cc1ccncc1C,0.36 +CC34CC(O)C1(F)C(CCC2=CC(=O)CCC12C)C3CCC4(O)C(=O)CO,-3.43 +CCSCc1ccccc1OC(=O)NC,-2.09 +CCOC(=O)CC(=O)OCC,-0.82 +CC1=CCC(CC1)C(C)=C,-4.26 +C1Cc2ccccc2C1,-3.04 +CC(C)(C)c1ccc(O)cc1,-2.41 +O=C2NC(=O)C1(CC1)C(=O)N2,-1.886 +Clc1cccc(I)c1,-3.55 +Brc1cccc2ccccc12,-4.35 +CC/C=C/C,-2.54 +Cc1cccc(C)n1,0.45 +ClC=C(Cl)Cl,-1.96 +Nc1cccc2ccccc12,-1.92 +Cc1cccc(C)c1,-2.82 +Oc2ncc1nccnc1n2,-1.9469999999999998 +CO,1.57 +CCC1(CCC(C)C)C(=O)NC(=O)NC1=O,-2.468 +CCC(=O)C,0.52 +Fc1c[nH]c(=O)[nH]c1=O,-1.077 +Nc1ncnc2n(ccc12)C3OC(CO)C(O)C3O,-1.95 +Oc1cccc(O)c1,0.81 +CCCCCCO,-1.24 +CCCCCCl,-2.73 +C=CC=C,-1.87 +CCCOC(=O)C,-0.72 +Oc2ccc1CCCCc1c2,-1.99 +NC(=O)CCl,-0.02 +COP(=S)(OC)Oc1cc(Cl)c(I)cc1Cl,-6.62 +Cc1ccc(Cl)cc1,-3.08 +CSc1nnc(c(=O)n1N)C(C)(C)C,-2.253 +Cc1ccc(OP(=O)(Oc2cccc(C)c2)Oc3ccccc3C)cc1,-6.01 +CCCCCC=O,-1.3 +CCCCOC(=O)c1ccc(N)cc1,-3.082 +O2c1cc(C)ccc1N(C)C(=O)c3cc(N)cnc23,-3.043 +CC(C)=CCC/C(C)=C\CO,-2.46 +Clc1ccc(cc1)c2ccccc2Cl,-5.28 +O=C1N(COC(=O)CCCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-6.523 +CCN(=O)=O,-0.22 +CCN(CC(C)=C)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O,-6.124 +Clc1ccc(Cl)c(Cl)c1Cl,-4.57 
+CCCC(C)(COC(N)=O)COC(N)=O,-1.807 +CC(=O)C3CCC4C2CC=C1CC(O)CCC1(C)C2CCC34C,-4.65 +CI,-1 +CC1CC(C)C(=O)C(C1)C(O)CC2CC(=O)NC(=O)C2,-1.13 +O=C1N(COC(=O)CCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-6.301 +CC1=CC(=O)CC(C)(C)C1,-1.06 +O=C1NC(=O)NC(=O)C1(CC)C(C)CC,-2.39 +CCCCC(=O)CCCC,-2.58 +CCC1(CCC(=O)NC1=O)c2ccccc2,-2.3369999999999997 +CCC(C)CC,-3.68 +CCOc1ccc(cc1)C(C)(C)COCc3cccc(Oc2ccccc2)c3,-8.6 +Cc1ccccc1n3c(C)nc2ccccc2c3=O,-2.925 +ClCC#N,-0.092 +CCOP(=S)(CC)Oc1cc(Cl)c(Cl)cc1Cl,-5.752000000000001 +CC12CCC(=O)C=C1CCC3C2CCC4(C)C3CCC4(O)C#C,-5.66 +c1ccnnc1,1.1 +Clc1cc(Cl)c(Cl)c(Cl)c1,-4.63 +C1C(O)CCC2(C)CC3CCC4(C)C5(C)CC6OCC(C)CC6OC5CC4C3C=C21,-7.32 +Nc1ccccc1O,-0.72 +CCCCCCCCC(=O)OCC,-3.8 +COCC(=O)N(C(C)C(=O)OC)c1c(C)cccc1C,-1.601 +CNC(=O)Oc1ccccc1OC(C)C,-2.05 +CCC(C)Cl,-1.96 +Oc1ccc2ccccc2c1,-2.28 +CC(C)Oc1cc(c(Cl)cc1Cl)n2nc(oc2=O)C(C)(C)C,-5.696000000000001 +CCCCC#C,-2.36 +CCCCCCCC#C,-4.24 +Cc1ccccc1Cl,-3.52 +CC(C)OC(C)C,-1.1 +Nc1ccc(cc1)S(=O)(=O)c2ccc(N)cc2,-3.094 +CNN,1.34 +CC#C,-0.41 +CCOP(=S)(OCC)ON=C(C#N)c1ccccc1,-4.862 +CCNP(=S)(OC)OC(=CC(=O)OC(C)C)C,-3.408 +C=CC=O,0.57 +O=c1[nH]cnc2nc[nH]c12,-2.296 +Oc2ccc1ncccc1c2,-2.16 +Fc1ccccc1,-1.8 +CCCCl,-1.47 +CCOC(=O)C,-0.04 +CCCC(C)(C)C,-4.36 +Cc1cc(C)c(C)c(C)c1C,-4 +CC12CCC(CC1)C(C)(C)O2,-1.64 +CCCCOC(=O)CCCCCCCCC(=O)OCCCC,-3.8960000000000004 +Clc1ccc(cc1)c2ccc(Cl)cc2,-6.56 +Cc1cccnc1C,0.38 +CC(=C)C1CC=C(C)C(=O)C1,-2.06 +CCOP(=S)(OCC)SCSc1ccc(Cl)cc1,-5.736000000000001 +COc1cc(cc(OC)c1O)C6C2C(COC2=O)C(OC4OC3COC(C)OC3C(O)C4O)c7cc5OCOc5cc67,-3.571 +c1cc2cccc3c4cccc5cccc(c(c1)c23)c54,-8.804 +Cc1ccc(cc1N(=O)=O)N(=O)=O,-2.82 +c1c(Br)ccc2ccccc12,-4.4 +CNC(=O)Oc1cccc(N=CN(C)C)c1,-2.34 +COc2cnc1ncncc1n2,-1.139 +Cc3ccnc4N(C1CC1)c2ncccc2C(=O)Nc34,-3.19 +CCOP(=S)(OCC)Oc1nc(Cl)n(n1)C(C)C,-3.658 +CC(=C)C=C,-2.03 +CC(C)=CCCC(O)(C)C=C,-1.99 +COP(=S)(OC)Oc1ccc(SC)c(C)c1,-4.57 +OC1CCCCC1,-0.44 +O=C1NC(=O)NC(=O)C1(C)CC=C,-1.16 +CC34CCC1C(CCC2CC(O)CCC12C)C3CCC4=O,-4.16 +OCC(O)C(O)C(O)C(O)CO,0.06 +Cc1ccc(cc1)c2ccccc2,-4.62 +CCNc1nc(Cl)nc(NC(C)C)n1,-3.85 +NC(=S)Nc1ccccc1,-1.77 +CCCC(=O)CCC,-1.3 +CC(=O)C(C)(C)C,-0.72 +Oc1ccc(Cl)cc1,-0.7 +O=C1CCCCC1,-0.6 +Cc1cccc(N)c1,-0.85 +ClC(Cl)(Cl)C#N,-2.168 +CNc2cnn(c1cccc(c1)C(F)(F)F)c(=O)c2Cl,-4.046 +CCCCCCCCC(=O)C,-3.3 +CCN(CC)c1nc(Cl)nc(NC(C)C)n1,-3.785 +CCOC(=O)c1ccc(N)cc1,-2.616 +Clc1ccc(Cl)c(Cl)c1,-3.59 +Cc3nnc4CN=C(c1ccccc1Cl)c2cc(Cl)ccc2n34,-4.09 +Oc1ccccc1O,0.62 +CCN2c1ncccc1N(C)C(=O)c3cccnc23,-2.62 +CSC,-0.45 +Cc1ccccc1Br,-2.23 +CCOC(=O)N,0.85 +CC(=O)OC3(CCC4C2C=C(C)C1=CC(=O)CCC1(C)C2CCC34C)C(C)=O,-5.35 +CC(C)C(O)C(C)C,-1.22 +c1ccc2ccccc2c1,-3.6 +CCNc1ccccc1,-1.7 +O=C1NC(=O)C(N1)(c2ccccc2)c3ccccc3,-4.0969999999999995 +Cc1c2ccccc2c(C)c3ccc4ccccc4c13,-7.02 +CCOP(=S)(OCC)SC(CCl)N1C(=O)c2ccccc2C1=O,-6.34 +COc1ccc(cc1)C(c2ccc(OC)cc2)C(Cl)(Cl)Cl,-6.89 +Fc1cccc(F)c1C(=O)NC(=O)Nc2cc(Cl)c(F)c(Cl)c2F,-7.28 +O=C1N(COC(=O)CCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-4.678 +CN(C)C(=O)Nc1ccc(Cl)cc1,-2.89 +OC(Cn1cncn1)(c2ccc(F)cc2)c3ccccc3F,-3.37 +CC(=O)OCC(=O)C3(O)C(CC4C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC34C)OC(C)=O,-4.13 +CCCCBr,-2.37 +Brc1cc(Br)c(Br)cc1Br,-6.98 +CC(C)CC(=O)C,-0.74 +CCSC(=O)N(CC)C1CCCCC1,-3.4 +COc1ccc(Cl)cc1,-2.78 +CC1(C)C(C=C(Br)Br)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2,-8.402000000000001 +CCC(C)C1(CC=C)C(=O)NC(=O)NC1=O,-2.016 +COP(=S)(OC)Oc1ccc(N(=O)=O)c(C)c1,-4.04 +Ic1cccc2ccccc12,-4.55 +OCC(O)C(O)C(O)C(O)CO,1.09 +CCS,-0.6 +ClCC(Cl)Cl,-1.48 +CN(C)C(=O)Oc1cc(C)nn1c2ccccc2,-2.09 +NC(=O)c1ccccc1O,-1.82 +Cc1ccccc1N(=O)=O,-2.33 +O=C1NC(=O)NC(=O)C1(C(C)C)C(C)C,-2.766 +CCc1ccccc1C,-3.21 +CCCCCCCCl,-4 +O=C1NC(=O)NC(=O)C1(CC)CC,-2.4 
+C(Cc1ccccc1)c2ccccc2,-4.62 +ClC(Cl)C(Cl)Cl,-1.74 +CCN2c1cc(OC)cc(C)c1NC(=O)c3cccnc23,-5.153 +Cc1ccc2c(ccc3ccccc32)c1,-5.84 +CCCCOC(=O)c1ccccc1C(=O)OCCCC,-4.4 +COc1c(O)c(Cl)c(Cl)c(Cl)c1Cl,-4.02 +CCN(CC)C(=O)C(=CCOP(=O)(OC)OC)Cl,0.523 +CC34CCC1C(=CCc2cc(O)ccc12)C3CCC4=O,-5.282 +CCOC(=O)c1ccccc1S(=O)(=O)NN(C=O)c2nc(Cl)cc(OC)n2,-4.5760000000000005 +COc1ccc(cc1)N(=O)=O,-2.41 +CCCCCCCl,-3.12 +Clc1cc(c(Cl)c(Cl)c1Cl)c2cc(Cl)c(Cl)c(Cl)c2Cl,-9.16 +OCC1OC(CO)(OC2OC(COC3OC(CO)C(O)C(O)C3O)C(O)C(O)C2O)C(O)C1O,-0.41 +CCCCCCCCCCCCCCCCCCCCCCCCCC,-8.334 +CCN2c1ccccc1N(C)C(=O)c3cccnc23,-3.324 +CC(Cl)Cl,-1.29 +Nc1ccc(cc1)S(N)(=O)=O,-1.34 +CCCN(CCC)c1c(cc(cc1N(=O)=O)C(C)C)N(=O)=O,-6.49 +ClC1C(Cl)C(Cl)C(Cl)C(Cl)C1Cl,-4.64 +CCOP(=S)(NC(C)C)Oc1ccccc1C(=O)OC(C)C,-4.194 +Clc1cccc(Cl)c1Cl,-4 +ClC(Cl)(Cl)Cl,-2.31 +O=N(=O)c1cc(Cl)c(Cl)cc1,-3.2 +OC1CCCCCCC1,-1.29 +CC1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C,-3.9989999999999997 +CCOc1ccc(NC(N)=O)cc1,-2.17 +C/C1CCC(\C)CC1,-4.47 +c1cnc2c(c1)ccc3ncccc23,-2.68 +COC(C)(C)C,-0.24 +COc1ccc(C=CC)cc1,-3.13 +CCCCCCCCCCCCCCCCO,-7 +O=c1cc[nH]c(=O)[nH]1,-1.4880000000000002 +Nc1ncnc2nc[nH]c12,-2.12 +Clc1cc(Cl)c(cc1Cl)c2cccc(Cl)c2Cl,-7.21 +COc1ccc(cc1)C(O)(C2CC2)c3cncnc3,-2.596 +c1ccc2c(c1)c3cccc4c3c2cc5ccccc54,-8.23 +O=C(Nc1ccccc1)Nc2ccccc2,-3.15 +CCC1(C(=O)NC(=O)NC1=O)c2ccccc2,-2.322 +Clc1ccc(cc1)c2cccc(Cl)c2Cl,-6.29 +CC(C)c1ccc(NC(=O)N(C)C)cc1,-3.536 +CCN(CC)C(=O)CSc1ccc(Cl)nn1,-1.716 +CCC(C)(C)CO,-1.04 +CCCOC(=O)CCC,-1.75 +Cc1c(cc(cc1N(=O)=O)N(=O)=O)N(=O)=O,-3.22 +CC(C)OP(=S)(OC(C)C)SCCNS(=O)(=O)c1ccccc1,-4.2 +C1CCCCCC1,-3.51 +CCCOC=O,-0.49 +CC(C)c1ccccc1C,-3.76 +Nc1cccc(Cl)c1,-1.37 +CC(C)CC(C)C,-4.26 +o1c2ccccc2c3ccccc13,-4.6 +CCOC2Oc1ccc(OS(C)(=O)=O)cc1C2(C)C,-3.42 +CN(C)C(=O)Nc1cccc(c1)C(F)(F)F,-3.43 +c3ccc2nc1ccccc1cc2c3,-3.67 +CC12CC(=O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO,-3.11 +OCC1OC(O)C(O)C(O)C1O,0.74 +Cc1cccc(O)c1,-0.68 +CC2Cc1ccccc1N2NC(=O)c3ccc(Cl)c(c3)S(N)(=O)=O,-3.5860000000000003 +CCC(C)C(=O)OC2CC(C)C=C3C=CC(C)C(CCC1CC(O)CC(=O)O1)C23,-6.005 +O=N(=O)c1ccc(cc1)N(=O)=O,-3.39 +CCC1(C(=O)NC(=O)NC1=O)C2=CCC3CCC2C3,-2.696 +CCCCCCCCCC(=O)OCC,-4.1 +CN(C)C(=O)Nc1ccccc1,-1.6 +CCCOCC,-0.66 +CC(C)O,0.43 +Cc1ccc2ccccc2c1,-3.77 +ClC(Br)Br,-1.9 +CCC(C(CC)c1ccc(O)cc1)c2ccc(O)cc2,-4.43 +CCOC(=O)CC(SP(=S)(OC)OC)C(=O)OCC,-3.37 +ClCc1ccccc1,-2.39 +C/C=C/C=O,0.32 +CON(C)C(=O)Nc1ccc(Br)c(Cl)c1,-3.924 +Cc1c2ccccc2c(C)c3ccccc13,-6.57 +CCCCCC(=O)OC,-1.87 +CN(C)C(=O)Nc1ccc(c(Cl)c1)n2nc(oc2=O)C(C)(C)C,-4.328 +CC(=O)Nc1ccc(F)cc1,-1.78 +CCc1cccc(CC)c1N(COC)C(=O)CCl,-3.26 +C1CCC=CC1,-2.59 +CC12CC(O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO,-3.09 +c1cncnc1,1.1 +Clc1ccc(cc1)N(=O)=O,-2.92 +CCC(=O)OC,-0.14 +Clc1ccccc1N(=O)=O,-2.55 +CCCCN(C)C(=O)Nc1ccc(Cl)c(Cl)c1,-4.77 +CN1CC(O)N(C1=O)c2nnc(s2)C(C)(C)C,-1.8769999999999998 +O=N(=O)c1ccccc1,-1.8 +Ic1ccccc1,-3.01 +CC2Nc1cc(Cl)c(cc1C(=O)N2c3ccccc3C)S(N)(=O)=O,-3.78 +COc1ccccc1OCC(O)COC(N)=O,-0.985 +CCCCOCN(C(=O)CCl)c1c(CC)cccc1CC,-4.19 +Oc1cccc(Cl)c1Cl,-1.3 +CCCC(=O)OC,-1.92 +CCC(=O)Nc1ccc(Cl)c(Cl)c1,-3 +Nc3nc(N)c2nc(c1ccccc1)c(N)nc2n3,-2.404 +CCCCCC(=O)OCC,-2.35 +OCC(O)C2OC1OC(OC1C2O)C(Cl)(Cl)Cl,-1.84 +CN(C=Nc1ccc(C)cc1C)C=Nc2ccc(C)cc2C,-5.47 +COc1nc(NC(C)C)nc(NC(C)C)n1,-2.478 +CCCCCCC=C,-4.44 +Cc1ccc(N)cc1,-1.21 +Nc1nccs1,-0.36 +c1ccccc1(OC(=O)NC),-1.8030000000000002 +CCCC(O)CC,-0.8 +c3ccc2c(O)c1ccccc1cc2c3,-4.73 +Cc1ccc2cc3ccccc3cc2c1,-6.96 +Cc1cccc(C)c1C,-3.2 +CNC(=O)Oc1ccc(N(C)C)c(C)c1,-2.36 +CCCCCCCC(C)O,-2.74 +CN(C(=O)NC(C)(C)c1ccccc1)c2ccccc2,-3.35 +CCCC(=O)CC,-0.83 +Oc1c(Br)cc(C#N)cc1Br,-3.33 +Clc1ccc(cc1Cl)c2ccccc2,-6.39 
+CN(C(=O)COc1nc2ccccc2s1)c3ccccc3,-4.873 +Oc1cccc2ncccc12,-2.54 +CC1=C(SCCO1)C(=O)Nc2ccccc2,-3.14 +CCOc2ccc1nc(sc1c2)S(N)(=O)=O,-3.81 +Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl,-4.28 +ClCBr,-0.89 +CCC1(CC)C(=O)NC(=O)N(C)C1=O,-2.23 +CC(=O)OCC(=O)C3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C,-4.63 +NC(=O)NCc1ccccc1,-0.95 +CN(C)C(=O)Nc1ccc(C)c(Cl)c1,-3.483 +CON(C)C(=O)Nc1ccc(Cl)c(Cl)c1,-3.592 +OC1CCCCCC1,-0.88 +CS(=O)(=O)c1ccc(cc1)C(O)C(CO)NC(=O)C(Cl)Cl,-2.154 +CCCC(C)C1(CC)C(=O)NC(=S)NC1=O,-3.36 +CC(=O)Nc1nnc(s1)S(N)(=O)=O,-2.36 +Oc1ccc(cc1)N(=O)=O,-0.74 +ClC1=C(Cl)C2(Cl)C3C4CC(C=C4)C3C1(Cl)C2(Cl)Cl,-6.307 +C1CCOC1,0.49 +Nc1ccccc1N(=O)=O,-1.96 +Clc1cccc(c1Cl)c2cccc(Cl)c2Cl,-7.28 +CCCCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3,-3.81 +Cc1c(cccc1N(=O)=O)N(=O)=O,-3 +CC(=O)C1CCC2C3CCC4=CC(=O)CCC4(C)C3CCC12C,-4.42 +CCN(CC)c1nc(Cl)nc(n1)N(CC)CC,-4.4110000000000005 +ClC(Cl)C(Cl)(Cl)SN2C(=O)C1CC=CCC1C2=O,-5.4 +c1(Br)c(Br)cc(Br)cc1,-4.5 +OC3N=C(c1ccccc1)c2cc(Cl)ccc2NC3=O,-3.952 +O=C1NC(=O)NC(=O)C1(C(C)CCC)CC=C,-2.356 +c1(O)c(C)ccc(C(C)C)c1,-2.08 +C1SC(=S)NC1(=O),-1.77 +Oc1ccc(c(O)c1)c3oc2cc(O)cc(O)c2c(=O)c3O,-3.083 +ClC1(C(=O)C2(Cl)C3(Cl)C14Cl)C5(Cl)C2(Cl)C3(Cl)C(Cl)(Cl)C45Cl,-5.2589999999999995 +CCN(CC)C(=S)SSC(=S)N(CC)CC,-4.86 +C1CCCCC1,-3.1 +ClC1=C(Cl)C(Cl)(C(=C1Cl)Cl)C2(Cl)C(=C(Cl)C(=C2Cl)Cl)Cl,-7.278 +CN(C)C=Nc1ccc(Cl)cc1C,-2.86 +CC34CCc1c(ccc2cc(O)ccc12)C3CCC4=O,-5.24 +CCCCCCCCO,-2.39 +CCSCC,-1.34 +ClCCCl,-1.06 +CCC(C)(C)Cl,-2.51 +ClCCBr,-1.32 +Nc1ccc(cc1)N(=O)=O,-2.37 +OCC1OC(OC2C(O)C(O)C(O)OC2CO)C(O)C(O)C1O,-0.244 +CCN2c1ncccc1N(CC)C(=O)c3cccnc23,-2.86 +Clc1ccccc1,-2.38 +CCCCCCCC=C,-5.05 +Brc1ccc(I)cc1,-4.56 +CCC(C)(O)CC,-0.36 +CCCCCc1ccccc1,-4.64 +NC(=O)NC1NC(=O)NC1=O,-1.6 +OCC(O)COC(=O)c1ccccc1Nc2ccnc3cc(Cl)ccc23,-4.571000000000001 +ClC(Cl)C(c1ccc(Cl)cc1)c2ccc(Cl)cc2,-7.2 +CC(=O)OC3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C,-5.184 +Clc1cccc2ccccc12,-3.93 +CCN2c1ccccc1N(C)C(=O)c3ccccc23,-4.749 +CCCCC(C)O,-0.89 +CCCC1CCCC1,-4.74 +CCOC(=O)c1cncn1C(C)c2ccccc2,-4.735 +Oc1ccc(Cl)c(Cl)c1,-1.25 +CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2,-8.017000000000001 +c2ccc1ocnc1c2,-1.16 +CCCCCO,-0.6 +CCN(CC)c1ccccc1,-3.03 +Fc1cccc(F)c1,-2 +ClCCC#N,-0.29 +CC(C)(C)Cc1ccccc1,-4.15 +O=C1NC(=O)NC(=O)C1(CC)c1ccccc1,-2.322 +Clc1ccccc1I,-3.54 +c2ccc1[nH]nnc1c2,-0.78 +CNC(=O)Oc1cccc2CC(C)(C)Oc12,-2.8 +Cc1cccc(C)c1O,-1.29 +CC(C)C(C)O,-0.18 +c1ccccc1C(O)c2ccccc2,-2.55 +CCCCCCCCCC(=O)OC,-4.69 +COP(=S)(OC)Oc1ccc(cc1Cl)N(=O)=O,-4.31 +CC(C)CBr,-2.43 +CCI,-1.6 +CN(C)C(=O)Oc1nc(nc(C)c1C)N(C)C,-1.95 +CCCCCCBr,-3.81 +CCCC(C)C,-3.74 +Cc1c(F)c(F)c(COC(=O)C2C(C=C(Cl)C(F)(F)F)C2(C)C)c(F)c1F,-7.321000000000001 +CCc1cccc(C)c1N(C(C)COC)C(=O)CCl,-2.73 +ON=Cc1ccc(o1)N(=O)=O,-2.19 +CC(C)C(Nc1ccc(cc1Cl)C(F)(F)F)C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2,-8.003 +Nc1nc[nH]n1,0.522 +BrC(Br)Br,-1.91 +COP(=O)(OC)C(O)C(Cl)(Cl)Cl,-0.22 +CCOP(=S)(OCC)SCn1c(=O)oc2cc(Cl)ccc12,-5.233 +OCc1ccccc1,-0.4 +O=c2c(C3CCCc4ccccc43)c(O)c1ccccc1o2,-2.84 +Oc1ccc(Br)cc1,-1.09 +CC(C)Br,-1.59 +CC(C)CC(C)(C)C,-4.74 +O=N(=O)c1cc(cc(c1)N(=O)=O)N(=O)=O,-2.89 +CN2C(=O)CN=C(c1ccccc1)c3cc(ccc23)N(=O)=O,-3.7960000000000003 +CCC,-1.94 +Nc1cc(nc(N)n1=O)N2CCCCC2,-1.989 +Nc2cccc3nc1ccccc1cc23,-4.22 +c1ccc2cc3c4cccc5cccc(c3cc2c1)c45,-8.49 +OC(c1ccc(Cl)cc1)(c2ccc(Cl)cc2)C(Cl)(Cl)Cl,-5.666 +C1Cc2cccc3cccc1c23,-4.63 diff --git a/chemprop-updated/tests/data/regression.npz b/chemprop-updated/tests/data/regression.npz new file mode 100644 index 0000000000000000000000000000000000000000..b14954faf3380e2530cd4de2d5a0866fd4c2e515 --- /dev/null +++ b/chemprop-updated/tests/data/regression.npz @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:2640ae20ad2e8797f4f4458789473a2f6cad41f92284c340e40fea8b51dc15cc +size 242650 diff --git a/chemprop-updated/tests/data/regression/bounded.csv b/chemprop-updated/tests/data/regression/bounded.csv new file mode 100644 index 0000000000000000000000000000000000000000..5625e11da19b90d76116c5b0671abf427e333a25 --- /dev/null +++ b/chemprop-updated/tests/data/regression/bounded.csv @@ -0,0 +1,501 @@ +"smiles","logSolubility" +"OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O","<-0.77" +"Cc1occc1C(=O)Nc2ccccc2","<-3.3" +"CC(C)=CCCC(C)=CC(=O)","<-2.06" +"c1ccc2c(c1)ccc3c2ccc4c5ccccc5ccc43","<-7.87" +"c1ccsc1","<-1.33" +"c2ccc1scnc1c2","<-1.5" +"Clc1cc(Cl)c(c(Cl)c1)c2c(Cl)cccc2Cl","<-7.32" +"CC12CCC3C(CCc4cc(O)ccc34)C2CCC1O","<-5.03" +"ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl","<-6.29" +"COc5cc4OCC3Oc2c1CC(Oc1ccc2C(=O)C3c4cc5OC)C(C)=C","<-4.42" +"O=C1CCCN1","<1.07" +"Clc1ccc2ccccc2c1","<-4.14" +"CCCC=C","<-2.68" +"CCC1(C(=O)NCNC1=O)c2ccccc2","<-2.64" +"CCCCCCCCCCCCCC","<-7.96" +"CC(C)Cl","<-1.41" +"CCC(C)CO","<-0.47" +"N#Cc1ccccc1","<-1" +"CCOP(=S)(OCC)Oc1cc(C)nc(n1)C(C)C","<-3.64" +"CCCCCCCCCC(C)O","<-2.94" +"Clc1ccc(c(Cl)c1)c2c(Cl)ccc(Cl)c2Cl","<-7.43" +"O=c2[nH]c1CCCc1c(=O)n2C3CCCCC3","<-4.594" +"CCOP(=S)(OCC)SCSCC","<-4.11" +"CCOc1ccc(NC(=O)C)cc1","<-2.35" +"CCN(CC)c1c(cc(c(N)c1N(=O)=O)C(F)(F)F)N(=O)=O","<-5.47" +"CCCCCCCO","<-1.81" +"Cn1c(=O)n(C)c2nc[nH]c2c1=O","<-1.39" +"CCCCC1(CC)C(=O)NC(=O)NC1=O","<-1.661" +"ClC(Cl)=C(c1ccc(Cl)cc1)c2ccc(Cl)cc2","<-6.9" +"CCCCCCCC(=O)OC","<-3.17" +"CCc1ccc(CC)cc1","<-3.75" +"CCOP(=S)(OCC)SCSC(C)(C)C","<-4.755" +"COC(=O)Nc1cccc(OC(=O)Nc2cccc(C)c2)c1","<-4.805" +"ClC(=C)Cl","<-1.64" +"Cc1cccc2c1Cc3ccccc32","<-5.22" +"CCCCC=O","<-0.85" +"N(c1ccccc1)c2ccccc2","<-3.504" +"CN(C)C(=O)SCCCCOc1ccccc1","<-3.927" +"CCCOP(=S)(OCCC)SCC(=O)N1CCCCC1C","<-4.15" +"CCCCCCCI","<-4.81" +"c1c(Cl)cccc1c2ccccc2","<-4.88" +"OCCCC=C","<-0.15" +"O=C2NC(=O)C1(CCC1)C(=O)N2","<-1.655" +"CC(C)C1CCC(C)CC1O","<-2.53" +"CC(C)OC=O","<-0.63" +"CCCCCC(C)O","<-1.55" +"CC(=O)Nc1ccc(Br)cc1","<-3.083" +"c1ccccc1n2ncc(N)c(Br)c2(=O)","<-3.127" +"COC(=O)C1=C(C)NC(=C(C1c2ccccc2N(=O)=O)C(=O)OC)C","<-4.76" +"c2c(C)cc1nc(C)ccc1c2","<-1.94" +"CCCCCCC#C",">-3.66" +"CCC1(C(=O)NC(=O)NC1=O)C2=CCCCC2",">-2.17" +"c1ccc2c(c1)ccc3c4ccccc4ccc23",">-8.057" +"CCC(C)n1c(=O)[nH]c(C)c(Br)c1=O",">-2.523" +"Clc1cccc(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl",">-8.6" +"Cc1ccccc1O",">-0.62" +"CC(C)CCC(C)(C)C",">-5.05" +"Cc1ccc(C)c2ccccc12",">-4.14" +"Cc1cc2c3ccccc3ccc2c4ccccc14",">-6.57" +"CCCC(=O)C",">-0.19" +"Clc1cc(Cl)c(Cl)c(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl",">-9.15" +"CCCOC(=O)CC",">-0.82" +"CC34CC(O)C1(F)C(CCC2=CC(=O)C=CC12C)C3CC(O)C4(O)C(=O)CO",">-3.68" +"Nc1ccc(O)cc1",">-0.8" +"O=C(Cn1ccnc1N(=O)=O)NCc2ccccc2",">-2.81" +"OC4=C(C1CCC(CC1)c2ccc(Cl)cc2)C(=O)c3ccccc3C4=O",">-5.931" +"CCNc1nc(Cl)nc(n1)N(CC)CC",">-4.06" +"NC(=O)c1cnccn1",">-0.667" +"CCC(Br)(CC)C(=O)NC(N)=O",">-2.68" +"Clc1ccccc1c2ccccc2Cl",">-5.27" +"O=C2CN(N=Cc1ccc(o1)N(=O)=O)C(=O)N2",">-3.38" +"Clc2ccc(Oc1ccc(cc1)N(=O)=O)c(Cl)c2",">-5.46" +"CC1(C)C2CCC1(C)C(=O)C2",">-1.96" +"O=C1NC(=O)NC(=O)C1(CC=C)c1ccccc1",">-2.369" +"CCCCC(=O)OCC",">-2.25" +"CC(C)CCOC(=O)C",">-1.92" +"O=C1N(COC(=O)CCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3",">-5.886" +"Clc1cccc(c1)c2cc(Cl)ccc2Cl",">-6.01" +"CCCBr",">-1.73" +"CCCC1COC(Cn2cncn2)(O1)c3ccc(Cl)cc3Cl",">-3.493" +"COP(=S)(OC)SCC(=O)N(C)C=O",">-1.995" +"Cc1ncnc2nccnc12",">-0.466" +"NC(=S)N",">0.32" +"Cc1ccc(C)cc1",">-2.77" +"CCc1ccccc1CC",">-3.28" +"ClC(Cl)(Cl)C(Cl)(Cl)Cl",">-3.67" 
+"CC(C)C(C(=O)OC(C#N)c1cccc(Oc2ccccc2)c1)c3ccc(OC(F)F)cc3",">-6.876" +"CCCN(=O)=O",">-0.8" +"CC(C)C1CCC(C)CC1=O",">-2.35" +"CCN2c1cc(Cl)ccc1NC(=O)c3cccnc23",">-5.36" +"O=N(=O)c1c(Cl)c(Cl)ccc1",">-3.48" +"CCCC(C)C1(CC=C)C(=O)NC(=S)NC1=O",">-3.46" +"c1ccc2c(c1)c3cccc4cccc2c34",">-6" +"CCCOC(C)C",">-1.34" +"Cc1cc(C)c2ccccc2c1",">-4.29" +"CCC(=C(CC)c1ccc(O)cc1)c2ccc(O)cc2",">-4.07" +"c1(C#N)c(Cl)c(C#N)c(Cl)c(Cl)c(Cl)1",">-5.64" +"Clc1ccc(Cl)c(c1)c2ccc(Cl)c(Cl)c2",">-7.25" +"C1OC1c2ccccc2",">-1.6" +"CC(C)c1ccccc1",">-3.27" +"CC12CCC3C(CCC4=CC(=O)CCC34C)C2CCC1C(=O)CO",-3.45 +"c2(Cl)c(Cl)c(Cl)c1nccnc1c2(Cl)",-5.43 +"C1OC(O)C(O)C(O)C1O",0.39 +"ClCCl",-0.63 +"CCc1cccc2ccccc12",-4.17 +"COC=O",0.58 +"Oc1ccccc1N(=O)=O",-1.74 +"Cc1c[nH]c(=O)[nH]c1=O",-1.506 +"CC(C)C",-2.55 +"OCC1OC(C(O)C1O)n2cnc3c(O)ncnc23",-1.23 +"Oc1c(I)cc(C#N)cc1I",-3.61 +"Oc1ccc(Cl)cc1C(=O)Nc2ccc(cc2Cl)N(=O)=O",-4.7 +"CCCCC",-3.18 +"c1ccccc1O",0 +"Nc3ccc2cc1ccccc1cc2c3",-5.17 +"Cn1cnc2n(C)c(=O)[nH]c(=O)c12",-2.523 +"c1ccc2cnccc2c1",-1.45 +"COP(=S)(OC)SCC(=O)N(C(C)C)c1ccc(Cl)cc1",-4.432 +"CCCCCCc1ccccc1",-5.21 +"Clc1ccccc1c2ccccc2",-4.54 +"CCCC(=C)C",-3.03 +"CC(C)C(C)C(C)C",-4.8 +"Clc1cc(Cl)c(Cl)c(Cl)c1Cl",-5.65 +"Oc1cccc(c1)N(=O)=O",-1.01 +"CCCCCCCCC=C",-5.51 +"CC(=O)OCC(COC(=O)C)OC(=O)C",-0.6 +"CCCCc1c(C)nc(nc1O)N(C)C",-2.24 +"CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2ccc(F)c(Oc3ccccc3)c2",-7.337 +"c1ccncc1",0.76 +"CCCCCCCBr",-4.43 +"Cc1ccncc1C",0.36 +"CC34CC(O)C1(F)C(CCC2=CC(=O)CCC12C)C3CCC4(O)C(=O)CO",-3.43 +"CCSCc1ccccc1OC(=O)NC",-2.09 +"CCOC(=O)CC(=O)OCC",-0.82 +"CC1=CCC(CC1)C(C)=C",-4.26 +"C1Cc2ccccc2C1",-3.04 +"CC(C)(C)c1ccc(O)cc1",-2.41 +"O=C2NC(=O)C1(CC1)C(=O)N2",-1.886 +"Clc1cccc(I)c1",-3.55 +"Brc1cccc2ccccc12",-4.35 +"CC/C=C/C",-2.54 +"Cc1cccc(C)n1",0.45 +"ClC=C(Cl)Cl",-1.96 +"Nc1cccc2ccccc12",-1.92 +"Cc1cccc(C)c1",-2.82 +"Oc2ncc1nccnc1n2",-1.947 +"CO",1.57 +"CCC1(CCC(C)C)C(=O)NC(=O)NC1=O",-2.468 +"CCC(=O)C",0.52 +"Fc1c[nH]c(=O)[nH]c1=O",-1.077 +"Nc1ncnc2n(ccc12)C3OC(CO)C(O)C3O",-1.95 +"Oc1cccc(O)c1",0.81 +"CCCCCCO",-1.24 +"CCCCCCl",-2.73 +"C=CC=C",-1.87 +"CCCOC(=O)C",-0.72 +"Oc2ccc1CCCCc1c2",-1.99 +"NC(=O)CCl",-0.02 +"COP(=S)(OC)Oc1cc(Cl)c(I)cc1Cl",-6.62 +"Cc1ccc(Cl)cc1",-3.08 +"CSc1nnc(c(=O)n1N)C(C)(C)C",-2.253 +"Cc1ccc(OP(=O)(Oc2cccc(C)c2)Oc3ccccc3C)cc1",-6.01 +"CCCCCC=O",-1.3 +"CCCCOC(=O)c1ccc(N)cc1",-3.082 +"O2c1cc(C)ccc1N(C)C(=O)c3cc(N)cnc23",-3.043 +"CC(C)=CCC/C(C)=C\CO",-2.46 +"Clc1ccc(cc1)c2ccccc2Cl",-5.28 +"O=C1N(COC(=O)CCCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3",-6.523 +"CCN(=O)=O",-0.22 +"CCN(CC(C)=C)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O",-6.124 +"Clc1ccc(Cl)c(Cl)c1Cl",-4.57 +"CCCC(C)(COC(N)=O)COC(N)=O",-1.807 +"CC(=O)C3CCC4C2CC=C1CC(O)CCC1(C)C2CCC34C",-4.65 +"CI",-1 +"CC1CC(C)C(=O)C(C1)C(O)CC2CC(=O)NC(=O)C2",-1.13 +"O=C1N(COC(=O)CCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3",-6.301 +"CC1=CC(=O)CC(C)(C)C1",-1.06 +"O=C1NC(=O)NC(=O)C1(CC)C(C)CC",-2.39 +"CCCCC(=O)CCCC",-2.58 +"CCC1(CCC(=O)NC1=O)c2ccccc2",-2.337 +"CCC(C)CC",-3.68 +"CCOc1ccc(cc1)C(C)(C)COCc3cccc(Oc2ccccc2)c3",-8.6 +"Cc1ccccc1n3c(C)nc2ccccc2c3=O",-2.925 +"ClCC#N",-0.092 +"CCOP(=S)(CC)Oc1cc(Cl)c(Cl)cc1Cl",-5.752 +"CC12CCC(=O)C=C1CCC3C2CCC4(C)C3CCC4(O)C#C",-5.66 +"c1ccnnc1",1.1 +"Clc1cc(Cl)c(Cl)c(Cl)c1",-4.63 +"C1C(O)CCC2(C)CC3CCC4(C)C5(C)CC6OCC(C)CC6OC5CC4C3C=C21",-7.32 +"Nc1ccccc1O",-0.72 +"CCCCCCCCC(=O)OCC",-3.8 +"COCC(=O)N(C(C)C(=O)OC)c1c(C)cccc1C",-1.601 +"CNC(=O)Oc1ccccc1OC(C)C",-2.05 +"CCC(C)Cl",-1.96 +"Oc1ccc2ccccc2c1",-2.28 +"CC(C)Oc1cc(c(Cl)cc1Cl)n2nc(oc2=O)C(C)(C)C",-5.696 +"CCCCC#C",-2.36 +"CCCCCCCC#C",-4.24 +"Cc1ccccc1Cl",-3.52 +"CC(C)OC(C)C",-1.1 
+"Nc1ccc(cc1)S(=O)(=O)c2ccc(N)cc2",-3.094 +"CNN",1.34 +"CC#C",-0.41 +"CCOP(=S)(OCC)ON=C(C#N)c1ccccc1",-4.862 +"CCNP(=S)(OC)OC(=CC(=O)OC(C)C)C",-3.408 +"C=CC=O",0.57 +"O=c1[nH]cnc2nc[nH]c12",-2.296 +"Oc2ccc1ncccc1c2",-2.16 +"Fc1ccccc1",-1.8 +"CCCCl",-1.47 +"CCOC(=O)C",-0.04 +"CCCC(C)(C)C",-4.36 +"Cc1cc(C)c(C)c(C)c1C",-4 +"CC12CCC(CC1)C(C)(C)O2",-1.64 +"CCCCOC(=O)CCCCCCCCC(=O)OCCCC",-3.896 +"Clc1ccc(cc1)c2ccc(Cl)cc2",-6.56 +"Cc1cccnc1C",0.38 +"CC(=C)C1CC=C(C)C(=O)C1",-2.06 +"CCOP(=S)(OCC)SCSc1ccc(Cl)cc1",-5.736 +"COc1cc(cc(OC)c1O)C6C2C(COC2=O)C(OC4OC3COC(C)OC3C(O)C4O)c7cc5OCOc5cc67",-3.571 +"c1cc2cccc3c4cccc5cccc(c(c1)c23)c54",-8.804 +"Cc1ccc(cc1N(=O)=O)N(=O)=O",-2.82 +"c1c(Br)ccc2ccccc12",-4.4 +"CNC(=O)Oc1cccc(N=CN(C)C)c1",-2.34 +"COc2cnc1ncncc1n2",-1.139 +"Cc3ccnc4N(C1CC1)c2ncccc2C(=O)Nc34",-3.19 +"CCOP(=S)(OCC)Oc1nc(Cl)n(n1)C(C)C",-3.658 +"CC(=C)C=C",-2.03 +"CC(C)=CCCC(O)(C)C=C",-1.99 +"COP(=S)(OC)Oc1ccc(SC)c(C)c1",-4.57 +"OC1CCCCC1",-0.44 +"O=C1NC(=O)NC(=O)C1(C)CC=C",-1.16 +"CC34CCC1C(CCC2CC(O)CCC12C)C3CCC4=O",-4.16 +"OCC(O)C(O)C(O)C(O)CO",0.06 +"Cc1ccc(cc1)c2ccccc2",-4.62 +"CCNc1nc(Cl)nc(NC(C)C)n1",-3.85 +"NC(=S)Nc1ccccc1",-1.77 +"CCCC(=O)CCC",-1.3 +"CC(=O)C(C)(C)C",-0.72 +"Oc1ccc(Cl)cc1",-0.7 +"O=C1CCCCC1",-0.6 +"Cc1cccc(N)c1",-0.85 +"ClC(Cl)(Cl)C#N",-2.168 +"CNc2cnn(c1cccc(c1)C(F)(F)F)c(=O)c2Cl",-4.046 +"CCCCCCCCC(=O)C",-3.3 +"CCN(CC)c1nc(Cl)nc(NC(C)C)n1",-3.785 +"CCOC(=O)c1ccc(N)cc1",-2.616 +"Clc1ccc(Cl)c(Cl)c1",-3.59 +"Cc3nnc4CN=C(c1ccccc1Cl)c2cc(Cl)ccc2n34",-4.09 +"Oc1ccccc1O",0.62 +"CCN2c1ncccc1N(C)C(=O)c3cccnc23",-2.62 +"CSC",-0.45 +"Cc1ccccc1Br",-2.23 +"CCOC(=O)N",0.85 +"CC(=O)OC3(CCC4C2C=C(C)C1=CC(=O)CCC1(C)C2CCC34C)C(C)=O",-5.35 +"CC(C)C(O)C(C)C",-1.22 +"c1ccc2ccccc2c1",-3.6 +"CCNc1ccccc1",-1.7 +"O=C1NC(=O)C(N1)(c2ccccc2)c3ccccc3",-4.097 +"Cc1c2ccccc2c(C)c3ccc4ccccc4c13",-7.02 +"CCOP(=S)(OCC)SC(CCl)N1C(=O)c2ccccc2C1=O",-6.34 +"COc1ccc(cc1)C(c2ccc(OC)cc2)C(Cl)(Cl)Cl",-6.89 +"Fc1cccc(F)c1C(=O)NC(=O)Nc2cc(Cl)c(F)c(Cl)c2F",-7.28 +"O=C1N(COC(=O)CCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3",-4.678 +"CN(C)C(=O)Nc1ccc(Cl)cc1",-2.89 +"OC(Cn1cncn1)(c2ccc(F)cc2)c3ccccc3F",-3.37 +"CC(=O)OCC(=O)C3(O)C(CC4C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC34C)OC(C)=O",-4.13 +"CCCCBr",-2.37 +"Brc1cc(Br)c(Br)cc1Br",-6.98 +"CC(C)CC(=O)C",-0.74 +"CCSC(=O)N(CC)C1CCCCC1",-3.4 +"COc1ccc(Cl)cc1",-2.78 +"CC1(C)C(C=C(Br)Br)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2",-8.402 +"CCC(C)C1(CC=C)C(=O)NC(=O)NC1=O",-2.016 +"COP(=S)(OC)Oc1ccc(N(=O)=O)c(C)c1",-4.04 +"Ic1cccc2ccccc12",-4.55 +"OCC(O)C(O)C(O)C(O)CO",1.09 +"CCS",-0.6 +"ClCC(Cl)Cl",-1.48 +"CN(C)C(=O)Oc1cc(C)nn1c2ccccc2",-2.09 +"NC(=O)c1ccccc1O",-1.82 +"Cc1ccccc1N(=O)=O",-2.33 +"O=C1NC(=O)NC(=O)C1(C(C)C)C(C)C",-2.766 +"CCc1ccccc1C",-3.21 +"CCCCCCCCl",-4 +"O=C1NC(=O)NC(=O)C1(CC)CC",-2.4 +"C(Cc1ccccc1)c2ccccc2",-4.62 +"ClC(Cl)C(Cl)Cl",-1.74 +"CCN2c1cc(OC)cc(C)c1NC(=O)c3cccnc23",-5.153 +"Cc1ccc2c(ccc3ccccc32)c1",-5.84 +"CCCCOC(=O)c1ccccc1C(=O)OCCCC",-4.4 +"COc1c(O)c(Cl)c(Cl)c(Cl)c1Cl",-4.02 +"CCN(CC)C(=O)C(=CCOP(=O)(OC)OC)Cl",0.523 +"CC34CCC1C(=CCc2cc(O)ccc12)C3CCC4=O",-5.282 +"CCOC(=O)c1ccccc1S(=O)(=O)NN(C=O)c2nc(Cl)cc(OC)n2",-4.576 +"COc1ccc(cc1)N(=O)=O",-2.41 +"CCCCCCCl",-3.12 +"Clc1cc(c(Cl)c(Cl)c1Cl)c2cc(Cl)c(Cl)c(Cl)c2Cl",-9.16 +"OCC1OC(CO)(OC2OC(COC3OC(CO)C(O)C(O)C3O)C(O)C(O)C2O)C(O)C1O",-0.41 +"CCCCCCCCCCCCCCCCCCCCCCCCCC",-8.334 +"CCN2c1ccccc1N(C)C(=O)c3cccnc23",-3.324 +"CC(Cl)Cl",-1.29 +"Nc1ccc(cc1)S(N)(=O)=O",-1.34 +"CCCN(CCC)c1c(cc(cc1N(=O)=O)C(C)C)N(=O)=O",-6.49 +"ClC1C(Cl)C(Cl)C(Cl)C(Cl)C1Cl",-4.64 +"CCOP(=S)(NC(C)C)Oc1ccccc1C(=O)OC(C)C",-4.194 +"Clc1cccc(Cl)c1Cl",-4 
+"ClC(Cl)(Cl)Cl",-2.31 +"O=N(=O)c1cc(Cl)c(Cl)cc1",-3.2 +"OC1CCCCCCC1",-1.29 +"CC1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C",-3.999 +"CCOc1ccc(NC(N)=O)cc1",-2.17 +"C/C1CCC(\C)CC1",-4.47 +"c1cnc2c(c1)ccc3ncccc23",-2.68 +"COC(C)(C)C",-0.24 +"COc1ccc(C=CC)cc1",-3.13 +"CCCCCCCCCCCCCCCCO",-7 +"O=c1cc[nH]c(=O)[nH]1",-1.488 +"Nc1ncnc2nc[nH]c12",-2.12 +"Clc1cc(Cl)c(cc1Cl)c2cccc(Cl)c2Cl",-7.21 +"COc1ccc(cc1)C(O)(C2CC2)c3cncnc3",-2.596 +"c1ccc2c(c1)c3cccc4c3c2cc5ccccc54",-8.23 +"O=C(Nc1ccccc1)Nc2ccccc2",-3.15 +"CCC1(C(=O)NC(=O)NC1=O)c2ccccc2",-2.322 +"Clc1ccc(cc1)c2cccc(Cl)c2Cl",-6.29 +"CC(C)c1ccc(NC(=O)N(C)C)cc1",-3.536 +"CCN(CC)C(=O)CSc1ccc(Cl)nn1",-1.716 +"CCC(C)(C)CO",-1.04 +"CCCOC(=O)CCC",-1.75 +"Cc1c(cc(cc1N(=O)=O)N(=O)=O)N(=O)=O",-3.22 +"CC(C)OP(=S)(OC(C)C)SCCNS(=O)(=O)c1ccccc1",-4.2 +"C1CCCCCC1",-3.51 +"CCCOC=O",-0.49 +"CC(C)c1ccccc1C",-3.76 +"Nc1cccc(Cl)c1",-1.37 +"CC(C)CC(C)C",-4.26 +"o1c2ccccc2c3ccccc13",-4.6 +"CCOC2Oc1ccc(OS(C)(=O)=O)cc1C2(C)C",-3.42 +"CN(C)C(=O)Nc1cccc(c1)C(F)(F)F",-3.43 +"c3ccc2nc1ccccc1cc2c3",-3.67 +"CC12CC(=O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO",-3.11 +"OCC1OC(O)C(O)C(O)C1O",0.74 +"Cc1cccc(O)c1",-0.68 +"CC2Cc1ccccc1N2NC(=O)c3ccc(Cl)c(c3)S(N)(=O)=O",-3.586 +"CCC(C)C(=O)OC2CC(C)C=C3C=CC(C)C(CCC1CC(O)CC(=O)O1)C23",-6.005 +"O=N(=O)c1ccc(cc1)N(=O)=O",-3.39 +"CCC1(C(=O)NC(=O)NC1=O)C2=CCC3CCC2C3",-2.696 +"CCCCCCCCCC(=O)OCC",-4.1 +"CN(C)C(=O)Nc1ccccc1",-1.6 +"CCCOCC",-0.66 +"CC(C)O",0.43 +"Cc1ccc2ccccc2c1",-3.77 +"ClC(Br)Br",-1.9 +"CCC(C(CC)c1ccc(O)cc1)c2ccc(O)cc2",-4.43 +"CCOC(=O)CC(SP(=S)(OC)OC)C(=O)OCC",-3.37 +"ClCc1ccccc1",-2.39 +"C/C=C/C=O",0.32 +"CON(C)C(=O)Nc1ccc(Br)c(Cl)c1",-3.924 +"Cc1c2ccccc2c(C)c3ccccc13",-6.57 +"CCCCCC(=O)OC",-1.87 +"CN(C)C(=O)Nc1ccc(c(Cl)c1)n2nc(oc2=O)C(C)(C)C",-4.328 +"CC(=O)Nc1ccc(F)cc1",-1.78 +"CCc1cccc(CC)c1N(COC)C(=O)CCl",-3.26 +"C1CCC=CC1",-2.59 +"CC12CC(O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO",-3.09 +"c1cncnc1",1.1 +"Clc1ccc(cc1)N(=O)=O",-2.92 +"CCC(=O)OC",-0.14 +"Clc1ccccc1N(=O)=O",-2.55 +"CCCCN(C)C(=O)Nc1ccc(Cl)c(Cl)c1",-4.77 +"CN1CC(O)N(C1=O)c2nnc(s2)C(C)(C)C",-1.877 +"O=N(=O)c1ccccc1",-1.8 +"Ic1ccccc1",-3.01 +"CC2Nc1cc(Cl)c(cc1C(=O)N2c3ccccc3C)S(N)(=O)=O",-3.78 +"COc1ccccc1OCC(O)COC(N)=O",-0.985 +"CCCCOCN(C(=O)CCl)c1c(CC)cccc1CC",-4.19 +"Oc1cccc(Cl)c1Cl",-1.3 +"CCCC(=O)OC",-1.92 +"CCC(=O)Nc1ccc(Cl)c(Cl)c1",-3 +"Nc3nc(N)c2nc(c1ccccc1)c(N)nc2n3",-2.404 +"CCCCCC(=O)OCC",-2.35 +"OCC(O)C2OC1OC(OC1C2O)C(Cl)(Cl)Cl",-1.84 +"CN(C=Nc1ccc(C)cc1C)C=Nc2ccc(C)cc2C",-5.47 +"COc1nc(NC(C)C)nc(NC(C)C)n1",-2.478 +"CCCCCCC=C",-4.44 +"Cc1ccc(N)cc1",-1.21 +"Nc1nccs1",-0.36 +"c1ccccc1(OC(=O)NC)",-1.803 +"CCCC(O)CC",-0.8 +"c3ccc2c(O)c1ccccc1cc2c3",-4.73 +"Cc1ccc2cc3ccccc3cc2c1",-6.96 +"Cc1cccc(C)c1C",-3.2 +"CNC(=O)Oc1ccc(N(C)C)c(C)c1",-2.36 +"CCCCCCCC(C)O",-2.74 +"CN(C(=O)NC(C)(C)c1ccccc1)c2ccccc2",-3.35 +"CCCC(=O)CC",-0.83 +"Oc1c(Br)cc(C#N)cc1Br",-3.33 +"Clc1ccc(cc1Cl)c2ccccc2",-6.39 +"CN(C(=O)COc1nc2ccccc2s1)c3ccccc3",-4.873 +"Oc1cccc2ncccc12",-2.54 +"CC1=C(SCCO1)C(=O)Nc2ccccc2",-3.14 +"CCOc2ccc1nc(sc1c2)S(N)(=O)=O",-3.81 +"Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl",-4.28 +"ClCBr",-0.89 +"CCC1(CC)C(=O)NC(=O)N(C)C1=O",-2.23 +"CC(=O)OCC(=O)C3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C",-4.63 +"NC(=O)NCc1ccccc1",-0.95 +"CN(C)C(=O)Nc1ccc(C)c(Cl)c1",-3.483 +"CON(C)C(=O)Nc1ccc(Cl)c(Cl)c1",-3.592 +"OC1CCCCCC1",-0.88 +"CS(=O)(=O)c1ccc(cc1)C(O)C(CO)NC(=O)C(Cl)Cl",-2.154 +"CCCC(C)C1(CC)C(=O)NC(=S)NC1=O",-3.36 +"CC(=O)Nc1nnc(s1)S(N)(=O)=O",-2.36 +"Oc1ccc(cc1)N(=O)=O",-0.74 +"ClC1=C(Cl)C2(Cl)C3C4CC(C=C4)C3C1(Cl)C2(Cl)Cl",-6.307 +"C1CCOC1",0.49 +"Nc1ccccc1N(=O)=O",-1.96 
+"Clc1cccc(c1Cl)c2cccc(Cl)c2Cl",-7.28 +"CCCCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3",-3.81 +"Cc1c(cccc1N(=O)=O)N(=O)=O",-3 +"CC(=O)C1CCC2C3CCC4=CC(=O)CCC4(C)C3CCC12C",-4.42 +"CCN(CC)c1nc(Cl)nc(n1)N(CC)CC",-4.411 +"ClC(Cl)C(Cl)(Cl)SN2C(=O)C1CC=CCC1C2=O",-5.4 +"c1(Br)c(Br)cc(Br)cc1",-4.5 +"OC3N=C(c1ccccc1)c2cc(Cl)ccc2NC3=O",-3.952 +"O=C1NC(=O)NC(=O)C1(C(C)CCC)CC=C",-2.356 +"c1(O)c(C)ccc(C(C)C)c1",-2.08 +"C1SC(=S)NC1(=O)",-1.77 +"Oc1ccc(c(O)c1)c3oc2cc(O)cc(O)c2c(=O)c3O",-3.083 +"ClC1(C(=O)C2(Cl)C3(Cl)C14Cl)C5(Cl)C2(Cl)C3(Cl)C(Cl)(Cl)C45Cl",-5.259 +"CCN(CC)C(=S)SSC(=S)N(CC)CC",-4.86 +"C1CCCCC1",-3.1 +"ClC1=C(Cl)C(Cl)(C(=C1Cl)Cl)C2(Cl)C(=C(Cl)C(=C2Cl)Cl)Cl",-7.278 +"CN(C)C=Nc1ccc(Cl)cc1C",-2.86 +"CC34CCc1c(ccc2cc(O)ccc12)C3CCC4=O",-5.24 +"CCCCCCCCO",-2.39 +"CCSCC",-1.34 +"ClCCCl",-1.06 +"CCC(C)(C)Cl",-2.51 +"ClCCBr",-1.32 +"Nc1ccc(cc1)N(=O)=O",-2.37 +"OCC1OC(OC2C(O)C(O)C(O)OC2CO)C(O)C(O)C1O",-0.244 +"CCN2c1ncccc1N(CC)C(=O)c3cccnc23",-2.86 +"Clc1ccccc1",-2.38 +"CCCCCCCC=C",-5.05 +"Brc1ccc(I)cc1",-4.56 +"CCC(C)(O)CC",-0.36 +"CCCCCc1ccccc1",-4.64 +"NC(=O)NC1NC(=O)NC1=O",-1.6 +"OCC(O)COC(=O)c1ccccc1Nc2ccnc3cc(Cl)ccc23",-4.571 +"ClC(Cl)C(c1ccc(Cl)cc1)c2ccc(Cl)cc2",-7.2 +"CC(=O)OC3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C",-5.184 +"Clc1cccc2ccccc12",-3.93 +"CCN2c1ccccc1N(C)C(=O)c3ccccc23",-4.749 +"CCCCC(C)O",-0.89 +"CCCC1CCCC1",-4.74 +"CCOC(=O)c1cncn1C(C)c2ccccc2",-4.735 +"Oc1ccc(Cl)c(Cl)c1",-1.25 +"CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2",-8.017 +"c2ccc1ocnc1c2",-1.16 +"CCCCCO",-0.6 +"CCN(CC)c1ccccc1",-3.03 +"Fc1cccc(F)c1",-2 +"ClCCC#N",-0.29 +"CC(C)(C)Cc1ccccc1",-4.15 +"O=C1NC(=O)NC(=O)C1(CC)c1ccccc1",-2.322 +"Clc1ccccc1I",-3.54 +"c2ccc1[nH]nnc1c2",-0.78 +"CNC(=O)Oc1cccc2CC(C)(C)Oc12",-2.8 +"Cc1cccc(C)c1O",-1.29 +"CC(C)C(C)O",-0.18 +"c1ccccc1C(O)c2ccccc2",-2.55 +"CCCCCCCCCC(=O)OC",-4.69 +"COP(=S)(OC)Oc1ccc(cc1Cl)N(=O)=O",-4.31 +"CC(C)CBr",-2.43 +"CCI",-1.6 +"CN(C)C(=O)Oc1nc(nc(C)c1C)N(C)C",-1.95 +"CCCCCCBr",-3.81 +"CCCC(C)C",-3.74 +"Cc1c(F)c(F)c(COC(=O)C2C(C=C(Cl)C(F)(F)F)C2(C)C)c(F)c1F",-7.321 +"CCc1cccc(C)c1N(C(C)COC)C(=O)CCl",-2.73 +"ON=Cc1ccc(o1)N(=O)=O",-2.19 +"CC(C)C(Nc1ccc(cc1Cl)C(F)(F)F)C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2",-8.003 +"Nc1nc[nH]n1",0.522 +"BrC(Br)Br",-1.91 +"COP(=O)(OC)C(O)C(Cl)(Cl)Cl",-0.22 +"CCOP(=S)(OCC)SCn1c(=O)oc2cc(Cl)ccc12",-5.233 +"OCc1ccccc1",-0.4 +"O=c2c(C3CCCc4ccccc43)c(O)c1ccccc1o2",-2.84 +"Oc1ccc(Br)cc1",-1.09 +"CC(C)Br",-1.59 +"CC(C)CC(C)(C)C",-4.74 +"O=N(=O)c1cc(cc(c1)N(=O)=O)N(=O)=O",-2.89 +"CN2C(=O)CN=C(c1ccccc1)c3cc(ccc23)N(=O)=O",-3.796 +"CCC",-1.94 +"Nc1cc(nc(N)n1=O)N2CCCCC2",-1.989 +"Nc2cccc3nc1ccccc1cc23",-4.22 +"c1ccc2cc3c4cccc5cccc(c3cc2c1)c45",-8.49 +"OC(c1ccc(Cl)cc1)(c2ccc(Cl)cc2)C(Cl)(Cl)Cl",-5.666 +"C1Cc2cccc3cccc1c23",-4.63 diff --git a/chemprop-updated/tests/data/regression/mol+mol/atom_descriptors_1.npz b/chemprop-updated/tests/data/regression/mol+mol/atom_descriptors_1.npz new file mode 100644 index 0000000000000000000000000000000000000000..c30f411f33ad2830bc3a1bacf6b4e9ee32b39aa5 --- /dev/null +++ b/chemprop-updated/tests/data/regression/mol+mol/atom_descriptors_1.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be54ce6cb0cba7a8035fabeee8baff5d482f47e15de6889a69cbee56266e0da +size 35370 diff --git a/chemprop-updated/tests/data/regression/mol+mol/atom_features_0.npz b/chemprop-updated/tests/data/regression/mol+mol/atom_features_0.npz new file mode 100644 index 0000000000000000000000000000000000000000..08819b484c89426295d8200fd40ef28ceda2297e --- /dev/null +++ 
b/chemprop-updated/tests/data/regression/mol+mol/atom_features_0.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b290b1337a1d08b64e160701ab479b049d5efe48eefea69f2a7722ae7d117edf +size 50426 diff --git a/chemprop-updated/tests/data/regression/mol+mol/atom_features_1.npz b/chemprop-updated/tests/data/regression/mol+mol/atom_features_1.npz new file mode 100644 index 0000000000000000000000000000000000000000..163cfe61377346eb5aa7120613f1b35e3e819c01 --- /dev/null +++ b/chemprop-updated/tests/data/regression/mol+mol/atom_features_1.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44f018fca63691a64c103572d82315877524f2670ed08efdb20d26c511b3dc0e +size 28058 diff --git a/chemprop-updated/tests/data/regression/mol+mol/bond_features_0.npz b/chemprop-updated/tests/data/regression/mol+mol/bond_features_0.npz new file mode 100644 index 0000000000000000000000000000000000000000..4f1a32f61da6faf1b35229b97458d057db664955 --- /dev/null +++ b/chemprop-updated/tests/data/regression/mol+mol/bond_features_0.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6450233e5dbf8eb2589b681e26a2f30ec31eade141d592cf72fc7d23521110de +size 82882 diff --git a/chemprop-updated/tests/data/regression/mol+mol/descriptors.npz b/chemprop-updated/tests/data/regression/mol+mol/descriptors.npz new file mode 100644 index 0000000000000000000000000000000000000000..71f6ecf3040f851d050c0debafa051d5faee28fd --- /dev/null +++ b/chemprop-updated/tests/data/regression/mol+mol/descriptors.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee3183790461ef822b0a86439a7425cc9d8d304b5216a3b1008c02449e5fed67 +size 1864 diff --git a/chemprop-updated/tests/data/regression/mol+mol/mol+mol.csv b/chemprop-updated/tests/data/regression/mol+mol/mol+mol.csv new file mode 100644 index 0000000000000000000000000000000000000000..e6a63c99b36d27061727e6f05cc91df324e0915e --- /dev/null +++ b/chemprop-updated/tests/data/regression/mol+mol/mol+mol.csv @@ -0,0 +1,101 @@ +smiles,solvent,peakwavs_max +CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2CCCC)C(=O)N(CCCC)C1=S,ClCCl,642.0 +C(=C/c1cnccn1)\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3cnccn3)cc2)cc1,ClCCl,420.0 +CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+](C)C)cc-3oc2c1,O,544.0 +c1ccc2[nH]ccc2c1,O,290.0 +CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5ccccc5c4C3(C)C)CCCC1=C2c1ccccc1C(=O)O,ClC(Cl)Cl,736.0 +CCN1/C(=C\C=C\C=C\C2=[N+](CC)c3ccc4ccccc4c3C2(C)C)C(C)(C)c2c1ccc1ccccc21,CC(C)=O,680.0 +O=C([O-])c1c(Cl)c(Cl)c(Cl)c(Cl)c1-c1c2cc(I)c(=O)c(I)c-2oc2c(I)c([O-])c(I)cc12,CC(C)O,561.0 +O=P(c1c2ccccc2cc2ccccc12)(c1c2ccccc2cc2ccccc12)c1c2ccccc2cc2ccccc12,C1CCOC1,411.0 +COc1ccc(/C=C/c2nc(-c3ccc(C)cc3)[nH]c2/C=C/c2ccc(OC)cc2)cc1,ClCCl,375.0 +CN1CCN(c2ccc3c4c(cccc24)C(=O)c2ccccc2-3)CC1,C1CCCCC1,428.2 +COc1ccc(/C=C/C2=CC(/C=C/c3ccc(OC)c(OC)c3)=[O+][B-](F)(F)O2)cc1OC,CS(C)=O,513.0 +Nc1cc2ccc3cccc4ccc(c1)c2c34,CC#N,338.0 +C(#Cc1ccncc1)C(C#Cc1ccncc1)=Cc1ccc(C=C(C#Cc2ccncc2)C#Cc2ccncc2)s1,ClC(Cl)Cl,522.0 +CN(C)c1cccc(/C=C/c2ncc(-c3ccc(OCc4ccccc4)c(OCc4ccccc4)c3)o2)c1,ClCCl,350.0 +CCN(CC)c1ccc2nc3ccc(=[N+](CC)CC)cc-3oc2c1,CCCCCCc1ccc(-c2ccc(C#N)cc2)cc1,662.0 +CN1c2ccccc2C(O)(c2ccccc2)c2cc([N+](=O)[O-])ccc21,CCO,393.7 +CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21,CC#N,312.0 +N#Cc1ccc(N2c3ccccc3C(c3ccccc3)(c3ccccc3)c3ccccc32)cc1,Cc1ccccc1,352.0 +CCN1/C(=C\C2=C([O-])/C(=C/C3=[N+](CC)c4c(ccc5ccc(C)nc45)C3(C)C)C2=O)C(C)(C)c2ccc3ccc(C)nc3c21,C1CCCCC1,682.0 +CN1C(=O)c2cccc3c(-n4c5ccccc5c5ccccc54)ccc(c23)C1=O,CCCCCC,402.0 
+COc1ccc(C2=C(c3ccccc3)C(c3ccccc3)=[N+]3C2=Nc2c(-c4ccc(OC)cc4)c(-c4ccccc4)c(-c4ccccc4)n2[B-]3(F)F)cc1,Cc1ccccc1,656.0 +Cc1ccc(C2=C3C=CC(c4ccccc4)=[N+]3[B-](F)(F)n3c2ccc3-c2ccccc2)cc1,C1CCOC1,553.0 +CCCCCCCCN1C(=O)c2ccc3c4c(-c5ccc6c(c5)c5ccccc5n6-c5ccccc5)cc5c6c(cc(-c7ccc8c(c7)c7ccccc7n8-c7ccccc7)c(c7ccc(c2c37)C1=O)c64)C(=O)N(CCCCCCCC)C5=O,ClC(Cl)Cl,563.0 +CC1=C(C(C)(C)C)C(C)=[N+]2C1=C(C)c1c(C)c(C(C)(C)C)c(C)n1[B-]2(F)F,C1CCCCC1,529.0 +O=c1oc2ccc3ccccc3c2nc1-c1ccc(Cl)cc1,c1ccccc1,404.0 +CCN1CCN(c2ccc(/N=N/c3cc([N+](=O)[O-])cc([N+](=O)[O-])c3)cc2)CC1,CCOC(C)=O,454.0 +N#CC(C#N)=C(/C=C/c1ccc(N(c2ccccc2)c2ccccc2)cc1)c1ccccc1,C1COCCO1,480.0 +Cc1ccc(C)cc1,C1CCCCC1,262.8811777 +CC(C)CCCCN1CCc2c1ccc1c3c(ccc21)CCCC3=O,Cc1ccccc1,426.0 +[O-]c1c(-c2ccccc2)cc(-[n+]2c(-c3ccccc3)cc(-c3ccccc3)cc2-c2ccccc2)cc1-c1ccccc1,ClC(Cl)Cl,731.2276215 +CCCCCN(c1ccccc1)c1ccc2c(c1)C(CC)(CC)c1cc(/C=C/c3ccc(/C=C(\C#N)C(=O)O)s3)ccc1-2,CC#N,430.0 +c1ccc(-c2ccc(-c3ccc(N(c4ccccc4)c4ccccc4)cc3)c3nsnc23)cc1,C1CCOC1,437.0 +COC(=O)c1ccc2c3ccc(C(=O)OC)c4c(C(=O)OC)ccc(c5ccc(C(=O)OC)c1c25)c43,ClC(Cl)Cl,469.0 +Cc1ccc(C(=O)NN2C3=C(CC4=C2CCCC4=O)C(=O)CCC3)cc1,CC(C)=O,366.0 +CCn1ncc2c3c(C#N)c4c5ccccc5n(C)c4nc3ccc21,CCOC(C)=O,383.0 +CCOc1ccc2cc(-c3ccc(C)cc3)c(=O)oc2c1,CCCCCC,338.0 +CCn1c2ccccc2c2cc(-c3cc(-c4ccc5c(c4)c4ccccc4n5CC)nc(S(C)(=O)=O)n3)ccc21,ClCCl,384.0 +COC(=O)c1c2ccccc2nc2ccccc12,CCCO,361.0 +CCOC(=O)COc1ccc(C(c2c(C)[nH]c3ccccc23)c2c(C)[nH]c3ccccc23)cc1,CO,290.0 +CN(C)c1ccc(/C=C/C=C2\CC/C(=C\C=C\c3ccc(N(C)C)cc3)C2=O)cc1,CCCCCC,478.0 +N#Cc1ccc(/C=C/C=C/c2ccccc2)cc1,CC#N,344.0 +CSc1sc(C(C)=O)c2nn[nH]c(=O)c12,CCO,397.0 +CN(C)c1ccc2c(c1)C(C)(C)C(=O)C=C2,O,413.0 +O=C1NC(=O)/C(=C\c2ccccc2O)S1,OCC(O)CO,364.0 +O=Cc1ccc2c(c1)c1ccccc1n2-c1cc2ccccc2c2ccccc12,CCCCC,313.0 +CN(C)CCCN1c2ccccc2CCc2ccccc21,O,275.0 +Cc1ccc(-n2c(-c3ccccc3[NH-])[nH+]c3c4ccccc4c4ccccc4c32)cc1,Cc1ccccc1,363.0 +C[n+]1ccc(/C=C/c2ccc([O-])cc2)cc1,CCO,515.0 +CC(C)(C)c1ccc2c(c1)c1cc(C(C)(C)C)ccc1n2-c1nc2ccccc2nc1-n1c2ccc(C(C)(C)C)cc2c2cc(C(C)(C)C)ccc21,Cc1ccccc1,419.0 +CC(C)[Si]1(C(C)C)c2cc(C#N)ccc2-c2ccc(N(C)C)cc21,ClCCl,368.0 +CNc1cccc2c1C(=O)c1ccccc1-2,C1CCCCC1,432.0 +CC[N+]1=C(/C=C/c2ccc3ccc4cccc5ccc2c3c45)C(C)(C)c2ccccc21,CS(C)=O,506.0 +CCn1c2sccc2c2c3nsnc3c3c4ccsc4n(CC)c3c21,ClCCl,425.0 +COc1ccc(NC(=O)C(C(=O)Nc2ccc(OC)cc2)C(c2ccccc2O)c2c(C)[nH]c3ccccc23)cc1,CO,258.0 +CC(C)(C)c1ccc(-c2cc(-c3ccc(-c4cc(-c5ccc(C(C)(C)C)cc5)c(C#N)s4)s3)sc2C#N)cc1,ClCCl,392.0 +CCCCCCC(CCCCCC)N1C(=O)c2cccc3c(-c4ccc(OC)cc4)ccc(c23)C1=O,CCCCO,366.2 +CN(C)c1ccc2cc3ccc(=[N+](C)C)cc-3oc2c1,O,548.0 +C1=C2c3cc4ccccc4cc3C3=[N+]2[B-]2(Oc4ccccc43)Oc3ccccc3-c3c4cc5ccccc5cc4c1n32,C1CCOC1,830.0 +c1ccc(C2=C(c3ccccc3)[Si](c3ccccc3)(c3ccccc3)c3ccccc32)cc1,C1CCOC1,330.0 +c1ccc(N(c2ccccc2)c2ccc(-c3nc4c5ccccc5c5ccccc5c4[nH]3)cc2)cc1,C1COCCN1,383.0 +Cc1n[nH]c2c1C(c1ccc([N+](=O)[O-])cc1)C(C#N)=C(N)O2,C1CCOC1,261.0 +CCN(CC)c1ccc(/C=C/C2=[O+][B-](F)(F)Oc3c2c(=O)oc2cc(N(CC)CC)ccc32)cc1,ClC(Cl)Cl,581.0 +CN1C(=CC2=C([O-])C(=Cc3oc(-c4ccc(-c5ccccc5)cc4)c[n+]3C)C2=O)C(C)(C)c2ccccc21,ClC(Cl)Cl,606.0 +Nc1cc2ccc3cccc4ccc(c1)c2c34,CO,336.0 +CCN(CC)c1ccc(/N=N/c2nc3ccc(Br)cc3s2)cc1,CC#N,520.0 +CC1=CC(C)=[N+]2C1=C(c1ccc(N3CCCCC3)cc1)c1c(C)cc(C)n1[B-]2(F)F,CO,497.0 +N#Cc1c(C#N)c2cc(Br)ccc2c2ccc(Br)cc12,ClC(Cl)Cl,350.0 +Cc1cnc(NC(=O)C2=C([O-])c3ccccc3S(=O)(=O)N2C)s1,CCCCO,369.0 +O=c1ccc2cc3c(cc2o1)OCC3,CC(C)O,333.0 +Cc1cccc(-c2c3ccccc3cc3ccccc23)n1,ClCCl,385.0 +CB1C=Cc2ccc3ccccc3c2N1c1ccccc1,C1CCCCC1,359.0 +CCOC(=O)C1=CNC=C(C(=O)OCC)C1c1ccccc1,C1CCOC1,355.0 +CN(C)c1ccc(/C=C/C=C2\CC/C(=C\c3ccc(N(C)C)cc3)C2=O)cc1,CCCCO,499.0 
+C#CCN1C(=O)/C(=C/c2cc(OC(C)C)ccc2OC(C)C)N=C1C,CCCO,400.0 +[O-]c1c(-c2ccccc2)cc(-[n+]2c(-c3ccccc3)cc(-c3ccccc3)cc2-c2ccccc2)cc1-c1ccccc1,CCCCCCCCCC,922.2903226 +CCCCCCCCOc1ccc(C#Cc2ccc(C#Cc3ccc(OCCCCCCCC)c(OCCCCCCCC)c3)c3nc4c5ccccc5c5ccccc5c4nc23)cc1OCCCCCCCC,Cc1ccccc1,461.0 +CCCCN(CCCC)c1ccc(C#Cc2cc(C#Cc3ccc(N(CCCC)CCCC)cc3)c(C#Cc3ccc(C(F)(F)F)cc3)cc2C#Cc2ccc(C(F)(F)F)cc2)cc1,ClCCl,427.0 +COc1ccc(C#Cc2c3ccccc3c(C#Cc3ccc(OC)cc3)c3cc4sc(C)cc4cc23)cc1,ClCCl,520.0 +N#CC(C#N)=C(/C=C/c1cn(-c2ccccc2)nc1-c1ccccc1)c1ccccc1,C1CCOC1,402.0 +COC(=O)c1ccc2c3ccc(C(=O)OC)c4c(C(=O)OC)ccc(c5ccc(C(=O)OC)c1c25)c43,CC(C)O,476.0 +COc1ccc2c(c1)C(=O)c1cc(OC)cc(OC)c1-2,C1CCCCC1,478.0 +CCOC(=O)c1ccc(C2=C(c3ccccc3)c3oc4ccccc4[n+]3[B-](F)(F)O2)cc1,C1CCOC1,350.0 +CC1(C)Oc2c(c3nc4ccccc4nc3c3ccccc23)CC1Br,CCCCCC,422.1190376 +CC(C)(C)c1ccc(-c2nnc(-c3ccc4ccc5cccc6ccc3c4c56)o2)cc1,Cc1ccccc1,394.0 +CCCCCCCCCCCCC(CCCCCCCCCC)Cn1c2cc(C=C(C#N)C#N)c3cccc4c5cccc6c(C=C(C#N)C#N)cc1c(c65)c2c34,ClCCl,581.0 +CC1=C(Br)C(C)=[N+]2C1=C(c1c(-c3ccccn3)nc3ccccn13)c1c(C)c(Br)c(C)n1[B-]2(F)F,ClCCl,548.0 +[O-]c1c(-c2ccccc2)cc(-[n+]2c(-c3ccccc3)cc(-c3ccccc3)cc2-c2ccccc2)cc1-c1ccccc1,CC(C)(C)c1cccc(C(C)(C)C)n1,840.9117647 +CCCCC(CC)Cn1c2ccc(C(=C(C#N)C#N)c3ccccc3)cc2c2c(-c3ccc(Br)cc3)c3c(c(-c4ccc(Br)cc4)c21)c1cc(C(=C(C#N)C#N)c2ccccc2)ccc1n3CC(CC)CCCC,CC#N,462.0 +COC(=O)CCC(NC(=O)CCNc1cccc2ccccc12)C(=O)OC,CCO,330.0 +O=P1(c2ccccc2)C(c2cc(C(F)(F)F)cc(C(F)(F)F)c2)=Cc2c3ccc4cccc5ccc(c6cc(-c7ccc(N(c8ccccc8)c8ccccc8)cc7)n1c26)c3c45,ClCCl,437.0 +Cc1nnc(-c2cc(-n3c4ccccc4c4ccccc43)c(-n3c4ccccc4c4ccccc43)cc2-c2nnc(C)o2)o1,ClCCl,353.0 +COc1ccc2c3c(cccc13)C(=O)c1ccccc1-2,CCOCC,414.0786749 +c1ccc2c3c(ccc2c1)C1(c2cc(-c4ccc5ccc6cccc7ccc4c5c67)ccc2-c2ccc(-c4ccc5ccc6cccc7ccc4c5c67)cc21)c1ccc2ccccc2c1O3,ClC(Cl)Cl,365.0 +Cc1ccc(C2=Nc3cccc4[n+]3[B-](F)(O2)OC(c2ccc(C)cc2)=N4)cc1,ClCCl,394.0 +CC(C)(C)c1ccc(-c2nc3c4ccc(-c5ccccc5)cc4c4cc(-c5ccccc5)ccc4c3n2-c2ccc(C(C)(C)C)cc2)cc1,C1COCCO1,333.0 +COc1ccc(C2CC(c3ccc(O)cc3)=NN2c2ccc(S(N)(=O)=O)cc2)cc1,C1CCOC1,359.0 +COc1ccc2c3c(c4ccc(OC)cc4c2c1)C1(c2ccccc2-c2ccccc21)c1ccccc1-3,C1CCCCC1,386.0 +CCCCOc1c(C=C2N(C)c3ccccc3C2(C)C)c(=O)c1=O,CCO,425.0 +Cc1cc2ccc(-c3cccc4cccc(-c5ccc6cc(C)c(=O)oc6c5)c34)cc2oc1=O,c1ccccc1,324.0 +Cc1ccc(C(=O)c2c(C)c3ccc4cccc5c6cccc7ccc2c(c76)c3c45)cc1,ClCCl,391.0 \ No newline at end of file diff --git a/chemprop-updated/tests/data/regression/mol/atom_descriptors.npz b/chemprop-updated/tests/data/regression/mol/atom_descriptors.npz new file mode 100644 index 0000000000000000000000000000000000000000..1daa8644ebbc9d63d9ea89f3178fee08ad6bb7e9 --- /dev/null +++ b/chemprop-updated/tests/data/regression/mol/atom_descriptors.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:627debbf7895a0cc77ad368a1d387193ee711e8139032940a2b2c5ff8d56ee67 +size 88986 diff --git a/chemprop-updated/tests/data/regression/mol/atom_features.npz b/chemprop-updated/tests/data/regression/mol/atom_features.npz new file mode 100644 index 0000000000000000000000000000000000000000..1daa8644ebbc9d63d9ea89f3178fee08ad6bb7e9 --- /dev/null +++ b/chemprop-updated/tests/data/regression/mol/atom_features.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:627debbf7895a0cc77ad368a1d387193ee711e8139032940a2b2c5ff8d56ee67 +size 88986 diff --git a/chemprop-updated/tests/data/regression/mol/bond_features.npz b/chemprop-updated/tests/data/regression/mol/bond_features.npz new file mode 100644 index 0000000000000000000000000000000000000000..3c8ada61368ac8bfc166fef6d9e2c9b0328ac610 --- /dev/null +++ 
b/chemprop-updated/tests/data/regression/mol/bond_features.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74926b490dcf8abcd91b8036a7649ec5d24d43d6e670ff8663b8bacef897cb66 +size 71458 diff --git a/chemprop-updated/tests/data/regression/mol/config.toml b/chemprop-updated/tests/data/regression/mol/config.toml new file mode 100644 index 0000000000000000000000000000000000000000..7cc86295a31156dcf488e718e495471125337597 --- /dev/null +++ b/chemprop-updated/tests/data/regression/mol/config.toml @@ -0,0 +1,33 @@ +data-path = tests/data/regression.csv +output-dir = trained_model +epochs = 1 +num-workers = 0 +batch-size = 64 +accelerator = auto +devices = auto +rxn-mode = REAC_DIFF +multi-hot-atom-featurizer-mode = V2 +frzn-ffn-layers = 0 +ensemble-size = 1 +message-hidden-dim = 300 +depth = 3 +dropout = 0.0 +activation = RELU +aggregation = mean +aggregation-norm = 100 +ffn-hidden-dim = 300 +ffn-num-layers = 1 +multiclass-num-classes = 3 +task-type = regression +v-kl = 0.0 +eps = 1e-08 +warmup-epochs = 2 +init-lr = 0.0001 +max-lr = 0.001 +final-lr = 0.0001 +split = RANDOM +split-sizes = [0.8, 0.1, 0.1] +split-key-molecule = 0 +num-replicates = 1 +data-seed = 0 +pytorch-seed = 0 diff --git a/chemprop-updated/tests/data/regression/mol/descriptors.npz b/chemprop-updated/tests/data/regression/mol/descriptors.npz new file mode 100644 index 0000000000000000000000000000000000000000..b74555148ab90514e118ee51ce94af12e254b2e6 --- /dev/null +++ b/chemprop-updated/tests/data/regression/mol/descriptors.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d3c0cd9e972e0912a7e66aa19b3c70849041e84d47fa01922cb20e2f7988df +size 1064 diff --git a/chemprop-updated/tests/data/regression/mol/mol.csv b/chemprop-updated/tests/data/regression/mol/mol.csv new file mode 100644 index 0000000000000000000000000000000000000000..11a79f8386574c06f03f984654ea23d7bc8317d6 --- /dev/null +++ b/chemprop-updated/tests/data/regression/mol/mol.csv @@ -0,0 +1,101 @@ +smiles,lipo +Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14,3.54 +COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23,-1.18 +COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl,3.69 +OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3,3.37 +Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1,3.1 +OC1(CN2CCC1CC2)C#Cc3ccc(cc3)c4ccccc4,3.14 +COc1cc(OC)c(cc1NC(=O)CCC(=O)O)S(=O)(=O)NCc2ccccc2N3CCCCC3,-.72 +CNc1cccc(CCOc2ccc(C[C@H](NC(=O)c3c(Cl)cccc3Cl)C(=O)O)cc2C)n1,.34 +COc1ccc(cc1)C2=COc3cc(OC)cc(OC)c3C2=O,3.05 +Oc1ncnc2scc(c3ccsc3)c12,2.25 +CS(=O)(=O)c1ccc(Oc2ccc(cc2)C#C[C@]3(O)CN4CCC3CC4)cc1,1.51 +C[C@H](Nc1nc(Nc2cc(C)[nH]n2)c(C)nc1C#N)c3ccc(F)cn3,2.61 +O=C1CCCCCN1,-.08 +CCCSc1ncccc1C(=O)N2CCCC2c3ccncc3,1.95 +CC1CCCCC1NC(=O)c2cnn(c2NS(=O)(=O)c3ccc(C)cc3)c4ccccc4,1.34 +Nc1ccc(cc1)c2nc3ccc(O)cc3s2,3.2 +COc1ccc(cc1)N2CCN(CC2)C(=O)[C@@H]3CCCC[C@H]3C(=O)NCC#N,1.6 +CCC(COC(=O)c1cc(OC)c(OC)c(OC)c1)(N(C)C)c2ccccc2,3.77 +COc1cc(ccc1N2CC[C@@H](O)C2)N3N=Nc4cc(sc4C3=O)c5ccc(Cl)cc5,3.15 +CO[C@H]1CN(CCN2C(=O)C=Cc3ccc(cc23)C#N)CC[C@H]1NCc4ccc5OCC(=O)Nc5n4,.32 +CC(C)(CCCCCOCCc1ccccc1)NCCc2ccc(O)c3nc(O)sc23,2.92 +Clc1ccc(cc1)C(=O)Nc2oc(nn2)C(=O)Nc3ccc(cc3)N4CCOCC4,1.92 +COc1ccc(Oc2cccc(CN3CCCC(C3)N4C=C(C)C(=O)NC4=O)c2)cc1,3.17 +OC(=O)c1cccc(c1)N2CCC(CN3CCC(CC3)Oc4ccc(Cl)c(Cl)c4)CC2,2.17 +CNCC[C@@H](Oc1ccccc1C)c2ccccc2,1.2 +Clc1ccc(N2CCN(CC2)C(=O)CCCc3ccncc3)c(Cl)c1,3.93 +COc1cnc(nc1N(C)C)c2ccccn2,1.9 +C(CCCCNc1cc(nc2ccccc12)c3ccccc3)CCCNc4cc(nc5ccccc45)c6ccccc6,2.27 +CSc1c(cnn1c2ccc(cc2)C(=O)O)C(=O)NC3C4CC5CC(CC3C5)C4,1.2 
+CNC1=Nc2ncccc2C(=NC1c3cccs3)c4occn4,1.14 +CS(=O)(=O)C1(CC1)c2cc(nc(n2)c3cccc4[nH]ccc34)N5CC6CCC(C5)O6,2.6 +CN([C@@H]1CCN(Cc2ccc(cc2)C(F)(F)F)C[C@@H]1F)C(=O)Cc3ccc(cc3)n4cnnn4,3.3 +CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,3.94 +CS(=O)(=O)c1ccccc1C(=O)NC[C@@H](O)CN2CCC(CC2)Oc3ccc(Cl)c(Cl)c3,2.34 +O=C(NCc1ccncc1)c2ccc(Oc3ccccc3C#N)cc2,2.57 +CN(C)c1ccnc2sc(C(=O)NCc3ccccc3)c(N)c12,3.62 +CN1CCN(CC1)c2ccc3N=CN(C(=O)c3c2)c4cc(NC(=O)c5cscn5)ccc4C,2.06 +Cn1cncc1c2c3C(=O)N(CC4CC4)C(=O)N(CC5CC5)c3nn2Cc6ccnc7ccc(Cl)cc67,4.33 +COc1ccc2ncc(C#N)c(CCN3CCC(CC3)NCc4cc5SCOc5cn4)c2c1,2.55 +CNC(=O)C1(CCN(CC[C@H](CN(C)C(=O)c2c(OC)c(cc3ccccc23)C#N)c4ccc(Cl)c(Cl)c4)CC1)N5CCCCC5=O,2.78 +OB1N(C(=O)Nc2ccccc12)c3ccccc3,1.4 +CC(C)N(CCC(C(=O)N)(c1ccccc1)c2ccccn2)C(C)C,-.54 +NC(=NC#N)c1sc(Nc2ccccc2)nc1N,2.91 +CCS(=O)(=O)c1ccc(c(C)c1)c2cc(ccc2O[C@H](C)C(=O)O)C(F)(F)F,-.4 +OC(=O)COc1ccc(cc1c2cc(ccc2F)C#N)C(F)(F)F,-.16 +COc1ccc(cn1)C2=Cc3c(C)nc(N)nc3N([C@@H]4CC[C@H](CC4)OCCO)C2=O,2.2 +CC(Nc1ncnc2ccccc12)c3ccccc3,3.4 +CC(C)c1ccc2Oc3nc(N)c(cc3C(=O)c2c1)C(=O)O,1.1 +O[C@@H](CNCCCOCCOCCc1cccc2ccccc12)c3ccc(O)c4NC(=O)Sc34,2.28 +COc1ccccc1Cn2c(C)nc3ccccc23,3.47 +OC(=O)c1ccc(NC(=O)c2cc(OCc3ccccc3F)cc(OCc4ccccc4F)c2)nc1,3 +NC(Cc1c[nH]c2ccccc12)C(=O)O,-1.17 +OC(=O)CCC[C@H]1[C@@H](Cc2ccccc12)NC(=O)c3cc4cc(F)ccc4[nH]3,1.95 +CCNC(=O)c1cc2c(c(cnc2[nH]1)c3cncc(c3)C(=O)O)n4ccc(n4)C(F)(F)F,-.99 +C[C@H](NC(=O)c1c(C)nn(C2CCCC2)c1NS(=O)(=O)c3ccc(C)cc3)C(C)(C)C,2 +N(c1ccccc1)c2cc(Nc3ccccc3)[nH]n2,3.8 +COCCNC(=O)c1cccc(Nc2ncc3cc(ccc3n2)c4ccncc4)c1,3.21 +CCC(CC)NC(=O)c1cnn(C)c1NS(=O)(=O)c2ccc(C)cc2,.36 +NC(=O)c1cc(F)cc(O[C@H]2C[C@H]3CC[C@@H](C2)N3Cc4ccccc4)c1,2.14 +O=C1NC(=NC(=C1C#N)c2ccccc2)SCCc3ccccc3,1.71 +OC(C(=O)OC1CN2CCC1CC2)(c3ccccc3)c4ccccc4,1.19 +Cc1ccccc1NC(=O)CCS(=O)(=O)c2ccc(Br)s2,2.7 +CC(C)n1c(C)ncc1c2nc(Nc3ccc(cc3)C(=O)N(C)C)ncc2F,2.77 +COc1cccc(c1)c2c[nH]c(n2)c3ccccc3,3.8 +O=C(COc1ccccc1)c2ccccc2,2.87 +COc1cc2ncc(C(=O)N)c(Nc3ccc(F)cc3F)c2cc1NCCN(C)C,1.91 +CO[C@@H]1CC[C@@]2(CC1)Cc3ccc(OCC(C)C)cc3C24N=C(C)C(=N4)N,3.4 +COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN4CCCC4,3.13 +O=C1CCOc2cc(COc3ccccc3)ccc12,3 +Clc1cccc2cn[nH]c12,2.33 +CNC(=O)c1ccc(CC(=O)N(C)C2CCN(Cc3ccc(cc3)C(F)(F)F)CC2)cc1,2.8 +COCCNCc1ccc(CCNC[C@H](O)c2ccc(O)c3NC(=O)Sc23)cc1,-.54 +Cn1cncc1c2c3C(=O)N(CC#C)C(=O)N(CC4CC4)c3nn2Cc5ccnc6ccc(Cl)cc56,3.16 +C[C@H](NC(=O)c1cccnc1Oc2ccccc2)c3ccccc3,2.91 +Clc1ccc(CN2CC3CNCC(C2)O3)cc1C(=O)NCC45CC6CC(CC(C6)C4)C5,1.55 +COc1cc(NS(=O)(=O)c2ccc(N)cc2)nc(OC)n1,.2 +Cc1cc(CCC2CCN(CC2)S(=O)(=O)CC3(CCOCC3)N(O)C=O)c(C)cn1,1.43 +C[C@H](Nc1ncc(F)c(Nc2cc([nH]n2)C3CC3)n1)c4ncc(F)cn4,2.47 +CC(=O)Nc1ccc2c(c1)c(cn2CCCO)c3cc(NC4CC4)n5ncc(C#N)c5n3,2.48 +CC1COc2c(N3CCN(C)CC3)c(F)cc4C(=O)C(=CN1c24)C(=O)O,-.45 +CC1(CC1)c2nc(ncc2C(=O)N[C@@H]3C4CC5CC3C[C@@](O)(C5)C4)N6CCOCC6,2 +COC(=O)c1ccc(C)c(NS(=O)(=O)c2ccc3N(C)SC(=O)c3c2)c1,2.6 +COc1ccc(cc1)C2=COc3cc(O)cc(O)c3C2=O,3.5 +CNCCCC12CCC(c3ccccc13)c4ccccc24,.89 +Oc1cc(nc2ccnn12)c3ccccc3,1.3 +Fc1cc(cc(F)c1C2=CCN(CC2)C=O)N3C[C@H](COc4ccon4)OC3=O,2.01 +CC(C#C)N1C(=O)N(CC2CC2)c3nn(Cc4ccnc5ccc(Cl)cc45)c(c3C1=O)c6cncn6C,3.59 +C[C@H]1CN(Cc2cc(Cl)ccc2OCC(=O)O)CCN1C(=O)Cc3ccccc3,.18 +COc1cc(Nc2nc(N[C@@H](C)c3ncc(F)cn3)ncc2Br)n[nH]1,2.6 +Cc1nc(C)c(nc1C(=O)N)c2ccc([C@@H]3CC[C@@H](CC(=O)O)CC3)c(F)c2,1.3 +COc1ccnc(CCc2nc3c(C)ccnc3[nH]2)c1,2.1 +Cc1cc(CCCOc2c(Cl)cc(cc2Cl)C3=NCCO3)on1,3.72 +CN(C)C(=O)c1ccc(CN2CCc3cc4nc(N)sc4cc3CC2)cc1,1.72 +COC(=O)[C@H]1[C@@H](O)CC[C@H]2CN3CCc4c([nH]c5ccccc45)[C@@H]3C[C@H]12,1.65 +CCN1CCN(CC1)c2ccc(Nc3cc(ncn3)N(C)C(=O)Nc4c(Cl)c(OC)cc(OC)c4Cl)cc2,3.7 
+CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C@H]([C@H](O)[C@@H]2O)n3cnc4c(N)ncnc34,2.2 +CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)c3)c2n1)c4c(Cl)c(OC)cc(OC)c4Cl,2.04 +CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)C(=O)c13,4.49 +COc1ccc(Cc2c(N)n[nH]c2N)cc1,.2 +CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(=O)Nc3cccnc3,2 diff --git a/chemprop-updated/tests/data/regression/mol/mol_with_splits.csv b/chemprop-updated/tests/data/regression/mol/mol_with_splits.csv new file mode 100644 index 0000000000000000000000000000000000000000..0c4f1f4503ba169d4b69f87b9a28f4db735b7cfc --- /dev/null +++ b/chemprop-updated/tests/data/regression/mol/mol_with_splits.csv @@ -0,0 +1,101 @@ +smiles,lipo,split +Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14,3.54,train +COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23,-1.18,val +COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl,3.69,test +OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3,3.37,train +Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1,3.1,val +OC1(CN2CCC1CC2)C#Cc3ccc(cc3)c4ccccc4,3.14,test +COc1cc(OC)c(cc1NC(=O)CCC(=O)O)S(=O)(=O)NCc2ccccc2N3CCCCC3,-0.72,train +CNc1cccc(CCOc2ccc(C[C@H](NC(=O)c3c(Cl)cccc3Cl)C(=O)O)cc2C)n1,0.34,val +COc1ccc(cc1)C2=COc3cc(OC)cc(OC)c3C2=O,3.05,test +Oc1ncnc2scc(c3ccsc3)c12,2.25,train +CS(=O)(=O)c1ccc(Oc2ccc(cc2)C#C[C@]3(O)CN4CCC3CC4)cc1,1.51,val +C[C@H](Nc1nc(Nc2cc(C)[nH]n2)c(C)nc1C#N)c3ccc(F)cn3,2.61,test +O=C1CCCCCN1,-0.08,train +CCCSc1ncccc1C(=O)N2CCCC2c3ccncc3,1.95,val +CC1CCCCC1NC(=O)c2cnn(c2NS(=O)(=O)c3ccc(C)cc3)c4ccccc4,1.34,test +Nc1ccc(cc1)c2nc3ccc(O)cc3s2,3.2,train +COc1ccc(cc1)N2CCN(CC2)C(=O)[C@@H]3CCCC[C@H]3C(=O)NCC#N,1.6,val +CCC(COC(=O)c1cc(OC)c(OC)c(OC)c1)(N(C)C)c2ccccc2,3.77,test +COc1cc(ccc1N2CC[C@@H](O)C2)N3N=Nc4cc(sc4C3=O)c5ccc(Cl)cc5,3.15,train +CO[C@H]1CN(CCN2C(=O)C=Cc3ccc(cc23)C#N)CC[C@H]1NCc4ccc5OCC(=O)Nc5n4,0.32,val +CC(C)(CCCCCOCCc1ccccc1)NCCc2ccc(O)c3nc(O)sc23,2.92,test +Clc1ccc(cc1)C(=O)Nc2oc(nn2)C(=O)Nc3ccc(cc3)N4CCOCC4,1.92,train +COc1ccc(Oc2cccc(CN3CCCC(C3)N4C=C(C)C(=O)NC4=O)c2)cc1,3.17,val +OC(=O)c1cccc(c1)N2CCC(CN3CCC(CC3)Oc4ccc(Cl)c(Cl)c4)CC2,2.17,test +CNCC[C@@H](Oc1ccccc1C)c2ccccc2,1.2,train +Clc1ccc(N2CCN(CC2)C(=O)CCCc3ccncc3)c(Cl)c1,3.93,val +COc1cnc(nc1N(C)C)c2ccccn2,1.9,test +C(CCCCNc1cc(nc2ccccc12)c3ccccc3)CCCNc4cc(nc5ccccc45)c6ccccc6,2.27,train +CSc1c(cnn1c2ccc(cc2)C(=O)O)C(=O)NC3C4CC5CC(CC3C5)C4,1.2,val +CNC1=Nc2ncccc2C(=NC1c3cccs3)c4occn4,1.14,test +CS(=O)(=O)C1(CC1)c2cc(nc(n2)c3cccc4[nH]ccc34)N5CC6CCC(C5)O6,2.6,train +CN([C@@H]1CCN(Cc2ccc(cc2)C(F)(F)F)C[C@@H]1F)C(=O)Cc3ccc(cc3)n4cnnn4,3.3,val +CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,3.94,test +CS(=O)(=O)c1ccccc1C(=O)NC[C@@H](O)CN2CCC(CC2)Oc3ccc(Cl)c(Cl)c3,2.34,train +O=C(NCc1ccncc1)c2ccc(Oc3ccccc3C#N)cc2,2.57,val +CN(C)c1ccnc2sc(C(=O)NCc3ccccc3)c(N)c12,3.62,test +CN1CCN(CC1)c2ccc3N=CN(C(=O)c3c2)c4cc(NC(=O)c5cscn5)ccc4C,2.06,train +Cn1cncc1c2c3C(=O)N(CC4CC4)C(=O)N(CC5CC5)c3nn2Cc6ccnc7ccc(Cl)cc67,4.33,val +COc1ccc2ncc(C#N)c(CCN3CCC(CC3)NCc4cc5SCOc5cn4)c2c1,2.55,test +CNC(=O)C1(CCN(CC[C@H](CN(C)C(=O)c2c(OC)c(cc3ccccc23)C#N)c4ccc(Cl)c(Cl)c4)CC1)N5CCCCC5=O,2.78,train +OB1N(C(=O)Nc2ccccc12)c3ccccc3,1.4,val +CC(C)N(CCC(C(=O)N)(c1ccccc1)c2ccccn2)C(C)C,-0.54,test +NC(=NC#N)c1sc(Nc2ccccc2)nc1N,2.91,train +CCS(=O)(=O)c1ccc(c(C)c1)c2cc(ccc2O[C@H](C)C(=O)O)C(F)(F)F,-0.4,val +OC(=O)COc1ccc(cc1c2cc(ccc2F)C#N)C(F)(F)F,-0.16,test +COc1ccc(cn1)C2=Cc3c(C)nc(N)nc3N([C@@H]4CC[C@H](CC4)OCCO)C2=O,2.2,train +CC(Nc1ncnc2ccccc12)c3ccccc3,3.4,val +CC(C)c1ccc2Oc3nc(N)c(cc3C(=O)c2c1)C(=O)O,1.1,test 
+O[C@@H](CNCCCOCCOCCc1cccc2ccccc12)c3ccc(O)c4NC(=O)Sc34,2.28,train +COc1ccccc1Cn2c(C)nc3ccccc23,3.47,val +OC(=O)c1ccc(NC(=O)c2cc(OCc3ccccc3F)cc(OCc4ccccc4F)c2)nc1,3,test +NC(Cc1c[nH]c2ccccc12)C(=O)O,-1.17,train +OC(=O)CCC[C@H]1[C@@H](Cc2ccccc12)NC(=O)c3cc4cc(F)ccc4[nH]3,1.95,val +CCNC(=O)c1cc2c(c(cnc2[nH]1)c3cncc(c3)C(=O)O)n4ccc(n4)C(F)(F)F,-0.99,test +C[C@H](NC(=O)c1c(C)nn(C2CCCC2)c1NS(=O)(=O)c3ccc(C)cc3)C(C)(C)C,2,train +N(c1ccccc1)c2cc(Nc3ccccc3)[nH]n2,3.8,val +COCCNC(=O)c1cccc(Nc2ncc3cc(ccc3n2)c4ccncc4)c1,3.21,test +CCC(CC)NC(=O)c1cnn(C)c1NS(=O)(=O)c2ccc(C)cc2,0.36,train +NC(=O)c1cc(F)cc(O[C@H]2C[C@H]3CC[C@@H](C2)N3Cc4ccccc4)c1,2.14,val +O=C1NC(=NC(=C1C#N)c2ccccc2)SCCc3ccccc3,1.71,test +OC(C(=O)OC1CN2CCC1CC2)(c3ccccc3)c4ccccc4,1.19,train +Cc1ccccc1NC(=O)CCS(=O)(=O)c2ccc(Br)s2,2.7,val +CC(C)n1c(C)ncc1c2nc(Nc3ccc(cc3)C(=O)N(C)C)ncc2F,2.77,test +COc1cccc(c1)c2c[nH]c(n2)c3ccccc3,3.8,train +O=C(COc1ccccc1)c2ccccc2,2.87,val +COc1cc2ncc(C(=O)N)c(Nc3ccc(F)cc3F)c2cc1NCCN(C)C,1.91,test +CO[C@@H]1CC[C@@]2(CC1)Cc3ccc(OCC(C)C)cc3C24N=C(C)C(=N4)N,3.4,train +COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN4CCCC4,3.13,val +O=C1CCOc2cc(COc3ccccc3)ccc12,3,test +Clc1cccc2cn[nH]c12,2.33,train +CNC(=O)c1ccc(CC(=O)N(C)C2CCN(Cc3ccc(cc3)C(F)(F)F)CC2)cc1,2.8,val +COCCNCc1ccc(CCNC[C@H](O)c2ccc(O)c3NC(=O)Sc23)cc1,-0.54,test +Cn1cncc1c2c3C(=O)N(CC#C)C(=O)N(CC4CC4)c3nn2Cc5ccnc6ccc(Cl)cc56,3.16,train +C[C@H](NC(=O)c1cccnc1Oc2ccccc2)c3ccccc3,2.91,val +Clc1ccc(CN2CC3CNCC(C2)O3)cc1C(=O)NCC45CC6CC(CC(C6)C4)C5,1.55,test +COc1cc(NS(=O)(=O)c2ccc(N)cc2)nc(OC)n1,0.2,train +Cc1cc(CCC2CCN(CC2)S(=O)(=O)CC3(CCOCC3)N(O)C=O)c(C)cn1,1.43,val +C[C@H](Nc1ncc(F)c(Nc2cc([nH]n2)C3CC3)n1)c4ncc(F)cn4,2.47,test +CC(=O)Nc1ccc2c(c1)c(cn2CCCO)c3cc(NC4CC4)n5ncc(C#N)c5n3,2.48,train +CC1COc2c(N3CCN(C)CC3)c(F)cc4C(=O)C(=CN1c24)C(=O)O,-0.45,val +CC1(CC1)c2nc(ncc2C(=O)N[C@@H]3C4CC5CC3C[C@@](O)(C5)C4)N6CCOCC6,2,test +COC(=O)c1ccc(C)c(NS(=O)(=O)c2ccc3N(C)SC(=O)c3c2)c1,2.6,train +COc1ccc(cc1)C2=COc3cc(O)cc(O)c3C2=O,3.5,val +CNCCCC12CCC(c3ccccc13)c4ccccc24,0.89,test +Oc1cc(nc2ccnn12)c3ccccc3,1.3,train +Fc1cc(cc(F)c1C2=CCN(CC2)C=O)N3C[C@H](COc4ccon4)OC3=O,2.01,val +CC(C#C)N1C(=O)N(CC2CC2)c3nn(Cc4ccnc5ccc(Cl)cc45)c(c3C1=O)c6cncn6C,3.59,test +C[C@H]1CN(Cc2cc(Cl)ccc2OCC(=O)O)CCN1C(=O)Cc3ccccc3,0.18,train +COc1cc(Nc2nc(N[C@@H](C)c3ncc(F)cn3)ncc2Br)n[nH]1,2.6,val +Cc1nc(C)c(nc1C(=O)N)c2ccc([C@@H]3CC[C@@H](CC(=O)O)CC3)c(F)c2,1.3,test +COc1ccnc(CCc2nc3c(C)ccnc3[nH]2)c1,2.1,train +Cc1cc(CCCOc2c(Cl)cc(cc2Cl)C3=NCCO3)on1,3.72,val +CN(C)C(=O)c1ccc(CN2CCc3cc4nc(N)sc4cc3CC2)cc1,1.72,test +COC(=O)[C@H]1[C@@H](O)CC[C@H]2CN3CCc4c([nH]c5ccccc45)[C@@H]3C[C@H]12,1.65,train +CCN1CCN(CC1)c2ccc(Nc3cc(ncn3)N(C)C(=O)Nc4c(Cl)c(OC)cc(OC)c4Cl)cc2,3.7,val +CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C@H]([C@H](O)[C@@H]2O)n3cnc4c(N)ncnc34,2.2,test +CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)c3)c2n1)c4c(Cl)c(OC)cc(OC)c4Cl,2.04,train +CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)C(=O)c13,4.49,val +COc1ccc(Cc2c(N)n[nH]c2N)cc1,0.2,test +CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(=O)Nc3cccnc3,2, diff --git a/chemprop-updated/tests/data/regression/mol_multitask.csv b/chemprop-updated/tests/data/regression/mol_multitask.csv new file mode 100644 index 0000000000000000000000000000000000000000..361c274ae2a64496dc725912d62b6b5dcc426d2d --- /dev/null +++ b/chemprop-updated/tests/data/regression/mol_multitask.csv @@ -0,0 +1,500 @@ +"smiles","mu","alpha","homo","lumo","gap","r2","zpve","cv","u0","u298","h298","g298" 
+"C",0,13.21,-0.3877,0.1171,0.5048,35.3641,0.044749,6.469,-40.47893,-40.476062,-40.475117,-40.498597 +"N",1.6256,9.46,-0.257,0.0829,0.3399,26.1563,0.034358,6.316,-56.525887,-56.523026,-56.522082,-56.544961 +"O",1.8511,6.31,-0.2928,0.0687,0.3615,19.0002,0.021375,6.002,-76.404702,-76.401867,-76.400922,-76.422349 +"C#C",0,16.28,-0.2845,0.0506,0.3351,59.5248,0.026841,8.574,-77.308427,-77.305527,-77.304583,-77.327429 +"C#N",2.8937,12.99,-0.3604,0.0191,0.3796,48.7476,0.016601,6.278,-93.411888,-93.40937,-93.408425,-93.431246 +"C=O",2.1089,14.18,-0.267,-0.0406,0.2263,59.9891,0.026603,6.413,-114.483613,-114.480746,-114.479802,-114.505268 +"CC",0,23.95,-0.3385,0.1041,0.4426,109.5031,0.074542,10.098,-79.764152,-79.760666,-79.759722,-79.787269 +"CO",1.5258,16.97,-0.2653,0.0784,0.3437,83.794,0.051208,8.751,-115.679136,-115.675816,-115.674872,-115.701876 +"CC#C",0.7156,28.78,-0.2609,0.0613,0.3222,177.1963,0.05541,12.482,-116.609549,-116.60555,-116.604606,-116.633775 +"CC#N",3.8266,24.45,-0.3264,0.0376,0.364,160.7223,0.045286,10.287,-132.71815,-132.714563,-132.713619,-132.742149 +"CC=O",2.5682,25.11,-0.254,-0.0198,0.2342,166.9728,0.055355,11.219,-153.787612,-153.783728,-153.782784,-153.812518 +"C(=O)N",3.7286,21.57,-0.2543,0.0302,0.2845,145.3078,0.045279,10.89,-169.860788,-169.856903,-169.855958,-169.885594 +"CCC",0.0597,34.75,-0.323,0.0949,0.4179,227.1361,0.103182,14.84,-119.052475,-119.047927,-119.046983,-119.078157 +"CCO",1.4131,27.87,-0.2619,0.0798,0.3417,193.1659,0.079754,13.546,-154.972731,-154.968412,-154.967467,-154.998148 +"COC",1.1502,28.13,-0.2525,0.091,0.3435,187.1015,0.079534,12.934,-154.960361,-154.956045,-154.9551,-154.985747 +"C1CC1",0.0005,30.82,-0.2888,0.1042,0.393,155.8145,0.081231,11.041,-117.824798,-117.821426,-117.820482,-117.849087 +"C1CO1",1.7675,24.04,-0.2682,0.1042,0.3724,129.891,0.057289,9.176,-153.742562,-153.73941,-153.738466,-153.766642 +"CC(=O)C",2.7362,35.53,-0.2431,-0.0087,0.2344,292.4367,0.083382,16.893,-193.08834,-193.082969,-193.082024,-193.116476 +"CC(=O)N",3.6367,31.83,-0.2436,0.0347,0.2783,267.6148,0.07319,16.561,-209.159302,-209.15402,-209.153076,-209.187468 +"C(=O)(N)N",3.4869,28.07,-0.2495,0.0556,0.3051,244.2308,0.063824,15.292,-225.221461,-225.217075,-225.216131,-225.247724 +"CC(C)C",0.0897,45.46,-0.3167,0.0843,0.401,355.0621,0.131146,20.273,-158.342346,-158.336603,-158.335658,-158.370016 +"CC(C)O",1.4259,38.58,-0.2612,0.074,0.3351,318.3721,0.107673,19.052,-194.267232,-194.261748,-194.260804,-194.294663 +"C#CC#C",0,38.52,-0.2599,-0.0214,0.2386,278.6264,0.037354,15.312,-153.459846,-153.455442,-153.454498,-153.482621 +"C#CC#N",3.792,32.66,-0.3102,-0.0543,0.2559,260.1896,0.027259,12.93,-169.557758,-169.553764,-169.55282,-169.581024 +"C(#N)C#N",0.0023,27.7,-0.3696,-0.0926,0.277,242.9308,0.015951,10.398,-185.648533,-185.644825,-185.64388,-185.667652 +"C#CC=O",2.7824,31.14,-0.2777,-0.0735,0.2042,268.3921,0.037208,13.049,-190.624631,-190.620363,-190.619419,-190.650543 +"C(=O)C#N",2.3112,26.25,-0.3166,-0.11,0.2066,251.0007,0.02654,11.329,-206.721858,-206.717875,-206.716931,-206.747625 +"C(=O)C=O",0.002,26.12,-0.2668,-0.1113,0.1555,266.8164,0.036943,12.147,-227.798785,-227.79457,-227.793626,-227.825074 +"CC#CC",0,42.32,-0.2412,0.0684,0.3096,400.2236,0.083896,17.447,-155.908941,-155.90318,-155.902236,-155.937641 +"CCC#C",0.7067,40.09,-0.2592,0.0566,0.3157,333.9589,0.084338,17.13,-155.897345,-155.892291,-155.891347,-155.924226 +"CCC#N",3.9233,35.38,-0.3213,0.034,0.3553,314.5335,0.07419,14.988,-172.006141,-172.001467,-172.000523,-172.032826 
+"C(C#N)N",4.4361,31.81,-0.2683,0.0173,0.2855,295.6635,0.063305,14.488,-188.042067,-188.037478,-188.036534,-188.06863 +"C#CCO",1.7211,33.1,-0.2595,0.0277,0.2872,300.0993,0.060632,15.855,-191.810916,-191.806025,-191.805081,-191.837634 +"C(C#N)O",4.6788,28.56,-0.3018,0.0022,0.3039,280.6659,0.050262,13.845,-207.916786,-207.912215,-207.911271,-207.943384 +"CCC=O",2.6741,35.83,-0.25,-0.0205,0.2295,333.3276,0.084175,15.954,-193.075202,-193.070116,-193.069171,-193.102798 +"CNC=O",3.7071,32.78,-0.2516,0.0335,0.2851,279.7863,0.074166,15.058,-209.144909,-209.139976,-209.139032,-209.172305 +"COC=O",3.92,29.47,-0.2814,0.0074,0.2888,293.917,0.061327,13.885,-229.013797,-229.009003,-229.008059,-229.041086 +"C(C=O)O",1.7341,28.53,-0.2537,-0.0341,0.2196,303.8129,0.060508,14.78,-228.992613,-228.987769,-228.986825,-229.019918 +"CCCC",0,45.71,-0.317,0.0937,0.4107,426.2996,0.131708,19.668,-158.340943,-158.33517,-158.334226,-158.36894 +"CCCO",1.3402,38.61,-0.2619,0.081,0.3429,382.8628,0.108241,18.431,-194.261089,-194.255495,-194.254551,-194.28893 +"CCOC",1.0363,39.34,-0.2503,0.0925,0.3428,368.9331,0.107895,17.888,-194.254127,-194.248585,-194.247641,-194.281899 +"C(CO)O",0.0075,31.42,-0.2594,0.0584,0.3179,297.8398,0.085172,16.837,-230.183076,-230.177723,-230.176779,-230.211195 +"CC1CC1",0.1136,41.96,-0.2727,0.1012,0.3738,298.6061,0.109284,16.49,-157.116735,-157.11209,-157.111146,-157.143262 +"CC1CO1",1.812,35.01,-0.2633,0.1052,0.3685,267.2979,0.085275,14.764,-193.039603,-193.035186,-193.034242,-193.065979 +"CN1CC1",1.1353,39.02,-0.2304,0.0968,0.3271,270.5508,0.097671,15.298,-173.147782,-173.143343,-173.142399,-173.174073 +"C1CC1O",1.3894,34.64,-0.239,0.0775,0.3166,263.966,0.085106,15.679,-193.034988,-193.030356,-193.029411,-193.061689 +"C1CCC1",0,41.83,-0.2982,0.0956,0.3938,268.4432,0.110511,14.696,-157.115484,-157.111322,-157.110378,-157.141657 +"C1COC1",1.7978,34.56,-0.2424,0.0859,0.3283,236.9759,0.086675,12.915,-193.034094,-193.029968,-193.029024,-193.060777 +"CC(=NO)C",0.6875,45.37,-0.2392,0.0192,0.2584,452.5112,0.100501,21.616,-248.375248,-248.368823,-248.367879,-248.405354 +"c1cc[nH]c1",1.8689,43.14,-0.2029,0.0499,0.2528,303.9808,0.082433,14.821,-210.101789,-210.097816,-210.096872,-210.12818 +"c1cnc[nH]1",3.6193,39.13,-0.2253,0.0332,0.2585,283.6817,0.071145,13.371,-226.160842,-226.157088,-226.156144,-226.187104 +"c1ccoc1",0.5571,39.2,-0.2246,0.0199,0.2445,289.005,0.069883,13.358,-229.969129,-229.965414,-229.96447,-229.995393 +"c1cocn1",1.5081,35.17,-0.2509,0.001,0.2519,269.2444,0.058593,12.04,-246.02915,-246.025614,-246.024669,-246.055309 +"CC(C)(C)C",0.0003,56.01,-0.3145,0.0737,0.3882,486.2719,0.158836,26.084,-197.632222,-197.625241,-197.624297,-197.661411 +"CC(C)(C)O",1.384,49.04,-0.2601,0.0664,0.3265,449.0573,0.134977,25.128,-233.560626,-233.553779,-233.552834,-233.589759 +"CC(=O)C#C",2.8579,42.02,-0.2654,-0.0575,0.2079,416.7799,0.065175,18.723,-229.927277,-229.921622,-229.920677,-229.955843 +"CC(=O)C#N",3.3351,37.08,-0.3007,-0.0889,0.2118,397.7757,0.05451,16.963,-246.027383,-246.022024,-246.02108,-246.055769 +"C(#N)C(=N)N",5.1815,38.31,-0.2739,-0.0438,0.2301,384.5574,0.056619,17.465,-242.19573,-242.190591,-242.189646,-242.223513 +"C#CC(=O)N",3.7167,38.29,-0.2629,-0.0277,0.2352,390.4619,0.054731,18.527,-245.997884,-245.992256,-245.991312,-246.026404 +"CC(=O)C=O",0.9461,36.51,-0.2538,-0.0964,0.1574,399.222,0.064992,17.806,-267.10335,-267.097658,-267.096714,-267.132534 +"C(=O)C(=N)N",2.7707,38.09,-0.2561,-0.064,0.192,384.4827,0.067668,17.972,-263.278851,-263.273589,-263.272645,-263.306835 
+"C(=O)C(=O)N",5.1668,33.39,-0.2533,-0.0763,0.177,381.9882,0.054577,17.525,-283.16874,-283.163262,-283.162318,-283.197298 +"CC(C)C#C",0.6578,51.2,-0.2589,0.0571,0.316,481.9854,0.112471,22.569,-195.186772,-195.180446,-195.179502,-195.215658 +"CC(C)C#N",3.9512,46.23,-0.318,0.0365,0.3545,460.694,0.102281,20.467,-211.295796,-211.289821,-211.288877,-211.324525 +"CC(N)C#N",2.7429,42.87,-0.2704,0.0278,0.2983,440.0738,0.091554,19.946,-227.338075,-227.332253,-227.331309,-227.366638 +"CC(O)C#C",1.3582,44.03,-0.2665,0.0336,0.3001,444.6452,0.088908,21.306,-231.108368,-231.102292,-231.101348,-231.137061 +"CC(O)C#N",3.269,39.28,-0.3051,0.0115,0.3166,424.3395,0.078602,19.252,-247.214861,-247.209162,-247.208218,-247.243338 +"CC(C)C=O",2.6921,46.58,-0.2469,-0.0188,0.2281,482.0475,0.112328,21.434,-232.364952,-232.358577,-232.357633,-232.394589 +"CC(O)C=O",2.8354,39.16,-0.255,-0.0279,0.2271,432.1489,0.088443,20.344,-268.287661,-268.281505,-268.280561,-268.316982 +"CN(C)C=O",3.7163,44.42,-0.2424,0.0327,0.2751,441.85,0.10227,19.918,-248.430371,-248.424309,-248.423365,-248.459383 +"CC(=O)CO",2.9514,39.33,-0.2699,-0.0262,0.2437,440.1727,0.088924,19.824,-268.301176,-268.295084,-268.29414,-268.331307 +"CCC(=O)C",2.6168,46.19,-0.2423,-0.0072,0.2351,489.8518,0.112006,21.716,-232.377706,-232.371073,-232.370129,-232.408256 +"CCC(=O)N",3.499,42.54,-0.2438,0.0355,0.2793,457.447,0.101847,21.374,-248.448467,-248.441988,-248.441044,-248.478935 +"CC(=O)NC",3.5402,43.61,-0.2418,0.0387,0.2805,458.9803,0.101735,20.967,-248.443503,-248.436899,-248.435955,-248.474272 +"CNC(=O)N",3.5648,39.51,-0.2436,0.0599,0.3034,428.404,0.091713,20.274,-264.504487,-264.498452,-264.497508,-264.533633 +"COC(C)=N",1.1876,43.57,-0.2595,0.0352,0.2948,443.1687,0.102062,20.256,-248.416462,-248.410358,-248.409414,-248.445651 +"CC(=O)OC",1.7569,39.33,-0.2685,0.0174,0.2859,427.6606,0.089436,19.501,-268.32127,-268.315051,-268.314106,-268.351214 +"COC(=O)N",2.329,35.5,-0.2669,0.0575,0.3244,398.5908,0.079271,19.084,-284.385189,-284.379361,-284.378417,-284.414085 +"C(C(=O)N)O",4.5676,35.87,-0.246,0.0273,0.2733,418.5967,0.078222,20.089,-284.360325,-284.354148,-284.353204,-284.390143 +"[NH3+]CC([O-])=O",5.3004,35.19,-0.2527,0.0208,0.2735,408.0279,0.080317,17.931,-284.372483,-284.367172,-284.366228,-284.40095 +"CC(C)CO",1.3149,49.26,-0.2629,0.0789,0.3418,516.4357,0.136209,23.924,-233.551389,-233.544542,-233.543598,-233.581067 +"CC(O)CO",2.2854,42.01,-0.2597,0.0631,0.3228,438.3028,0.11345,22.072,-269.479234,-269.472993,-269.472049,-269.508213 +"CCC(C)C",0.0618,56.26,-0.3085,0.085,0.3934,565.8412,0.159632,25.169,-197.629387,-197.622325,-197.621381,-197.659365 +"CCC(C)O",1.3894,49.45,-0.2617,0.0733,0.335,521.8605,0.136091,23.99,-233.555951,-233.549143,-233.548199,-233.585602 +"CC(C)OC",1.0758,49.8,-0.2476,0.086,0.3336,507.9614,0.135681,23.573,-233.545899,-233.539034,-233.53809,-233.57582 +"CC1(CC1)C",0.1068,52.92,-0.2633,0.0893,0.3526,439.1643,0.137025,22.258,-196.409349,-196.403398,-196.402453,-196.437676 +"CC1(CO1)C",1.8235,45.7,-0.2596,0.091,0.3505,405.521,0.112851,20.719,-232.335768,-232.329981,-232.329037,-232.364091 +"CC1(CC1)O",1.3822,45.85,-0.2494,0.0776,0.327,401.4888,0.113237,21.17,-232.33341,-232.327734,-232.32679,-232.36147 +"N=C1CCO1",2.5732,40.19,-0.263,0.0277,0.2907,352.0378,0.080558,15.51,-247.201165,-247.196699,-247.195755,-247.22841 +"C1CC(=O)C1",2.7119,42.77,-0.2415,-0.0194,0.2222,379.6371,0.090544,17.076,-231.15578,-231.150804,-231.149859,-231.184401 
+"C1CNC1=O",3.6671,39.73,-0.25,0.038,0.288,355.0934,0.080442,16.1,-247.225618,-247.220897,-247.219953,-247.253218 +"C1COC1=O",3.9339,35.4,-0.2788,0.0089,0.2878,336.0792,0.068574,14.483,-267.106213,-267.101929,-267.100985,-267.13332 +"C1C(=O)CN1",2.5257,39.34,-0.2437,-0.0258,0.2179,359.152,0.07965,16.083,-247.190194,-247.185543,-247.184599,-247.217688 +"C1C(=O)CO1",0.8477,35.94,-0.2647,-0.0352,0.2295,344.5473,0.066989,15.115,-267.068488,-267.063976,-267.063032,-267.095887 +"CC1CCC1",0.095,52.94,-0.2896,0.0927,0.3823,455.0215,0.138424,20.299,-196.407957,-196.40245,-196.401505,-196.436159 +"CC1CCO1",1.6826,45.72,-0.241,0.0915,0.3325,407.6909,0.11459,18.582,-232.33123,-232.325877,-232.324933,-232.359589 +"CC1COC1",1.8995,45.45,-0.2419,0.0814,0.3233,414.1575,0.114694,18.443,-232.325947,-232.320548,-232.319604,-232.354576 +"C1CC(C1)O",1.4604,45.55,-0.256,0.0801,0.3361,413.9118,0.115063,19.023,-232.333258,-232.328097,-232.327153,-232.361103 +"C1C(CO1)O",2.4158,38.58,-0.2465,0.0604,0.3069,374.4087,0.091005,17.344,-268.248371,-268.243234,-268.24229,-268.276572 +"CC1CC1C",0.1023,52.92,-0.2632,0.094,0.3573,465.3301,0.137341,22.005,-196.406419,-196.40034,-196.399396,-196.435152 +"CC1CC1O",1.3092,45.84,-0.2518,0.0822,0.334,418.298,0.113827,20.702,-232.32907,-232.323327,-232.322383,-232.357426 +"CC1CN1C",1.2483,49.79,-0.2199,0.0931,0.313,432.5081,0.125556,20.993,-212.438188,-212.432286,-212.431342,-212.466725 +"CC1OC1C",1.8159,46.02,-0.2573,0.0984,0.3557,432.2224,0.113178,20.451,-232.334436,-232.32857,-232.327626,-232.363064 +"OC1CC1O",2.4925,38.43,-0.2369,0.0642,0.3011,370.2795,0.090396,19.173,-268.251114,-268.245852,-268.244908,-268.278934 +"C1C2CC1C2",0.0002,49.82,-0.2974,0.1082,0.4056,328.2069,0.116844,15.584,-195.158734,-195.154774,-195.15383,-195.185188 +"C1C2CC1O2",1.8725,42.39,-0.2352,0.0994,0.3346,298.4772,0.092109,14.648,-231.069318,-231.065445,-231.064501,-231.095721 +"C#CCC#C",0.4777,45.22,-0.2633,0.0345,0.2978,470.3666,0.065276,19.399,-192.736096,-192.730524,-192.72958,-192.764175 +"C#CCC#N",3.5925,40.56,-0.2911,0.0111,0.3022,448.7393,0.05503,17.258,-208.842347,-208.837159,-208.836215,-208.87023 +"C(C#N)C#N",3.6958,36.12,-0.3494,-0.0151,0.3344,429.0927,0.044642,15.259,-224.94466,-224.939832,-224.938888,-224.972334 +"C#CCC=O",2.0711,41.41,-0.2583,-0.0337,0.2245,483.1343,0.065029,18.303,-229.918003,-229.912379,-229.911435,-229.946855 +"C(C=O)C#N",2.172,36.79,-0.2868,-0.0568,0.23,464.3999,0.054694,16.279,-246.023231,-246.017964,-246.017019,-246.051902 +"C(=N)NC=O",5.0884,38.8,-0.2491,-0.0216,0.2276,391.0845,0.068192,16.694,-263.280728,-263.275534,-263.27459,-263.308973 +"N=COC=O",4.2338,35.69,-0.2934,-0.0298,0.2636,460.7073,0.055229,15.704,-283.156647,-283.151564,-283.15062,-283.184645 +"C(=O)NC=O",5.2904,34.31,-0.2608,-0.0416,0.2191,372.7845,0.055704,15.692,-283.179781,-283.17476,-283.173816,-283.207861 +"CC#CC#C",1.1881,54.54,-0.242,-0.0085,0.2335,576.5936,0.065688,19.382,-192.762455,-192.756657,-192.755713,-192.790147 +"CC#CC#N",5.1545,47.82,-0.2871,-0.0377,0.2494,553.2094,0.055566,17.054,-208.863267,-208.85788,-208.856936,-208.890804 +"CC#CC=O",3.783,45.67,-0.2642,-0.0592,0.2051,568.7849,0.065532,18.156,-229.928448,-229.922366,-229.921422,-229.958956 +"CC#CCO",1.2486,46.92,-0.2421,0.0368,0.2789,613.8892,0.089036,20.884,-231.111403,-231.104707,-231.103763,-231.142502 +"CCC#CC",0.078,53.93,-0.2401,0.0631,0.3033,652.4812,0.112761,22.115,-195.196751,-195.18988,-195.188936,-195.228146 
+"CN=COC",2.9091,46.78,-0.2481,0.0251,0.2732,567.546,0.10111,20.343,-248.395289,-248.388824,-248.38788,-248.425185 +"CCCC#C",0.7752,51.47,-0.2586,0.0583,0.3169,593.0141,0.112736,22.023,-195.186228,-195.179881,-195.178937,-195.215412 +"CCCC#N",4.0641,46.62,-0.3185,0.0365,0.355,569.5065,0.102603,19.863,-211.295163,-211.289209,-211.288265,-211.324151 +"CNCC#N",4.1469,43.52,-0.249,0.0208,0.2698,535.2152,0.091346,19.026,-227.324775,-227.318937,-227.317993,-227.353619 +"COCC#C",1.3623,45.04,-0.2565,0.0324,0.2889,532.4302,0.088734,20.194,-231.092208,-231.086089,-231.085144,-231.121215 +"COCC#N",4.5445,40.22,-0.2859,0.008,0.2939,509.5881,0.078474,18.14,-247.198,-247.192237,-247.191293,-247.226866 +"C#CCCO",1.3892,44.05,-0.2624,0.0549,0.3174,542.7424,0.089329,20.716,-231.10631,-231.100171,-231.099227,-231.135302 +"C(CO)C#N",3.7158,39.41,-0.2914,0.0323,0.3237,519.4529,0.079045,18.636,-247.21445,-247.20867,-247.207726,-247.243281 +"CCCC=O",2.7552,47.03,-0.2487,-0.0195,0.2292,599.5103,0.112697,20.766,-232.36363,-232.357283,-232.356339,-232.39356 +"CCNC=O",3.694,43.88,-0.2499,0.0335,0.2834,490.704,0.102695,19.864,-248.436061,-248.429994,-248.429049,-248.466001 +"CCOC=O",4.0425,40.48,-0.2778,0.0091,0.2868,510.9124,0.089832,18.727,-268.307873,-268.301991,-268.301047,-268.337439 +"COCC=O",2.7959,40.04,-0.2577,-0.0282,0.2295,546.3932,0.088447,19.186,-268.27324,-268.267033,-268.266088,-268.30312 +"C(CO)C=O",1.4229,39.92,-0.2536,-0.0285,0.225,550.4469,0.089449,19.422,-268.282665,-268.276602,-268.275658,-268.312338 +"CCCCC",0.0603,56.8,-0.3105,0.0925,0.403,721.0614,0.160138,24.552,-197.629416,-197.622321,-197.621376,-197.659721 +"CCCCO",1.3815,49.63,-0.2615,0.0813,0.3428,669.0122,0.136722,23.274,-233.549368,-233.542485,-233.54154,-233.579515 +"CCCOC",0.9755,50.27,-0.25,0.0926,0.3426,647.6919,0.136321,22.785,-233.542445,-233.535569,-233.534624,-233.572603 +"CCOCC",0.9301,50.62,-0.2483,0.0931,0.3414,631.0818,0.136152,22.881,-233.547877,-233.541027,-233.540083,-233.578016 +"COCCO",2.2019,42.99,-0.261,0.0821,0.3431,522.4181,0.113563,21.001,-269.465281,-269.45898,-269.458036,-269.494828 +"C(CO)CO",2.7191,42.52,-0.2607,0.0779,0.3386,618.0376,0.113242,22.005,-269.468415,-269.461727,-269.460783,-269.498456 +"C#CC1CC1",0.8635,48.65,-0.2425,0.0503,0.2928,429.6736,0.090505,18.785,-193.963318,-193.958152,-193.957208,-193.990986 +"C#CC1CN1",1.2763,45.22,-0.2422,0.0322,0.2744,408.3613,0.079399,17.704,-210.001565,-209.996559,-209.995615,-210.029132 +"C#CC1CO1",1.7013,41.39,-0.265,0.0189,0.2839,397.8388,0.066467,16.924,-229.879598,-229.87469,-229.873746,-229.907107 +"C1CC1C#N",4.1966,43.5,-0.2931,0.0289,0.322,407.3682,0.080403,16.544,-210.071468,-210.066701,-210.065757,-210.098937 +"N#CC1CN1",2.8377,40.21,-0.2775,0.0076,0.2851,386.5557,0.069125,15.572,-226.107205,-226.102575,-226.10163,-226.134578 +"N#CC1CO1",3.6533,36.45,-0.3077,-0.0072,0.3004,376.0875,0.056139,14.866,-245.983375,-245.978827,-245.977883,-246.010694 +"C1CC1C=O",3.147,44.1,-0.2486,-0.0157,0.2329,436.2586,0.090683,17.457,-231.145151,-231.140101,-231.139157,-231.173071 +"O=CC1CN1",2.03,40.19,-0.2639,-0.0381,0.2258,363.3567,0.079887,16.144,-247.184334,-247.179519,-247.178575,-247.211988 +"O=CC1CO1",2.3951,36.8,-0.2636,-0.0418,0.2217,402.0419,0.066465,15.818,-267.060326,-267.055484,-267.05454,-267.088115 +"C1CN1C=O",3.2517,40.77,-0.2555,-0.0013,0.2542,383.6013,0.079595,16.197,-247.192524,-247.187626,-247.186682,-247.22031 +"CCC1CC1",0.095,52.9,-0.2723,0.0986,0.3709,514.3053,0.137725,21.364,-196.40532,-196.399392,-196.398448,-196.434166 
+"CCC1CO1",1.7468,45.95,-0.2623,0.0976,0.3599,471.3094,0.113861,19.579,-232.328175,-232.322501,-232.321557,-232.356864 +"CCN1CC1",1.0617,50.07,-0.2299,0.095,0.3249,474.34,0.126034,20.247,-212.439286,-212.433577,-212.432633,-212.467838 +"COC1CC1",1.0906,46.29,-0.244,0.0955,0.3395,452.0432,0.113725,19.738,-232.3174,-232.311743,-232.310798,-232.345925 +"C1CC1CO",1.3201,46.01,-0.2601,0.079,0.339,468.6229,0.1143,20.059,-232.324966,-232.319256,-232.318312,-232.35363 +"OCC1CN1",2.9395,42.64,-0.2436,0.0677,0.3113,445.0397,0.103108,19.038,-248.364357,-248.358808,-248.357864,-248.392932 +"OCC1CO1",1.7822,38.7,-0.2641,0.0777,0.3418,424.287,0.090736,18.033,-268.248806,-268.243491,-268.242547,-268.277228 +"C1CC=CC1",0.1413,50.34,-0.2335,0.0325,0.2661,375.5096,0.116471,17.052,-195.226072,-195.221411,-195.220467,-195.253489 +"C1COC=N1",1.6299,38.7,-0.2479,0.0241,0.272,315.6279,0.081779,14.168,-247.215049,-247.210764,-247.20982,-247.242343 +"C1C=CCO1",1.5715,43.73,-0.2303,0.0188,0.2491,336.0014,0.092218,15.461,-231.142581,-231.138099,-231.137155,-231.169932 +"C1CCCC1",0.001,52,-0.3078,0.0844,0.3922,414.0869,0.140316,18.624,-196.432825,-196.427608,-196.426663,-196.462198 +"C1CCOC1",1.465,45.06,-0.2479,0.0825,0.3304,374.1439,0.116649,16.873,-232.351764,-232.346824,-232.34588,-232.380488 +"C1COCO1",1.3482,38.2,-0.2572,0.0865,0.3437,333.797,0.092697,15.172,-268.273739,-268.269015,-268.268071,-268.302293 +"C1C2CCC12",0.2527,48.92,-0.2519,0.0987,0.3506,350.7167,0.116089,16.868,-195.179738,-195.175335,-195.174391,-195.206606 +"C1C2COC12",1.7167,41.77,-0.2337,0.0853,0.319,315.0958,0.091972,15.312,-231.093096,-231.088957,-231.088013,-231.119749 +"C1CC2OC12",1.8518,41.8,-0.2517,0.0951,0.3468,317.7261,0.091787,15.427,-231.102112,-231.097923,-231.096979,-231.128841 +"c1c[nH]nc1",2.2277,39.17,-0.2438,0.0239,0.2677,284.6224,0.071284,13.255,-226.144377,-226.140644,-226.1397,-226.170637 +"c1cnn[nH]1",4.2954,35.66,-0.2641,-0.0023,0.2618,266.8979,0.059084,12.248,-242.180146,-242.176561,-242.175617,-242.206312 +"c1[nH]ncn1",2.79,35.01,-0.2705,0.0059,0.2764,264.8693,0.059902,11.923,-242.205627,-242.202078,-242.201134,-242.231775 +"c1[nH]cnn1",5.498,35.27,-0.2551,0.0164,0.2715,266.6747,0.059116,12.36,-242.196351,-242.192733,-242.191789,-242.222534 +"c1cn[nH]n1",0.0597,35.3,-0.2742,-0.0055,0.2687,265.6333,0.059852,11.936,-242.18601,-242.182459,-242.181515,-242.212163 +"c1cnoc1",2.8629,35.74,-0.2677,-0.0138,0.2538,272.9362,0.058014,12.232,-245.994065,-245.9905,-245.989556,-246.020256 +"c1conn1",3.4411,32.49,-0.2862,-0.044,0.2422,256.5774,0.045382,11.821,-262.035127,-262.031571,-262.030627,-262.061292 +"c1ncon1",1.126,31.56,-0.3054,-0.0335,0.2719,253.56,0.046608,11.003,-262.056533,-262.053124,-262.05218,-262.082625 +"c1nnco1",3.1035,31.41,-0.2889,-0.0178,0.2711,252.7276,0.046502,11.122,-262.065493,-262.06207,-262.061126,-262.091582 +"c1nnno1",2.8915,28.58,-0.331,-0.0643,0.2667,239.8863,0.033398,10.82,-278.075775,-278.072348,-278.071404,-278.101859 +"c1cnon1",3.2148,32.29,-0.3213,-0.0515,0.2698,256.8034,0.045873,11.319,-262.017735,-262.014288,-262.013344,-262.043855 +"c1nnon1",1.9641,28.86,-0.3379,-0.0815,0.2563,240.038,0.033459,10.694,-278.059333,-278.055942,-278.054997,-278.0854 +"n1nnon1",0.6581,25.66,-0.3671,-0.1073,0.2598,226.0123,0.020349,10.401,-294.081616,-294.078226,-294.077281,-294.10766 +"CC(=NO)C#C",0.6411,54.16,-0.2383,-0.0315,0.2068,618.9851,0.081951,23.675,-285.217962,-285.211126,-285.210182,-285.248449 
+"CC(=NO)CO",1.9975,49.13,-0.2502,0.0066,0.2567,639.6693,0.105605,25.106,-323.58568,-323.578268,-323.577324,-323.617025 +"CCC(=NO)C",0.6759,56.46,-0.239,0.0173,0.2562,699.8479,0.129195,26.442,-287.662612,-287.654946,-287.654001,-287.694375 +"C1CC(=NO)C1",0.8763,53.48,-0.2405,0.0074,0.2479,599.0421,0.107557,21.872,-286.438084,-286.4319,-286.430956,-286.468514 +"C1C(=NO)CN1",1.2383,49.95,-0.23,0.0026,0.2326,576.9778,0.096619,20.934,-302.472397,-302.466506,-302.465561,-302.501719 +"C1C(=NO)CO1",1.1556,46.34,-0.2498,-0.0046,0.2452,562.026,0.084017,19.932,-322.351046,-322.345309,-322.344365,-322.380266 +"C(F)(F)(F)F",0.0003,15.93,-0.4286,0.1935,0.6221,279.3208,0.017147,12.639,-437.484875,-437.480956,-437.480011,-437.512059 +"N=C1NC=CO1",3.2823,44.74,-0.1947,0.0288,0.2235,443.1895,0.074595,18.338,-301.367422,-301.362374,-301.36143,-301.395667 +"N=C1OC=CO1",3.6351,40.77,-0.2262,0.0118,0.238,424.2581,0.062567,16.38,-321.236907,-321.232405,-321.231461,-321.264644 +"c1c[nH]c(=O)[nH]1",3.6997,43.39,-0.1951,0.0406,0.2357,444.2644,0.075511,18.314,-301.405558,-301.400597,-301.399653,-301.433583 +"c1coc(=O)[nH]1",4.6537,39.72,-0.2215,0.0207,0.2422,425.4583,0.063077,16.761,-321.276274,-321.271636,-321.270692,-321.3041 +"c1coc(=O)o1",4.4195,36.02,-0.253,0.0027,0.2556,406.9334,0.050756,15.099,-341.145624,-341.141349,-341.140405,-341.173218 +"Cc1ccc[nH]1",1.9162,55.77,-0.1934,0.0496,0.243,523.1127,0.109918,20.832,-249.397368,-249.391707,-249.390763,-249.42627 +"Cc1ccco1",0.5222,51.86,-0.2124,0.0239,0.2363,501.9826,0.097415,19.38,-269.267574,-269.262222,-269.261277,-269.2962 +"Cc1cnco1",2.0636,47.63,-0.2364,0.0066,0.243,481.0455,0.086244,18.024,-285.328258,-285.323118,-285.322174,-285.356714 +"Cc1ncco1",1.3368,47.52,-0.2371,0.0082,0.2453,474.9666,0.086046,18.156,-285.330157,-285.32495,-285.324006,-285.358812 +"c1cc([nH]c1)N",1.5102,51.62,-0.185,0.0556,0.2406,495.5949,0.099063,20.507,-265.441614,-265.436072,-265.435128,-265.470365 +"c1cc(oc1)N",1.5774,48.3,-0.1844,0.0397,0.2241,472.7151,0.086472,19.374,-285.313612,-285.308395,-285.307451,-285.341898 +"c1c(ocn1)N",2.9659,44.3,-0.2049,0.0202,0.2251,451.8468,0.075243,18,-301.373496,-301.368495,-301.36755,-301.401649 +"c1coc(n1)N",1.8589,43.86,-0.2088,0.0284,0.2373,444.8744,0.075481,17.958,-301.381127,-301.376265,-301.375321,-301.409022 +"c1cc([nH]c1)O",0.9982,47.38,-0.1814,0.0635,0.2449,474.5019,0.0864,19.901,-285.320198,-285.314886,-285.313942,-285.348469 +"c1c([nH]cn1)O",2.938,43.45,-0.2022,0.0448,0.247,453.0651,0.075256,18.316,-301.377995,-301.372972,-301.372028,-301.40609 +"c1cnc([nH]1)O",2.18,43.23,-0.2051,0.0505,0.2555,445.8144,0.075677,18.016,-301.389192,-301.38437,-301.383426,-301.417032 +"Cc1cc[nH]c1",1.6978,55.32,-0.1982,0.0533,0.2515,528.9258,0.109903,20.948,-249.395323,-249.38964,-249.388696,-249.424242 +"Cc1c[nH]cn1",3.3147,51.32,-0.2157,0.0376,0.2533,500.6403,0.098598,19.536,-265.457424,-265.451994,-265.45105,-265.486153 +"Cc1ccoc1",0.8398,51.11,-0.2183,0.025,0.2434,512.5694,0.097541,19.399,-269.263476,-269.25813,-269.257186,-269.292093 +"Cc1cocn1",1.3204,47.12,-0.2395,0.0074,0.2468,485.0096,0.086195,18.123,-285.326726,-285.321574,-285.32063,-285.355221 +"c1c[nH]cc1N",1.8044,51.67,-0.1741,0.0558,0.2299,500.1351,0.098959,20.983,-265.43852,-265.432959,-265.432015,-265.467025 +"c1c(nc[nH]1)N",3.0074,47.82,-0.1849,0.0391,0.2241,471.183,0.087869,19.414,-281.504579,-281.499371,-281.498427,-281.532762 +"c1cocc1N",1.7218,47.62,-0.1947,0.029,0.2237,483.7409,0.086736,19.259,-285.306999,-285.301863,-285.300919,-285.33518 
+"c1c(nco1)N",1.6737,43.75,-0.2057,0.0107,0.2164,455.3577,0.075457,17.986,-301.373831,-301.368911,-301.367967,-301.401842 +"c1c[nH]cc1O",1.6811,47.51,-0.1864,0.05,0.2364,480.4033,0.086407,19.995,-285.316421,-285.311084,-285.31014,-285.344731 +"c1c(nc[nH]1)O",2.8207,43.71,-0.2006,0.0315,0.2321,450.9127,0.075668,18.065,-301.385442,-301.380615,-301.379671,-301.413294 +"c1c(nco1)O",0.4508,39.79,-0.2242,0.001,0.2252,435.5044,0.063161,16.639,-321.252793,-321.24821,-321.247265,-321.280543 +"Cn1cccc1",2.0318,55.54,-0.2006,0.0463,0.2469,510.1975,0.110002,20.193,-249.387499,-249.381908,-249.380963,-249.416542 +"Cn1ccnc1",3.9054,51.11,-0.2216,0.0324,0.2541,488.8221,0.098859,18.783,-265.44727,-265.441887,-265.440943,-265.476219 +"c1ccccc1",0,57.28,-0.2475,0.0029,0.2503,456.6788,0.100175,17.214,-232.164586,-232.160188,-232.159244,-232.192047 +"c1ccncc1",2.1103,53.03,-0.2518,-0.0225,0.2293,432.2254,0.088683,16.093,-248.211932,-248.207665,-248.206721,-248.239325 +"c1cnccn1",0,49.2,-0.2493,-0.0511,0.1982,408.933,0.07674,15.146,-264.255612,-264.251446,-264.250502,-264.282954 +"c1cncnc1",2.2031,48.47,-0.2531,-0.0415,0.2116,408.2875,0.076933,15.138,-264.261826,-264.257646,-264.256702,-264.289179 +"c1ncncn1",0.0001,43.74,-0.2768,-0.0554,0.2214,384.8085,0.065246,14.209,-280.31408,-280.30997,-280.309026,-280.341405 +"CC(C)(C)C#C",0.6082,62.05,-0.2585,0.0629,0.3214,624.7654,0.140054,28.513,-234.476516,-234.468849,-234.467905,-234.506959 +"CC(C)(C)C#N",3.9525,56.9,-0.3156,0.0438,0.3594,602.0353,0.129886,26.404,-250.585843,-250.578524,-250.57758,-250.616127 +"CC(C)(C#N)N",2.8519,53.67,-0.2675,0.0349,0.3024,580.9124,0.119019,25.93,-266.630629,-266.623452,-266.622508,-266.660779 +"CC(C)(C#C)O",1.7721,54.93,-0.2589,0.0425,0.3014,587.1297,0.116168,27.393,-270.399081,-270.391629,-270.390685,-270.429359 +"CC(C)(C#N)O",4.8465,49.98,-0.2931,0.0216,0.3148,564.648,0.105718,25.46,-286.506282,-286.499096,-286.498152,-286.5365 +"CC(C)(C)C=O",2.5692,56.92,-0.247,-0.0179,0.2291,611.1634,0.139975,27.295,-271.656372,-271.648691,-271.647747,-271.687368 +"CC(C)(C=O)O",2.8176,49.7,-0.2523,-0.0258,0.2265,570.0556,0.115843,26.432,-307.582044,-307.57441,-307.573465,-307.613529 +"CC(C)(C)CO",1.3469,59.72,-0.2638,0.0725,0.3363,650.4504,0.163869,29.797,-272.841336,-272.83319,-272.832246,-272.872437 +"CC(C)(CO)O",0.1987,52.67,-0.2619,0.0727,0.3346,609.3369,0.139933,28.879,-308.769557,-308.761514,-308.76057,-308.800675 +"CCC(C)(C)C",0.0414,66.63,-0.3031,0.0748,0.378,704.4105,0.187572,30.906,-236.917458,-236.909222,-236.908277,-236.948672 +"CCC(C)(C)O",1.3435,59.74,-0.2601,0.0658,0.3259,660.7696,0.163685,29.948,-272.84753,-272.839427,-272.838483,-272.878675 +"CC(C)(C)OC",1.0896,60.03,-0.2455,0.079,0.3245,644.1742,0.163225,29.597,-272.835594,-272.827416,-272.826471,-272.867062 +"CC#CC(=O)C",3.5644,56.61,-0.2532,-0.0449,0.2082,786.3721,0.093473,23.826,-269.230258,-269.222736,-269.221792,-269.263526 +"CC#CC(=O)N",4.0759,52.6,-0.2502,-0.016,0.2342,756.8071,0.082966,23.663,-285.30044,-285.292874,-285.29193,-285.334226 +"CC#CC(C)C",0.1156,65.24,-0.2404,0.0633,0.3036,861.2875,0.140828,27.581,-234.486176,-234.478004,-234.477059,-234.519337 +"CC#CC(C)O",1.89,57.94,-0.2492,0.0416,0.2908,819.2949,0.117252,26.384,-270.408713,-270.400789,-270.399845,-270.441577 +"CC(=O)CC#C",2.2729,51.94,-0.246,-0.0185,0.2275,636.8813,0.093109,23.951,-269.219315,-269.212283,-269.211339,-269.250569 +"CC(=O)CC#N",5.6789,47.48,-0.274,-0.037,0.237,640.5669,0.082597,21.969,-285.324641,-285.317853,-285.316909,-285.356331 
+"C#CCC(=O)N",3.7732,48.37,-0.2498,0.0244,0.2742,627.6953,0.082749,23.677,-285.287913,-285.280928,-285.279984,-285.319587 +"C(C#N)C(=O)N",6.7117,43.68,-0.2741,0.0051,0.2792,603.7082,0.072367,21.668,-301.394865,-301.38819,-301.387246,-301.426589 +"CC(=N)NC=O",0.9105,50.18,-0.2611,-0.0177,0.2434,587.7789,0.096761,21.828,-302.588564,-302.582194,-302.58125,-302.618688 +"CC(=N)OC=O",4.0954,45.3,-0.2723,-0.0251,0.2472,632.4706,0.083113,21.35,-322.460671,-322.454075,-322.45313,-322.491611 +"CC(=O)CC=O",2.6833,48.15,-0.2505,-0.0381,0.2124,667.8025,0.09322,22.667,-306.397642,-306.390688,-306.389744,-306.429362 +"CC(=O)NC=N",5.1423,49.82,-0.2397,-0.0114,0.2283,621.2041,0.095742,22.603,-302.579654,-302.57277,-302.571825,-302.610884 +"CC(=O)NC=O",5.6602,45.18,-0.2511,-0.0298,0.2213,600.473,0.083351,21.535,-322.479769,-322.473128,-322.472183,-322.510598 +"CC(=O)OC=N",0.9137,45.91,-0.2768,-0.0235,0.2533,633.5112,0.083216,21.339,-322.464725,-322.458133,-322.457189,-322.495484 +"C(=O)NC(=N)N",2.4787,46.25,-0.2336,-0.0146,0.2189,561.2611,0.08593,21.711,-318.647923,-318.641827,-318.640883,-318.67749 +"C(C=O)C(=O)N",3.7344,44.18,-0.2502,-0.0278,0.2223,637.4959,0.083052,22.336,-322.469844,-322.463011,-322.462067,-322.501174 +"C(=N)NC(=O)N",5.2585,46.05,-0.2443,0.0042,0.2484,586.8385,0.085659,22.001,-318.641916,-318.635509,-318.634565,-318.672134 +"C(=O)NC(=O)N",6.1119,41.58,-0.2548,-0.0128,0.242,566.0758,0.073232,20.96,-338.542975,-338.536767,-338.535823,-338.573022 +"NC(=O)OC=N",1.994,42.07,-0.2743,0.0024,0.2767,600.144,0.0727,21.164,-338.531538,-338.524995,-338.52405,-338.562187 +"CC(C)CC#C",0.6828,62.17,-0.259,0.0553,0.3143,738.9549,0.140781,27.444,-234.476081,-234.46852,-234.467575,-234.506975 +"CC(C)CC#N",3.9826,57.19,-0.3161,0.0346,0.3507,713.1432,0.130632,25.297,-250.585183,-250.578,-250.577056,-250.615918 +"CC(O)CC#C",1.4348,54.82,-0.2622,0.0545,0.3167,692.7907,0.117241,26.225,-270.401061,-270.39373,-270.392786,-270.431817 +"CC(O)CC#N",4.9064,50.45,-0.2866,0.0348,0.3214,674.7315,0.106844,24.229,-286.508374,-286.501342,-286.500397,-286.539045 +"CN(C)CC#N",4.0824,54.43,-0.2371,0.0217,0.2588,671.7683,0.119064,24.109,-266.609146,-266.602164,-266.60122,-266.639636 +"CC(C)CC=O",2.7416,57.47,-0.2481,-0.0197,0.2284,750.1795,0.140549,26.267,-271.653205,-271.645574,-271.644629,-271.685015 +"CC(C)NC=O",3.6603,54.8,-0.2484,0.0348,0.2832,684.3066,0.130467,25.451,-287.72789,-287.72048,-287.719536,-287.760264 +"CC(C)OC=O",4.1216,51.48,-0.2748,0.0115,0.2863,712.2912,0.117552,24.383,-307.60208,-307.594822,-307.593878,-307.634011 +"CC(O)CC=O",3.3205,50.52,-0.2437,-0.0204,0.2232,730.3237,0.11705,25.112,-307.577822,-307.570462,-307.569518,-307.609331 +"CN(C)CC=O",2.7328,54.59,-0.2267,-0.0252,0.2016,722.1956,0.129063,25.118,-287.681216,-287.673848,-287.672904,-287.712476 +"CC(=O)CCO",1.2601,50.08,-0.2458,-0.0111,0.2348,760.732,0.117054,25.274,-307.586014,-307.578343,-307.577399,-307.618405 +"CCCC(=O)C",2.5426,57.31,-0.2415,-0.0071,0.2345,817.4605,0.140374,26.57,-271.666055,-271.658117,-271.657173,-271.699054 +"CCCC(=O)N",3.4104,53.75,-0.243,0.0359,0.2789,778.0458,0.130316,26.19,-287.736841,-287.729087,-287.728143,-287.769373 +"CCNC(=O)C",3.4941,54.72,-0.2402,0.0381,0.2783,725.3315,0.130219,25.795,-287.734664,-287.726871,-287.725927,-287.767999 +"CCNC(=O)N",3.5648,50.96,-0.2427,0.0598,0.3025,719.1944,0.120088,25.169,-303.795699,-303.788437,-303.787493,-303.827496 +"CCOC(=O)C",1.9371,50.66,-0.2654,0.0196,0.285,719.0142,0.117704,24.457,-307.614861,-307.607387,-307.606443,-307.647075 
+"CCOC(=O)N",2.4228,46.82,-0.2644,0.0591,0.3235,686.248,0.107556,24.01,-323.6788,-323.671741,-323.670797,-323.710041 +"C[NH2+]CC([O-])=O",5.2128,46.17,-0.2494,0.0196,0.2691,624.3195,0.108531,22.427,-323.654114,-323.64761,-323.646666,-323.684532 +"CC(=O)COC",3.6117,51.17,-0.2449,-0.0102,0.2347,743.2534,0.116412,24.783,-307.571921,-307.564235,-307.563291,-307.604498 +"COCC(=O)N",4.2922,47.53,-0.2464,0.0274,0.2738,706.1403,0.10636,24.375,-323.641347,-323.63393,-323.632986,-323.673408 +"C(CO)C(=O)N",2.1536,46.39,-0.2471,0.0327,0.2798,722.9054,0.106736,25.021,-323.657279,-323.649644,-323.6487,-323.69021 +"[NH3+]CCC([O-])=O",14.8809,54.51,-0.167,-0.0333,0.1338,714.9069,0.107753,23.572,-323.582949,-323.575941,-323.574996,-323.614532 +"CC(C)CCO",1.3839,60.25,-0.2613,0.0807,0.342,850.8528,0.164714,28.734,-272.837559,-272.829406,-272.828462,-272.869568 +"CC(O)CCO",0.18,53.18,-0.2583,0.0719,0.3302,802.5075,0.141169,27.553,-308.764658,-308.756772,-308.755828,-308.796363 +"CCCC(C)C",0.0898,67.45,-0.3042,0.0859,0.3901,906.7075,0.188151,29.991,-236.917664,-236.909314,-236.90837,-236.949789 +"CCCC(C)O",1.3758,60.39,-0.2613,0.0739,0.3352,858.1408,0.164563,28.823,-272.84456,-272.836468,-272.835524,-272.876391 +"CCOC(C)C",0.9735,61.19,-0.2458,0.0875,0.3333,814.3456,0.163926,28.558,-272.839582,-272.831387,-272.830442,-272.871875 +"CC(C)COC",0.9451,61.01,-0.2508,0.0876,0.3384,817.8428,0.16427,28.268,-272.83269,-272.824541,-272.823597,-272.864643 +"COCC(C)O",2.2037,53.61,-0.2581,0.0777,0.3359,711.2834,0.141325,26.669,-308.759135,-308.751467,-308.750523,-308.790567 +"CC(=O)C(=O)C",0.0005,47.03,-0.2408,-0.0826,0.1582,581.337,0.092927,23.569,-306.406578,-306.399338,-306.398393,-306.43831 +"CC(=O)C(=N)N",1.9354,48.43,-0.2483,-0.0515,0.1968,562.5203,0.09552,23.683,-302.580808,-302.574009,-302.573065,-302.611309 +"CC(=O)C(=O)N",1.1652,43.45,-0.2416,-0.0632,0.1784,547.5835,0.083247,22.796,-322.482659,-322.475927,-322.474982,-322.513285 +"NC(=[NH2+])C([O-])=O",8.5052,41.06,-0.2349,-0.0376,0.1973,506.6426,0.073524,20.891,-338.530072,-338.524237,-338.523292,-338.559249 +"C(=O)(C(=O)N)N",0.0024,39.85,-0.2422,-0.0321,0.21,513.8164,0.073748,21.839,-338.559964,-338.553802,-338.552858,-338.589499 +"CC(C)C(=O)C",2.5905,56.8,-0.2389,-0.0081,0.2308,650.5524,0.140412,27.077,-271.665153,-271.657306,-271.656362,-271.697004 +"CC(C)C(=O)N",3.4399,53.35,-0.2409,0.0371,0.278,617.1359,0.130071,26.737,-287.737498,-287.729645,-287.728701,-287.770224 +"CC([NH3+])C([O-])=O",5.0933,45.76,-0.251,0.0174,0.2683,578.6969,0.108391,23.344,-323.665247,-323.658667,-323.657723,-323.695493 +"CC(O)C(C)=O",2.9304,49.94,-0.263,-0.0258,0.2371,606.3478,0.117268,25.252,-307.594137,-307.586914,-307.58597,-307.625022 +"CC(O)C(N)=O",4.1096,46.24,-0.26,0.0186,0.2786,574.3293,0.10692,24.979,-323.665987,-323.658861,-323.657917,-323.69684 +"CC(=O)N(C)C",3.5335,54.99,-0.2342,0.0354,0.2696,622.3344,0.129864,25.808,-287.723848,-287.716142,-287.715198,-287.755519 +"CN(C)C(=O)N",3.6689,50.94,-0.2306,0.0604,0.2909,591.7927,0.119711,25.1,-303.785832,-303.778493,-303.777549,-303.816841 +"CC(C)C(C)C",0,66.87,-0.3016,0.0822,0.3838,747.1084,0.187721,30.606,-236.916033,-236.907667,-236.906723,-236.94792 +"CC(C)C(C)O",1.414,59.83,-0.2613,0.0684,0.3297,675.0478,0.164151,29.475,-272.843742,-272.8356,-272.834656,-272.875156 +"CC(O)C(C)O",0.1664,52.62,-0.2564,0.0643,0.3208,632.2372,0.140737,28.004,-308.772292,-308.764288,-308.763343,-308.804403 +"CC1(CCC1)C",0.0245,63.32,-0.2837,0.0812,0.3649,603.4335,0.165997,26.214,-235.699029,-235.692187,-235.691243,-235.728837 
+"CC1(CCO1)C",1.636,56.17,-0.2392,0.0812,0.3205,560.0105,0.141947,24.691,-271.626697,-271.619919,-271.618974,-271.656931 +"CC1(COC1)C",1.888,56.09,-0.2425,0.0728,0.3154,569.1031,0.142192,24.362,-271.618179,-271.611373,-271.610429,-271.649191 +"CC1(CCC1)O",1.3454,56.29,-0.2526,0.0743,0.3269,562.0396,0.142376,25.093,-271.627826,-271.621262,-271.620317,-271.657345 +"CC1(COC1)O",2.486,49.2,-0.246,0.0616,0.3076,527.7516,0.118569,23.305,-307.544712,-307.538272,-307.537328,-307.574452 +"CC1(C)CC1O",1.2792,56.93,-0.2497,0.083,0.3327,599.392,0.141365,26.639,-271.621961,-271.61481,-271.613865,-271.652008 +"CC1(O)CC1O",1.9707,49.85,-0.2427,0.0794,0.3221,557.5787,0.117602,25.546,-307.546144,-307.539271,-307.538327,-307.575926 +"CC1CC1(C)C",0.0963,64.09,-0.2558,0.0887,0.3445,649.5784,0.16489,27.904,-235.698987,-235.691505,-235.69056,-235.729399 +"CC1CC1(C)O",1.2745,56.95,-0.2458,0.0798,0.3256,607.2163,0.141106,26.806,-271.623957,-271.616756,-271.615812,-271.654105 +"CC1OC1(C)C",1.7972,56.95,-0.2545,0.0923,0.3468,616.1558,0.140619,26.504,-271.630416,-271.623061,-271.622116,-271.660915 +"CC1(CN1C)C",1.1514,60.96,-0.219,0.0873,0.3063,612.8334,0.153033,26.999,-251.732828,-251.725481,-251.724536,-251.763141 +"CC1CC(=N)O1",2.8604,51.41,-0.2598,0.0305,0.2903,567.9978,0.108342,21.198,-286.498235,-286.492393,-286.491449,-286.527464 +"CC1CC(=O)C1",2.8997,53.83,-0.2394,-0.0163,0.2231,615.4797,0.118664,22.568,-270.446915,-270.440714,-270.43977,-270.476747 +"CC1CC(=O)N1",3.855,50.75,-0.2465,0.0387,0.2852,578.1159,0.108284,21.755,-286.519735,-286.513686,-286.512742,-286.549288 +"CC1CC(=O)O1",4.16,46.42,-0.2737,0.0124,0.2861,549.3072,0.096346,20.176,-306.403144,-306.397481,-306.396537,-306.432231 +"CN1CC(=O)C1",2.5981,51.1,-0.2304,-0.0233,0.2071,581.4874,0.107085,21.382,-286.476486,-286.470495,-286.469551,-286.505845 +"C1C(CC1=O)O",3.0457,46.78,-0.242,-0.0206,0.2214,568.1088,0.095023,21.395,-306.369631,-306.363717,-306.362773,-306.399081 +"CC1CC(C)C1",0.1412,64.11,-0.2832,0.0906,0.3739,708.7554,0.166237,25.93,-235.700285,-235.693375,-235.69243,-235.730474 +"CC1CC(C)O1",1.5588,56.92,-0.2399,0.0942,0.3341,627.2141,0.14242,24.277,-271.628346,-271.621668,-271.620724,-271.658548 +"CC1CC(O)C1",1.398,56.88,-0.253,0.0788,0.3318,659.729,0.142766,24.723,-271.62464,-271.618,-271.617056,-271.65458 +"CN1CC(C1)O",1.5574,53.9,-0.2196,0.0838,0.3034,603.8647,0.131404,23.314,-287.657053,-287.650703,-287.649759,-287.686678 +"OC1CC(O)C1",1.3233,49.09,-0.2525,0.0691,0.3216,617.3904,0.119358,23.5,-307.548619,-307.542315,-307.541371,-307.578204 +"CC12CC(C1)C2",0.2077,61.09,-0.2934,0.0984,0.3917,520.6157,0.144312,21.869,-234.455424,-234.449921,-234.448977,-234.483864 +"CC12CC(C1)O2",1.8191,53.68,-0.2307,0.1012,0.3319,482.6135,0.119521,21.01,-270.370102,-270.364657,-270.363713,-270.398537 +"CC12CN(C1)C2",1.9709,57.23,-0.2257,0.0834,0.3091,503.8125,0.132764,21.04,-250.478855,-250.473446,-250.472502,-250.507275 +"OC12CC(C1)C2",1.329,53.49,-0.2587,0.0809,0.3397,476.4978,0.120826,20.67,-270.382725,-270.377533,-270.376589,-270.410876 +"OC12CN(C1)C2",1.7613,50.04,-0.2445,0.0682,0.3126,459.4671,0.109323,19.785,-286.408473,-286.403385,-286.402441,-286.436586 +"C#CC(=O)C#C",3.1111,50.23,-0.28,-0.0898,0.1902,559.9421,0.04691,20.497,-266.763445,-266.757444,-266.7565,-266.792861 +"C#CC(=O)C#N",3.7814,44.9,-0.3156,-0.121,0.1947,539.3608,0.036304,18.651,-282.861121,-282.855418,-282.854474,-282.890406 +"C(#N)C(=O)C#N",0.6341,39.71,-0.3538,-0.1562,0.1977,520.8737,0.02554,16.955,-298.95295,-298.94752,-298.946576,-298.982096 
+"C#CC(=O)C=O",4.3138,43.98,-0.2672,-0.1181,0.1491,570.4143,0.046458,19.716,-303.934323,-303.928202,-303.927258,-303.964386 +"C(=O)C(=O)C#N",1.4415,38.68,-0.2978,-0.1503,0.1475,551.5682,0.035739,18.042,-320.029854,-320.024005,-320.023061,-320.059734 +"C(=O)C(=O)C=O",3.4458,37.46,-0.2606,-0.1455,0.1151,583.5476,0.045968,18.951,-341.097446,-341.091177,-341.090233,-341.128555 +"CC(C#C)C#C",0.6022,56.95,-0.2622,0.034,0.2962,627.8377,0.093567,24.829,-232.02593,-232.019028,-232.018084,-232.056008 +"CC(C#C)C#N",3.7481,51.99,-0.2879,0.0116,0.2995,605.1461,0.083296,22.726,-248.132701,-248.126157,-248.125213,-248.162616 +"CC(C#N)C#N",4.1272,47.23,-0.3433,-0.0129,0.3304,583.9904,0.072887,20.75,-264.235803,-264.229597,-264.228653,-264.265543 +"NC(C#C)C#N",2.5088,48.67,-0.2798,-0.003,0.2768,583.2106,0.072434,22.216,-264.171923,-264.165508,-264.164564,-264.201717 +"C(#N)C(C#N)N",3.6178,43.93,-0.3037,-0.0279,0.2758,562.9596,0.061962,20.295,-280.273691,-280.267611,-280.266667,-280.303301 +"C#CC(C#C)O",1.3299,49.83,-0.2708,0.0039,0.2747,588.5174,0.070081,23.408,-267.944278,-267.937674,-267.93673,-267.97414 +"OC(C#C)C#N",3.263,45.03,-0.3019,-0.0199,0.2821,566.4572,0.05969,21.363,-284.048718,-284.042467,-284.041523,-284.078408 +"C(#N)C(C#N)O",2.3093,40.41,-0.3463,-0.0464,0.3,546.2842,0.04917,19.442,-300.148891,-300.142982,-300.142038,-300.178386 +"CC(C=O)C#C",2.3065,52.76,-0.2536,-0.0324,0.2213,632.3767,0.093434,23.743,-269.207716,-269.20074,-269.199796,-269.238596 +"CC(C=O)C#N",2.7273,47.71,-0.2801,-0.0545,0.2256,611.9888,0.083083,21.73,-285.313284,-285.306637,-285.305693,-285.344071 +"OC(C=O)C#C",2.1664,45.31,-0.2709,-0.0493,0.2216,567.4229,0.070272,21.821,-305.130184,-305.123844,-305.1229,-305.160099 +"OC(C=O)C#N",2.9799,40.49,-0.3073,-0.0733,0.234,547.2998,0.059833,19.861,-321.233569,-321.227582,-321.226638,-321.263248 +"CC(C=O)C=O",3.0426,48.36,-0.2559,-0.0573,0.1986,637.0667,0.093292,22.644,-306.383308,-306.376314,-306.37537,-306.414827 +"CN(C=N)C=O",2.4166,51.06,-0.2595,-0.0171,0.2424,607.0428,0.096802,21.581,-302.579718,-302.573326,-302.572382,-302.60969 +"CN(C=O)C=O",0.6161,45.8,-0.2725,-0.0352,0.2374,588.7943,0.084124,20.743,-322.477268,-322.470851,-322.469907,-322.507813 +"C(=O)C(C=O)O",3.3417,41.69,-0.2649,-0.0663,0.1986,555.1885,0.070094,20.624,-342.307696,-342.301481,-342.300537,-342.33778 +"CN=C(C#N)N",5.1006,50.92,-0.2622,-0.0399,0.2223,616.0271,0.084203,22.896,-281.480993,-281.474271,-281.473326,-281.51108 +"CN=C(C=O)N",2.7489,50.95,-0.2476,-0.0605,0.187,629.4902,0.095327,23.292,-302.564379,-302.557593,-302.556649,-302.594563 +"CCC(=O)C#C",2.733,53.26,-0.264,-0.0552,0.2087,663.4092,0.09367,23.625,-269.216969,-269.210012,-269.209068,-269.247773 +"CCC(=O)C#N",3.5163,48.11,-0.2976,-0.0854,0.2122,643.1284,0.08307,21.82,-285.317454,-285.31081,-285.309865,-285.34803 +"CNC(=N)C#N",3.0753,50.21,-0.2527,-0.0407,0.2121,618.7695,0.084889,22.345,-281.484877,-281.478065,-281.477121,-281.515408 +"CNC(=O)C#C",3.6839,51,-0.2591,-0.0243,0.2349,627.8913,0.0833,22.928,-285.282532,-285.275639,-285.274695,-285.313335 +"COC(=O)C#C",1.7556,46.66,-0.2806,-0.0424,0.2382,595.6777,0.071026,21.393,-305.155985,-305.149492,-305.148548,-305.186165 +"COC(=O)C#N",4.0593,41.61,-0.3197,-0.0717,0.248,575.2916,0.06048,19.572,-321.254976,-321.248781,-321.247837,-321.285034 +"C#CC(=O)CO",4.1336,46.43,-0.2661,-0.0573,0.2088,617.4214,0.070046,22.333,-305.130313,-305.123605,-305.122661,-305.160728 +"C(C(=O)C#N)O",4.1197,41.54,-0.3004,-0.0881,0.2122,596.5667,0.059291,20.617,-321.230055,-321.223616,-321.222672,-321.260329 
+"CCC(=O)C=O",1.0401,47.28,-0.2515,-0.0944,0.1571,641.1289,0.093575,22.634,-306.392828,-306.385878,-306.384934,-306.424157 +"CNC(=N)C=O",3.7361,49.61,-0.2388,-0.0681,0.1707,643.2539,0.095538,22.92,-302.558547,-302.551612,-302.550668,-302.589229 +"CNC(=O)C=O",5.2806,45.79,-0.2505,-0.0726,0.178,633.9378,0.083118,21.909,-322.454079,-322.447243,-322.446299,-322.485128 +"COC(=O)C=O",3.7746,40.92,-0.2683,-0.0862,0.1821,600.9282,0.070937,20.46,-342.329232,-342.322736,-342.321792,-342.359811 +"C(C(=O)C=O)O",5.1353,41.26,-0.2516,-0.0953,0.1563,627.7041,0.069653,21.411,-342.297848,-342.291172,-342.290228,-342.328622 +"CCC(=O)CC",2.5012,57,-0.2412,-0.0057,0.2356,770.4633,0.140552,26.577,-271.666841,-271.658902,-271.657958,-271.699649 +"CCC(=O)CO",3.84,50.24,-0.2429,-0.0073,0.2356,723.3895,0.116949,25.279,-307.58009,-307.572434,-307.571489,-307.612431 +"CCC(=O)NC",3.4247,54.61,-0.2422,0.0378,0.28,725.6801,0.130452,25.744,-287.732329,-287.724552,-287.723608,-287.764875 +"CCC(=O)OC",1.6323,50.2,-0.268,0.0192,0.2871,694.4729,0.118017,24.367,-307.61045,-307.602997,-307.602053,-307.642398 +"CNC(=O)CO",4.3706,47.75,-0.2433,0.0274,0.2707,681.2313,0.106577,24.645,-323.644468,-323.636751,-323.635807,-323.67712 +"CNC(=O)NC",3.5856,51.35,-0.2383,0.067,0.3052,689.1369,0.119461,25.314,-303.787203,-303.779425,-303.778481,-303.819322 +"CNC(=O)OC",2.2967,47.42,-0.2496,0.0662,0.3158,657.9646,0.107261,23.827,-323.6692,-323.661699,-323.660754,-323.701214 +"COC(=O)CN",1.6419,46.49,-0.2466,0.015,0.2616,669.5959,0.107439,23.834,-323.652284,-323.644911,-323.643967,-323.684119 +"COC(=O)CO",2.8141,43.25,-0.2705,0.0032,0.2737,640.0833,0.094874,22.716,-343.531666,-343.524693,-343.523749,-343.562703 +"C(C(=O)CO)O",5.1141,43.53,-0.2447,-0.01,0.2348,677.014,0.09331,24.017,-343.492484,-343.485107,-343.484162,-343.524104 +"CC(=NC)OC",0.8775,55.68,-0.2427,0.0334,0.2761,667.2341,0.129176,25.912,-287.697316,-287.689416,-287.688472,-287.729866 +"CC(CO)C#C",1.3163,55.08,-0.2613,0.0553,0.3166,682.8459,0.117465,26.225,-270.3961,-270.388643,-270.387698,-270.426967 +"CC(CO)C#N",3.7371,50.15,-0.2896,0.035,0.3246,658.6943,0.107232,24.138,-286.504534,-286.497432,-286.496488,-286.535239 +"CCC(C)C#C",0.7089,62.36,-0.2575,0.0581,0.3157,736.4751,0.14095,27.469,-234.474088,-234.466434,-234.46549,-234.505226 +"CCC(C)C#N",4.0632,57.23,-0.3144,0.0383,0.3527,712.3282,0.130797,25.346,-250.583259,-250.575974,-250.57503,-250.614218 +"CCC(N)C#N",2.8538,54.01,-0.2695,0.0286,0.298,689.9612,0.119956,24.891,-266.626759,-266.619585,-266.618641,-266.657544 +"CCC(O)C#C",1.3296,55.33,-0.2649,0.0332,0.2982,693.4546,0.117373,26.198,-270.397474,-270.390091,-270.389147,-270.428268 +"CCC(O)C#N",3.3465,50.37,-0.3028,0.0117,0.3145,670.8765,0.107081,24.136,-286.50404,-286.497024,-286.49608,-286.534653 +"CNC(C)C#N",4.4075,54.08,-0.2424,0.0288,0.2711,680.4479,0.119301,24.789,-266.615452,-266.608241,-266.607297,-266.646318 +"COC(C)C#C",1.41,55.84,-0.2538,0.0395,0.2933,675.6208,0.116766,25.831,-270.38418,-270.376713,-270.375769,-270.415376 +"COC(C)C#N",3.2128,50.59,-0.2865,0.0177,0.3042,619.0012,0.106659,23.771,-286.495035,-286.487984,-286.48704,-286.525708 +"NC(CO)C#N",4.5745,46.66,-0.2619,0.0215,0.2834,634.7561,0.096396,23.603,-302.545112,-302.538187,-302.537242,-302.575574 +"NCC(N)C#N",4.8739,50.26,-0.2534,0.0266,0.28,658.1766,0.109366,24.235,-282.667163,-282.660209,-282.659265,-282.697592 +"NCC(O)C#N",3.5369,46.88,-0.2549,0.0078,0.2627,640.0746,0.096532,23.453,-302.545562,-302.538783,-302.537839,-302.575827 
+"OCC(O)C#C",2.1731,48.25,-0.2624,0.0188,0.2813,638.4935,0.094585,24.263,-306.320806,-306.313986,-306.313042,-306.351017 +"OCC(O)C#N",2.1065,43.5,-0.2878,-0.0038,0.2839,617.6379,0.084113,22.348,-322.425235,-322.418738,-322.417794,-322.455292 +"CC(CO)C=O",1.4722,50.03,-0.2523,-0.0219,0.2304,656.247,0.117353,24.974,-307.574899,-307.567493,-307.566549,-307.606203 +"CCC(C)C=O",2.7619,57.5,-0.2451,-0.0181,0.227,739.4896,0.140783,26.342,-271.651913,-271.644175,-271.643231,-271.683985 +"CCC(O)C=O",2.216,50.71,-0.2736,-0.0372,0.2365,676.7821,0.117766,24.411,-307.581989,-307.574957,-307.574013,-307.612724 +"CCN(C)C=O",3.8246,55.18,-0.2407,0.0328,0.2735,680.8869,0.130695,24.851,-287.720383,-287.713033,-287.712089,-287.75175 +"COC(C)C=O",2.7851,50.45,-0.2545,-0.0276,0.2269,676.5067,0.116572,24.765,-307.566593,-307.559107,-307.558163,-307.598185 +"OCC(O)C=O",1.2875,43.58,-0.2662,-0.0476,0.2186,618.5337,0.094809,22.686,-343.5036,-343.497024,-343.49608,-343.533909 +"CC(CO)CO",2.6348,53.08,-0.2607,0.0735,0.3342,743.5937,0.141197,27.566,-308.75909,-308.751086,-308.750141,-308.790858 +"CCC(C)CC",0.067,67.19,-0.303,0.0843,0.3874,855.476,0.188227,30.015,-236.916125,-236.907738,-236.906794,-236.948351 +"CCC(C)CO",1.3482,60.09,-0.2625,0.0783,0.3408,798.4108,0.164837,28.743,-272.838189,-272.830053,-272.829108,-272.870013 +"CCC(C)OC",1.0377,60.6,-0.248,0.0861,0.3341,743.1891,0.164188,28.45,-272.834623,-272.826448,-272.825504,-272.866669 +"CCC(CC)O",1.4336,60.24,-0.2621,0.0721,0.3342,805.7386,0.164626,28.828,-272.844804,-272.836705,-272.835761,-272.876509 +"CCC(O)CO",2.3512,52.93,-0.256,0.0618,0.3178,748.8236,0.141924,26.859,-308.768073,-308.760556,-308.759611,-308.799183 +"COC(C)CO",2.2435,53.28,-0.2545,0.079,0.3335,670.6307,0.141335,26.699,-308.757315,-308.74959,-308.748646,-308.788936 +"C(C(CO)O)O",1.7715,45.75,-0.2606,0.049,0.3096,694.4975,0.118969,25.116,-344.689237,-344.682195,-344.681251,-344.719856 +"CC1(CC1)C#C",0.7654,59.61,-0.24,0.0571,0.2972,582.3195,0.118255,24.638,-233.255259,-233.2487,-233.247756,-233.284774 +"CC1(CC1)C#N",4.1425,54.31,-0.2853,0.037,0.3224,559.2985,0.108158,22.416,-249.363808,-249.357626,-249.356681,-249.393152 +"CC1(CN1)C#C",1.9146,56,-0.2444,0.0441,0.2885,564.5299,0.106988,23.611,-249.293044,-249.286674,-249.285729,-249.322437 +"CC1(CN1)C#N",4.9191,50.81,-0.2743,0.0226,0.2969,541.8201,0.096715,21.524,-265.399968,-265.393944,-265.393,-265.429201 +"CC1(CO1)C#C",1.7508,52.23,-0.2616,0.0263,0.2879,548.2216,0.094019,22.957,-269.176124,-269.169791,-269.168847,-269.205558 +"CC1(CO1)C#N",3.8379,47.2,-0.3007,0.0031,0.3038,525.818,0.083704,20.903,-285.281032,-285.275044,-285.2741,-285.310299 +"C1CC1(C#N)N",3.0748,51.03,-0.2722,0.029,0.3012,537.6808,0.09733,21.894,-265.408147,-265.402167,-265.401223,-265.437265 +"C#CC1(CC1)O",1.2031,52.54,-0.2424,0.035,0.2774,544.2268,0.094491,23.372,-269.176442,-269.170213,-269.169268,-269.205661 +"C1CC1(C#N)O",3.5524,47.42,-0.2828,0.0136,0.2964,521.6025,0.084237,21.226,-285.282853,-285.276998,-285.276054,-285.311886 +"CC1(CC1)C=O",2.9392,54.61,-0.2482,-0.0141,0.2341,572.0979,0.118497,23.265,-270.439135,-270.432679,-270.431735,-270.468877 +"CC1(CN1)C=O",3.0447,50.74,-0.2483,-0.0234,0.2249,553.6489,0.107129,22.432,-286.477365,-286.471021,-286.470076,-286.507087 +"CC1(CO1)C=O",2.2978,47.26,-0.2592,-0.0374,0.2218,538.1677,0.094183,21.75,-306.358581,-306.352296,-306.351351,-306.388334 +"C1CC1(C=O)O",2.375,48.35,-0.2519,-0.0315,0.2204,528.8623,0.094927,21.796,-306.364186,-306.358187,-306.357242,-306.393465 
+"CC1(CC1)CO",1.3206,56.77,-0.2616,0.0757,0.3373,610.3779,0.142009,25.913,-271.618137,-271.611049,-271.610104,-271.648484 +"CC1(CO)CN1",2.1168,52.8,-0.2463,0.0724,0.3186,582.3057,0.13149,24.487,-287.663174,-287.656474,-287.65553,-287.693286 +"CC1(CO)CO1",1.7232,49.21,-0.2611,0.081,0.3421,566.509,0.118491,23.915,-307.545713,-307.539052,-307.538108,-307.575814 +"CCC1(CC1)C",0.1151,63.52,-0.2627,0.0859,0.3486,658.7763,0.165551,27.138,-235.696677,-235.689426,-235.688482,-235.727158 +"CCC1(C)CO1",1.7595,56.33,-0.2587,0.0877,0.3465,616.3779,0.141536,25.512,-271.623066,-271.616004,-271.61506,-271.653614 +"CCC1(CC1)O",1.3751,56.44,-0.2519,0.0723,0.3242,618.4343,0.141819,26.023,-271.621836,-271.614856,-271.613912,-271.652038 +"CC1(CC1)OC",1.104,56.8,-0.245,0.0897,0.3348,596.0747,0.141327,25.705,-271.611366,-271.604301,-271.603357,-271.641692 +"C1CC1(CO)O",2.0651,49.73,-0.2421,0.0639,0.306,568.4896,0.1187,24.458,-307.544316,-307.537691,-307.536747,-307.574152 +"CC1CCC1=O",2.6766,53.75,-0.2374,-0.0163,0.2211,567.3641,0.118901,22.502,-270.447268,-270.440964,-270.44002,-270.477179 +"CC1CNC1=O",3.5654,50.52,-0.2452,0.0373,0.2825,539.0137,0.108515,21.625,-286.517094,-286.510925,-286.509981,-286.54679 +"CC1COC1=N",2.5239,50.92,-0.2615,0.0284,0.2899,537.9634,0.108616,21.067,-286.493052,-286.487128,-286.486184,-286.522434 +"CC1COC1=O",3.9998,46.04,-0.2726,0.0099,0.2825,521.9194,0.096641,20.03,-306.397746,-306.391983,-306.391039,-306.427029 +"CC1NCC1=O",2.556,50.49,-0.2421,-0.0238,0.2182,545.7113,0.107628,21.712,-286.484904,-286.478789,-286.477845,-286.514513 +"CC1OCC1=O",0.9705,46.89,-0.2575,-0.0323,0.2252,525.6219,0.094977,20.774,-306.365166,-306.359174,-306.35823,-306.394728 +"CN1CCC1=O",3.6013,51.94,-0.2414,0.0394,0.2809,542.8932,0.108561,21.114,-286.514923,-286.508751,-286.507807,-286.544706 +"NC1COC1=N",1.33,47.4,-0.2491,0.0196,0.2687,518.7658,0.097893,20.618,-302.535621,-302.529817,-302.528873,-302.564883 +"NC1COC1=O",2.9069,42.57,-0.2481,0.0012,0.2493,503.6554,0.085902,19.618,-322.439472,-322.433831,-322.432887,-322.468624 +"OC1CCC1=O",2.1771,46.67,-0.2509,-0.0284,0.2226,517.5463,0.095379,21.28,-306.368494,-306.362423,-306.361478,-306.398118 +"OC1CNC1=O",2.5208,43.41,-0.2475,0.0204,0.2679,495.5449,0.08494,20.351,-322.436757,-322.430835,-322.429891,-322.46623 +"OC1COC1=N",1.2035,43.85,-0.2717,0.0123,0.284,494.7347,0.084855,19.922,-322.412936,-322.407158,-322.406214,-322.44232 +"OC1COC1=O",3.0125,39.11,-0.2674,-0.0058,0.2616,481.5842,0.072889,18.879,-342.315546,-342.309972,-342.309028,-342.344721 +"CC1CCC1C",0.1206,63.28,-0.2822,0.0842,0.3663,624.4665,0.166462,25.85,-235.696852,-235.689979,-235.689035,-235.726894 +"CC1CCC1O",1.3078,56.23,-0.2522,0.0751,0.3274,575.9494,0.142966,24.662,-271.622539,-271.615882,-271.614937,-271.652386 +"CC1COC1C",1.7671,56.23,-0.2398,0.085,0.3248,585.5983,0.142555,24.15,-271.620997,-271.61422,-271.613276,-271.651656 +"CC1OCC1O",2.3269,49.4,-0.2442,0.0626,0.3069,539.14,0.118783,23.068,-307.544728,-307.538101,-307.537157,-307.575262 +"OC1CCC1O",1.022,49.03,-0.2476,0.0591,0.3067,529.6164,0.11984,22.931,-307.55036,-307.544139,-307.543195,-307.579988 +"N=C1CN=CN1",3.4388,48.64,-0.2323,-0.0049,0.2275,470.3702,0.08707,18.459,-281.491976,-281.486859,-281.485915,-281.520391 +"C1C=CCC1=O",2.5958,50.91,-0.2409,-0.0164,0.2245,502.2039,0.096841,19.177,-269.263802,-269.258575,-269.257631,-269.292553 +"C1C=CNC1=O",3.246,47.64,-0.2165,-0.0013,0.2152,473.5305,0.08615,18.76,-285.335887,-285.330807,-285.329863,-285.364182 
+"C1C(=O)NC=N1",2.525,43.5,-0.25,-0.0182,0.2318,452.4227,0.075083,17.274,-301.395982,-301.391168,-301.390224,-301.424079 +"C1C(=O)OC=N1",1.6338,39.3,-0.2793,-0.0315,0.2478,433.0637,0.062716,15.897,-321.271406,-321.26687,-321.265926,-321.299312 +"N=C1OCC=C1",3.3473,50.58,-0.2462,-0.0302,0.216,472.0954,0.086685,17.709,-285.311823,-285.307013,-285.306068,-285.339869 +"C1CC(=O)C=C1",3.5709,52.33,-0.2366,-0.0429,0.1938,505.106,0.097695,18.812,-269.27132,-269.266149,-269.265204,-269.299973 +"C1C=CC(=O)N1",4.2248,48.42,-0.2428,-0.0263,0.2166,475.489,0.086559,18.405,-285.336744,-285.331604,-285.33066,-285.36533 +"N=C1CCCO1",3.1568,50.78,-0.2549,0.0316,0.2866,515.9607,0.110489,19.419,-286.516174,-286.51087,-286.509925,-286.544899 +"N=C1COCO1",1.8361,43.66,-0.272,0.0187,0.2907,475.1154,0.08643,17.785,-322.434119,-322.429025,-322.428081,-322.462711 +"N=C1OCCO1",4.2769,43.85,-0.2556,0.0548,0.3105,471.4994,0.08682,17.613,-322.44415,-322.439107,-322.438163,-322.472719 +"C1CCC(=O)C1",2.8451,52.83,-0.235,-0.0124,0.2226,547.0211,0.120921,20.712,-270.47247,-270.466895,-270.465951,-270.501576 +"C1CC(=O)NC1",3.7977,49.83,-0.237,0.0377,0.2747,521.6537,0.110762,19.794,-286.542391,-286.536964,-286.53602,-286.571234 +"C1CC(=O)OC1",4.3159,45.82,-0.2646,0.0149,0.2795,499.82,0.098309,18.482,-306.418503,-306.413341,-306.412397,-306.447148 +"C1COCC1=O",1.8765,46.04,-0.2439,-0.0253,0.2186,504.709,0.096958,19.062,-306.386594,-306.381259,-306.380315,-306.415468 +"C1C(=O)OCO1",2.7134,38.99,-0.2775,-0.0003,0.2772,459.0271,0.074203,16.844,-342.335241,-342.330282,-342.329338,-342.36377 +"C1CNC(=O)N1",3.9802,46.25,-0.2457,0.0629,0.3086,491.6466,0.100038,18.931,-302.599262,-302.594109,-302.593165,-302.627663 +"C1COC(=O)N1",4.8739,42.52,-0.2632,0.052,0.3152,473.1926,0.087482,17.739,-322.47672,-322.471693,-322.470748,-322.505169 +"C1COC(=O)O1",5.0601,38.98,-0.2935,0.0401,0.3336,455.3293,0.074926,16.45,-342.352279,-342.34739,-342.346446,-342.380838 +"CC1=CCCC1",0.2336,62.6,-0.2238,0.037,0.2608,609.4914,0.144201,22.921,-234.522504,-234.51632,-234.515375,-234.551976 +"CC1=CCOC1",1.8189,55.75,-0.2252,0.0249,0.2501,566.4241,0.119949,21.372,-270.43928,-270.433217,-270.432273,-270.468829 +"CC1=NCCO1",1.2483,50.7,-0.2399,0.0319,0.2718,530.0606,0.109278,20.206,-286.516879,-286.510983,-286.510039,-286.546498 +"CC1CC=CC1",0.1489,61.16,-0.2335,0.0313,0.2648,587.2379,0.144419,22.613,-234.516241,-234.51027,-234.509326,-234.545523 +"CC1CN=CO1",1.8411,49.49,-0.246,0.0261,0.2721,504.1131,0.109554,19.819,-286.510514,-286.504912,-286.503968,-286.539796 +"OC1CC=CC1",1.4817,53.91,-0.2461,0.0209,0.267,510.4532,0.120948,21.267,-270.441325,-270.435739,-270.434794,-270.470141 +"CC1CCC=C1",0.1605,61.66,-0.2341,0.03,0.2642,589.2794,0.144442,22.655,-234.516409,-234.510406,-234.509462,-234.545692 +"CC1COC=N1",1.5301,49.75,-0.2473,0.0239,0.2712,509.7285,0.109629,19.822,-286.508233,-286.502604,-286.50166,-286.53756 +"CC1OCC=C1",1.4505,55,-0.2302,0.0182,0.2484,531.8706,0.120101,21.19,-270.437636,-270.4318,-270.430856,-270.466917 +"OC1CCC=C1",1.7344,54.16,-0.2491,0.0142,0.2633,528.2233,0.121083,21.367,-270.441145,-270.435384,-270.43444,-270.470414 +"CC1CCCC1",0.0756,63.03,-0.3044,0.0828,0.3872,641.8924,0.168121,24.306,-235.723792,-235.71718,-235.716236,-235.754746 +"CC1CCCO1",1.5053,56.51,-0.2379,0.0868,0.3246,586.3351,0.144187,22.689,-271.647465,-271.641158,-271.640214,-271.67776 +"CC1CCOC1",1.7082,56.14,-0.2381,0.0839,0.322,594.9572,0.144241,22.656,-271.642927,-271.63658,-271.635636,-271.673114 
+"CC1COCO1",1.2942,49.23,-0.2502,0.0904,0.3406,539.3012,0.120486,20.939,-307.569255,-307.563241,-307.562296,-307.599195 +"CC1OCCO1",1.0448,49.6,-0.2542,0.0896,0.3439,537.0339,0.120277,21.023,-307.573371,-307.567272,-307.566327,-307.603679 +"C1CCC(C1)O",1.3247,55.74,-0.2601,0.0738,0.3338,550.8423,0.144785,22.879,-271.648392,-271.642208,-271.641264,-271.678618 +"OC1CCOC1",1.5931,49.05,-0.2428,0.0712,0.314,508.516,0.120836,21.242,-307.56626,-307.560326,-307.559382,-307.595858 +"CC1C(C)C1C",0.0164,63.6,-0.2571,0.0841,0.3412,642.4422,0.165387,27.583,-235.694216,-235.686596,-235.685652,-235.725092 +"CC1C(C)C1O",1.3139,56.45,-0.2441,0.0725,0.3165,589.5057,0.141848,26.227,-271.61858,-271.611272,-271.610327,-271.649185 +"CC1C(C)N1C",1.3293,60.42,-0.2104,0.0826,0.293,607.6457,0.153308,26.797,-251.727102,-251.719552,-251.718607,-251.758017 +"CC1C(O)C1O",1.9714,49.27,-0.2301,0.0537,0.2838,539.3304,0.118633,24.471,-307.540766,-307.533965,-307.53302,-307.570981 +"OC1C(O)C1O",0.6618,41.57,-0.2552,0.0505,0.3058,476.1977,0.096192,21.828,-343.468335,-343.462548,-343.461604,-343.497233 +"CC12CC1CC2",0.3029,60.48,-0.243,0.0951,0.338,542.9521,0.143756,22.864,-234.474951,-234.469056,-234.468112,-234.50378 +"CC12CC1CO2",1.6329,53.34,-0.2287,0.0868,0.3155,498.0795,0.119588,21.408,-270.392444,-270.386783,-270.385839,-270.421106 +"CC12CC1OC2",1.8852,53.11,-0.2293,0.0805,0.3098,506.8939,0.119607,21.353,-270.388222,-270.382545,-270.381601,-270.416941 +"CC12CCC1O2",1.8535,53.26,-0.2442,0.0955,0.3397,508.8551,0.119394,21.539,-270.401391,-270.39568,-270.394736,-270.430118 +"OC12CC1CC2",1.1413,53.1,-0.2475,0.0834,0.3309,499.7541,0.120085,21.746,-270.397623,-270.392001,-270.391057,-270.426196 +"OC12CC1OC2",2.2569,45.92,-0.2318,0.0706,0.3024,464.135,0.095993,20.187,-306.309029,-306.303682,-306.302738,-306.337396 +"C#CC#CC#C",0.0151,72.39,-0.2465,-0.0565,0.19,802.1485,0.046718,21.875,-229.615446,-229.609222,-229.608278,-229.639949 +"C#CC#CC#N",4.5941,63.48,-0.284,-0.085,0.199,777.4112,0.036829,19.266,-245.711897,-245.706152,-245.705208,-245.733762 +"C(#CC#N)C#N",0,55.28,-0.3277,-0.1164,0.2113,754.2424,0.027023,17.827,-261.802956,-261.79743,-261.796485,-261.828854 +"C#CCCC#C",0.0008,57.87,-0.2615,0.0391,0.3006,799.7895,0.093925,24.211,-232.030352,-232.023466,-232.022522,-232.06066 +"C#CCCC#N",3.4446,52.71,-0.2821,0.0193,0.3014,773.7747,0.083685,22.093,-248.137789,-248.131277,-248.130333,-248.167922 +"C(CC#N)C#N",0.0004,47.69,-0.3397,-0.0013,0.3384,748.3461,0.073373,20.047,-264.243028,-264.236876,-264.235932,-264.27298 +"C#CC#CC=O",3.248,59.66,-0.275,-0.0922,0.1828,801.3233,0.047327,20.052,-266.777425,-266.77129,-266.770346,-266.806816 +"C(=O)C#CC#N",2.4648,51.47,-0.3053,-0.1211,0.1843,776.7874,0.03702,17.93,-282.871678,-282.865901,-282.864957,-282.900943 +"C(=O)C#CC=O",2.3737,48.1,-0.2894,-0.0927,0.1966,801.4852,0.0471,19.025,-303.940271,-303.933923,-303.932979,-303.970909 +"N=COCC#C",3.3466,50.16,-0.2698,0.0135,0.2834,665.5192,0.083043,21.973,-285.242094,-285.235491,-285.234547,-285.272892 +"N=COCC#N",3.3728,45.57,-0.2899,-0.0071,0.2828,646.0972,0.07256,20.023,-301.345683,-301.339414,-301.33847,-301.376371 +"C#CCCC=O",2.1557,52.99,-0.2549,-0.0302,0.2247,817.4048,0.093631,23.097,-269.20692,-269.199987,-269.199043,-269.238076 +"C(CC#N)C=O",2.4084,48.12,-0.274,-0.0464,0.2276,792.702,0.083388,21.004,-285.313674,-285.307112,-285.306168,-285.344659 +"C#CCNC=O",3.3746,49.89,-0.2561,0.0191,0.2752,691.5345,0.083614,22.134,-285.276634,-285.270024,-285.269079,-285.307545 
+"C(C#N)NC=O",3.6197,44.97,-0.2788,-0.0017,0.2772,652.22,0.073313,20.073,-301.382062,-301.375865,-301.37492,-301.412562 +"C#CCOC=O",3.7921,45.77,-0.2819,-0.0051,0.2768,660.5683,0.070779,20.964,-305.145002,-305.13858,-305.137636,-305.175765 diff --git a/chemprop-updated/tests/data/regression/rxn+mol/atom_descriptors.npz b/chemprop-updated/tests/data/regression/rxn+mol/atom_descriptors.npz new file mode 100644 index 0000000000000000000000000000000000000000..d7d3bd59275bef6056d78cf66cfbaa40372e1c08 --- /dev/null +++ b/chemprop-updated/tests/data/regression/rxn+mol/atom_descriptors.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c9ff81b72dec4a5b85cbe4b6c6f107de6b9360567b451d67021a76cb4873ca +size 157746 diff --git a/chemprop-updated/tests/data/regression/rxn+mol/atom_features.npz b/chemprop-updated/tests/data/regression/rxn+mol/atom_features.npz new file mode 100644 index 0000000000000000000000000000000000000000..026aa368c4163687362c1ff314cb483cf48f3c73 --- /dev/null +++ b/chemprop-updated/tests/data/regression/rxn+mol/atom_features.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:091326ffbd3b5232187d05f8d8fdd3d139ba73e1a7f681fc3a9eb1dd313f0761 +size 118050 diff --git a/chemprop-updated/tests/data/regression/rxn+mol/bond_features.npz b/chemprop-updated/tests/data/regression/rxn+mol/bond_features.npz new file mode 100644 index 0000000000000000000000000000000000000000..ac4ccc75502420d3fab07fc750a4e36e80e13b4b --- /dev/null +++ b/chemprop-updated/tests/data/regression/rxn+mol/bond_features.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4743f823adc74e5e16c46fd4a933c7c03be519373342397b9af962a345d60543 +size 133386 diff --git a/chemprop-updated/tests/data/regression/rxn+mol/descriptors.npz b/chemprop-updated/tests/data/regression/rxn+mol/descriptors.npz new file mode 100644 index 0000000000000000000000000000000000000000..c635c4ceddb80f6ad13b974563dff4f6206b665f --- /dev/null +++ b/chemprop-updated/tests/data/regression/rxn+mol/descriptors.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:806d0418f1fd57c79ea43bf01f0f612951473c363d643e7b35b1a014379ba159 +size 3464 diff --git a/chemprop-updated/tests/data/regression/rxn+mol/rxn+mol.csv b/chemprop-updated/tests/data/regression/rxn+mol/rxn+mol.csv new file mode 100644 index 0000000000000000000000000000000000000000..2beffa74a57322f10de8f35484232dac29a8de80 --- /dev/null +++ b/chemprop-updated/tests/data/regression/rxn+mol/rxn+mol.csv @@ -0,0 +1,401 @@ +rxn_smiles,solvent_smiles,target +[C:4](=[C:5]=[O:6])([H:12])[H:13].[O:1]([C:2]([C:3](=[C:7]([H:14])[H:15])[H:11])([H:9])[H:10])[H:8]>>[O:1]([C:2]([C:3]1([H:11])[C:4]([H:12])([H:13])[C:5](=[O:6])[C:7]1([H:14])[H:15])([H:9])[H:10])[H:8],O,-3.76 +[O:1]([C:2]1=[N:3][C:4](=[O:5])[C:6]([H:9])([H:10])[N:7]1[H:11])[H:8]>>[O:1]([c:2]1=[n:3][c:4]([O:5][H:9])[c:6]([H:10])[n:7]1[H:11])[H:8],O,3.46 +[C:1]1([H:7])([H:8])[C:2]([H:9])([H:10])[O:3][C:4]([H:11])=[N+:5]=[C-:6]1.[H:12][H:13]>>[C:1]1([H:7])([H:8])[C:2]([H:9])([H:10])[O:3][C:4]([H:11])=[N:5][C:6]1([H:12])[H:13],O,1.54 +[O:1]=[C:2]([C:3]([C:5](=[C:4])[H:9])([H:7])[H:8])[H:6]>>[O:1]=[C:2]([C:3]([C:4]#[C:5][H:9])([H:7])[H:8])[H:6],CCCCCCCCO,-0.54 +[N:1](=[C:2]1\[O:3][C@:4]1([C:6]([C:5]([H:7])([H:10])[H:11])([H:12])[H:13])[H:9])\[H:8]>>[N:1]([C:2](=[O:3])[C:4]1([H:9])[C:5]([H:10])([H:11])[C:6]1([H:12])[H:13])([H:7])[H:8],CCCCCCCCO,-7.56 
+[N:1]([c:2]1[c:3]([O:4][H:10])[c:5]([H:11])[c:6]([H:12])[n:7]1[H:13])([H:8])[H:9]>>[N:1]([C@:2]1([H:13])[C:3]([O:4][H:10])=[C:5]([H:11])[C:6]([H:12])=[N:7]1)([H:8])[H:9],CCCCCCCCO,-1.14 +[O:1]=[C:2]1[C:3]([H:8])([H:9])[C:4]([H:10])([H:11])[C@@:5]2([H:12])[C:6]([H:13])([H:14])[C@@:7]12[H:15]>>[O:1]=[C:2]([C@@:7]1([H:15])[C@:5]([C:4](=[C:3]([H:8])[H:9])[H:11])([H:12])[C:6]1([H:13])[H:14])[H:10],CCc1ccccc1,-2.2 +[O:1]([N:2]1[C:3]([H:9])([H:10])[C:4]([H:11])=[C:5][C:6]([H:12])([H:13])[C:7]1([H:14])[H:15])[H:8]>>[O:1]([N:2]1[C:3]([H:10])[C:4]([H:11])=[C:5]([H:9])[C:6]([H:12])([H:13])[C:7]1([H:14])[H:15])[H:8],CCc1ccccc1,-2.19 +[H:7][H:9].[O:1]=[N:2][C:3]1=[C:4]([H:8])[C:5]([H:10])([H:11])[C:6]1([H:12])[H:13]>>[O:1]([N:2]=[C:3]1[C:4]([H:8])([H:9])[C:5]([H:10])([H:11])[C:6]1([H:12])[H:13])[H:7],CCc1ccccc1,-2.32 +[N:1]([C@:2]12[C:3]([H:10])([H:11])[C@@:4]1([H:12])[O:5][C:6]2=[O:7])([H:8])[H:9]>>[N:1]([C:2]([C@@:4]1([H:12])[C:3]([H:10])([H:11])[O:5]1)=[C:6]=[O:7])([H:8])[H:9],C=Cc1ccccc1,-0.11 +[O:1]=[C:2]([c:3]1[c:4]([H:9])[n:5][c:6]([H:10])[n:7]1[H:11])[H:8]>>[O:1]=[C:2]=[C:3]1[C:4]([H:8])([H:9])[N:5]=[C:6]([H:10])[N:7]1[H:11],C=Cc1ccccc1,-0.32 +[C:1]([C:2]([N:3]([C:4]([H:12])([H:13])[H:14])[C:5](=[O:6])[H:15])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1](/[C:2](=[N:3]/[C:5](=[O:6])[H:15])[H:11])([H:7])([H:8])[H:9].[C:4]([H:10])([H:12])([H:13])[H:14],C=Cc1ccccc1,0.82 +[C:1]([C@@:2]1([H:10])[C:3]([H:12])([H:13])[C:4]([H:14])([H:15])/[C:5]1=[N:6]/[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3]([C:4]([C:5]#[N:6])([H:14])[H:15])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],NCc1ccccc1,0.27 +[N:1]([c:2]1[n:3][n:4]([H:9])[n:5][c:6]1[H:10])([H:7])[H:8]>>[N-:4]([N+:5]#[C:6][H:10])[H:9].[N:1]([C:2]#[N:3])([H:7])[H:8],NCc1ccccc1,0.57 +[C:1]([H:8])([H:9])([H:10])[H:14].[C:2]1=[C:3]([H:11])[N:4]([H:12])[C:5](=[O:6])[C:7]=1[H:13]>>[C:1]([C:2]1=[C:3]([H:11])[N:4]([H:12])[C:5](=[O:6])[C:7]1([H:13])[H:14])([H:8])([H:9])[H:10],NCc1ccccc1,-2.84 +[C+:1]([C:2]1=[C:3]([H:10])[N:4]([H:11])[C-:5]([H:12])[C:6]1([H:7])[H:13])([H:8])[H:9]>>[C:1]([c:2]1[c:3]([H:10])[n:4]([H:11])[c:5]([H:12])[c:6]1[H:13])([H:7])([H:8])[H:9],N#Cc1ccccc1,-0.32 +[C:1]([C:2]([C:3]#[C:4][C:5]([O:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3](=[C:4]=[C:5]([H:12])[H:13])[O:6][H:14])([H:10])[H:11])([H:7])([H:8])[H:9],N#Cc1ccccc1,-3.65 +[C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([C:4]([C:5]1([H:17])[C:6]([H:18])([H:19])[C:7]1([H:20])[H:21])([H:15])[H:16])[H:11])([H:8])([H:9])[H:10]>>[C:1](=[C:2]([C:3]([H:12])([H:13])[H:14])[H:11])([H:9])[H:10].[C:4](=[C:5]1[C:6]([H:18])([H:19])[C:7]1([H:20])[H:21])([H:15])[H:16].[H:8][H:17],N#Cc1ccccc1,-0.46 +[C:1]1([H:6])([H:7])[C:2]([H:8])([H:9])[C:3]([H:10])=[C:4]([H:11])[C:5]1([H:12])[H:13]>>[C:1]([C:5]([C:4](=[C:3]=[C:2]([H:8])[H:9])[H:11])([H:12])[H:13])([H:6])([H:7])[H:10],OCc1ccccc1,-8.41 +[C:1](=[C:2]([H:10])[H:11])([H:8])[H:9].[C:3]1[C:4]([H:12])([H:13])[C@@:5]2([H:14])[O:6][C@@:7]12[H:15]>>[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]12[C:4]([H:12])([H:13])[C@@:5]1([H:14])[O:6][C@@:7]21[H:15],OCc1ccccc1,-3.6 +[N:1](=[C:2]1[C:3]([O:4][H:10])=[C:5]([H:11])[C-:6]([H:12])[N+:7]1([H:8])[H:13])[H:9]>>[N:1]([c:2]1[c:3]([O:4][H:10])[c:5]([H:11])[c:6]([H:12])[n:7]1[H:13])([H:8])[H:9],OCc1ccccc1,7.53 +[C:1]([C@@:2]1([H:10])[O:3][C@@:4]2([H:11])[C:5]([H:12])([H:13])[C@@:6]12[H:14])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[O+:3]=[C-:4][C:5]([H:12])([H:13])[C:6]1([H:11])[H:14])([H:7])([H:8])[H:9],COc1ccccc1,-1.84 
+[C:1]([O:2][C:3](=[O:4])[C:5](=[C:6]([H:13])[H:14])[H:11])([H:8])([H:9])[H:10].[O:7]([H:12])[H:15]>>[C:1]([O:2][C:3](=[O:4])[C:5]([C:6]([O:7][H:15])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],COc1ccccc1,1.07 +[N:1]([C@@:2]([C:3]([C:4]([O:5][H:15])([H:13])[H:14])([H:11])[H:12])([C:6]#[N:7])[H:10])([H:8])[H:9]>>[C:3](=[C:4]([H:13])[H:14])([H:11])[H:12].[N:1](=[C:2](\[C:6]#[N:7])[H:10])\[H:8].[O:5]([H:9])[H:15],COc1ccccc1,0.48 +[C:1]([N:2]([C:3](=[O:4])[C:5]([O:6][H:13])([H:11])[H:12])[H:10])([H:7])([H:8])[H:9]>>[C:1]([N:2]([C-:3]=[O+:4][C:5]([O:6][H:13])([H:11])[H:12])[H:10])([H:7])([H:8])[H:9],c1ccc(Oc2ccccc2)cc1,1.67 +[C:1]([O:2]/[C:3](=[C:4](\[C:5]([O:6][H:16])([H:14])[H:15])[H:12])[H:11])([H:7])([H:8])[H:9].[H:10][H:13]>>[C:1]([O:2][C:3]([C:4]([C:5]([O:6][H:16])([H:14])[H:15])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],c1ccc(Oc2ccccc2)cc1,-2.7 +[N:1]([C:2](=[O:3])[C:4]([C:5](=[O:6])[H:11])([H:9])[H:10])([H:7])[H:8]>>[N:1]([C:2](=[O:3])[C:5]([C:4]([H:9])([H:10])[H:11])=[O:6])([H:7])[H:8],c1ccc(Oc2ccccc2)cc1,0.86 +[N:1]([C:2](=[O:3])[C:4]([C:5]([O:6][H:13])([H:11])[H:12])([H:9])[H:10])([H:7])[H:8]>>[C:4](=[C:5]([O:6][H:13])[H:11])([H:9])[H:10].[N:1]([C:2](=[O:3])[H:12])([H:7])[H:8],CCCCN(CCCC)CCCC,-2.4 +[C:1]([C-:2]1[N+:3]([H:11])([H:12])[C@:4]1([C:5]([H:13])([H:14])[H:15])[C:6]#[N:7])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C@:4]1([C:5]([H:13])([H:14])[H:15])[C:6]#[N:7])([H:8])([H:9])[H:10],CCCCN(CCCC)CCCC,3.06 +[O+:1](=[C-:2][H:8])[c:3]1[n:4][c:5]([H:9])[n:6][o:7]1>>[O:1]=[C:2]([c:3]1[n:4][c:5]([H:9])[n:6][o:7]1)[H:8],CCCCN(CCCC)CCCC,-0.01 +[O:1]=[C:2]1[C:3]([H:7])([H:8])[N+:4]([H:9])=[C-:5][N:6]1[H:10]>>[O:1]=[C:2]1[C:3]([H:7])([H:8])[N:4]=[C:5]([H:9])[N:6]1[H:10],CCOc1ccccc1,1.66 +[C:1]([C:2]1([H:10])[O:3][C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[O:6]1)([H:7])([H:8])[H:9]>>[C:1]([C:2]1=[O+:3][C-:4]([H:12])[C:5]([H:13])([H:14])[O:6]1)([H:7])([H:8])[H:9].[H:10][H:11],CCOc1ccccc1,-0.41 +[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5]#[C:6][H:12])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[O:3][C:6]([H:12])=[C:5]=[C:4]1[H:11])([H:7])([H:8])[H:9],CCOc1ccccc1,-1.16 +[C:1]([C:2]([C:5]([C:3]#[N:4])=[O:6])([C:7]([H:12])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]([C:3]#[N:4])([C:5](=[O:6])[C:7]([H:12])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10],CCCCC(CO)CC,-6.68 +[C:1]1([H:7])([H:8])[C:2]([H:9])([H:10])[C:3]([H:11])=[C:4]([H:12])[C:5]([H:13])([H:14])[O:6]1>>[C:1]1([H:8])=[O+:6][C:5]([H:13])([H:14])[C:4]([H:12])=[C-:3][C:2]1([H:9])[H:10].[H:7][H:11],CCCCC(CO)CC,-1.4 +[N:1](/[C:2](=[C:3]([C:4](=[C:5](/[C:6]=[N:7][H:14])[H:13])\[H:12])/[H:11])[H:10])([H:8])[H:9]>>[N:1]([C:2]([c:3]1[c:4]([H:12])[c:5]([H:13])[c:6][n:7]1[H:14])([H:10])[H:11])([H:8])[H:9],CCCCC(CO)CC,-2.89 +[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C@:3]1([H:10])[C@@:4]1([H:11])[C:5]([H:12])([H:13])[C@@:6]21[H:14]>>[C:1]1([H:8])=[C:2]([H:9])[C:3]1([C:4]1([H:11])[C:5]([H:12])=[C:6]1[H:14])[H:10].[H:7][H:13],CCOC(CCC)=O,-0.68 +[C:1]([C-:2](/[C:3](=[N:4]/[H:11])[H:10])[N+:6]#[N:5])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[c:3]([H:10])[n:4]([H:11])[n:5][n:6]1)([H:7])([H:8])[H:9],CCOC(CCC)=O,-0.35 +[C:1]([C@:2]1(/[C:3](=[N:4]/[H:12])[H:11])[C:5]([N:6]([H:13])[H:14])=[C:7]1[H:15])([H:8])([H:9])[H:10]>>[C:1]([c:2]1[c:3]([H:11])[n:4]([H:12])[c:5]([N:6]([H:13])[H:14])[c:7]1[H:15])([H:8])([H:9])[H:10],CCOC(CCC)=O,-0.6 +[C:1](=[C:2]([H:9])[H:10])([H:6])[H:8].[C:3](=[C:4]([O:5][H:7])[H:13])([H:11])[H:12]>>[C:1]([C:2]([C:3]([C:4](=[O:5])[H:13])([H:11])[H:12])([H:9])[H:10])([H:6])([H:7])[H:8],Cc1ccc(C)cc1,-0.61 
+[C:1]([C:2]([C@:3]([N:4][H:14])([C:5](=[O:6])[O:7][H:15])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([H:8])([H:9])[H:10].[C:2](/[C:3](=[N:4]/[H:14])[H:13])([C:5](=[O:6])[O:7][H:15])([H:11])[H:12],Cc1ccc(C)cc1,0.38 +[C:1]([C:2][C:6]([C:5]([C:3]([O:4][H:12])([H:10])[H:11])([H:13])[H:14])([H:15])[H:16])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([C:3]([O:4][H:12])([H:10])[H:11])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:7])([H:8])[H:9],Cc1ccc(C)cc1,0.49 +[C:1]([H:7])([H:8])([H:9])[H:14].[O:2]=[C:3]([C:4]([C:5](=[O:6])[H:13])([H:11])[H:12])[H:10]>>[C:1]([O:2][C@@:3]1([H:10])[C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9],BrCCBr,0.95 +[C:1]([C@@:2]([C:3]#[N:4])([C:5](=[O:6])[C:7]([H:12])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10]>>[C:1](/[C:2]([C:3]#[N:4])=[C:5](\[O:6][H:11])[C:7]([H:12])([H:13])[H:14])([H:8])([H:9])[H:10],BrCCBr,-1.14 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])([H:15])[O:5][C@:6]1([C:7]([H:17])([H:18])[H:19])[H:16])([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[C:6]([H:16])[O:5][C:4]([H:14])([H:15])[C@@:3]1([C:7]([H:17])([H:18])[H:19])[H:12])([H:8])([H:9])[H:10].[H:11][H:13],BrCCBr,-2.11 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C@:4]1([C:5](=[O:6])[H:14])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C@@:4]2([H:13])[C@:5]1([H:14])[O:6]2)([H:7])([H:8])[H:9],CCCBr,-1.41 +[O:1]([C@:2]12[C:3]([H:8])([H:9])[C@@:4]1([H:10])[C:5]([H:11])([H:12])[C:6]2([H:13])[H:14])[H:7]>>[C:2]12=[C:4]([C:3]1([H:8])[H:9])[C:5]([H:11])([H:12])[C:6]2([H:13])[H:14].[O:1]([H:7])[H:10],CCCBr,-3.59 +[C:1]([N:2]1[C:3]([H:11])([H:12])[C@:4]1([C:5](=[O:6])[C:7]([H:14])([H:15])[H:16])[H:13])([H:8])([H:9])[H:10]>>[C:1]([N+:2]1=[C:3]([H:12])[C@:4]1([C-:5]([O:6][H:11])[C:7]([H:14])([H:15])[H:16])[H:13])([H:8])([H:9])[H:10],CCCBr,0.41 +[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C:3]([H:10])([H:11])[C:4]([H:12])([H:14])[C:5]([H:13])=[C:6]12>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C:3]([H:10])([H:11])[C:4]([H:12])=[C:5]([H:13])[C@@:6]12[H:14],CCCC,0.03 +[O:1]=[C:2]([N-:3][N+:4]#[C:5][H:7])[H:6]>>[o:1]1[c:2]([H:6])[n:3][n:4][c:5]1[H:7],CCCC,0.23 +[O:1]=[C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11])[N:5]1[C:6](=[O:7])[H:12])[H:8]>>[O:1]=[C:2]([C@@:3]1([H:9])[C-:4]([H:10])[N+:5]1=[C:6]([O:7][H:11])[H:12])[H:8],CCCC,-0.07 +[O:1]=[C:2]1[C:3]([H:7])([H:8])[C:4]([H:9])=[C:5]([H:10])[N:6]1[H:11]>>[O-:1][C:2]1=[C:3]([H:7])[C:4]([H:8])([H:9])[C:5]([H:10])=[N+:6]1[H:11],ClCCCl,-3.65 +[N:1]([C:3]1=[C:4]([H:10])[C@:2]1([C:5]#[N:6])[H:9])([H:7])[H:8]>>[N:1]([C@@:2]([C:3]#[C:4][H:10])([C:5]#[N:6])[H:9])([H:7])[H:8],ClCCCl,-2.21 +[C:1]([C@@:2]12[C@:3]3([H:11])[C@@:4]([H:12])([C@:5]3([H:13])[C:6]1)[C:7]2([H:14])[H:15])([H:8])([H:9])[H:10]>>[C:1]([C@:2]12[C@@:3]3([H:11])[C@:4]4([H:12])[C@@:5]3([H:13])[C@@:6]1([H:14])[C@:7]24[H:15])([H:8])([H:9])[H:10],ClCCCl,1.4 +[O:1]([C:2]([C@@:3]1([H:11])[C:4]([H:12])([H:13])[C:5]([H:14])([H:15])[O:6][C:7]1([H:16])[H:17])([H:9])[H:10])[H:8]>>[H:14][H:15].[O:1]([C:2]([C@@:3]1([H:11])[C:4]([H:12])([H:13])[C-:5]=[O+:6][C:7]1([H:16])[H:17])([H:9])[H:10])[H:8],CCCN,-4.98 +[C:1]([C@@:2]([C:3]([O:4][H:13])([H:11])[H:12])([C:5](=[O:6])[H:14])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C@:5]([C:3]([O:4][H:13])([H:11])[H:12])([H:14])[O:6]1)([H:7])([H:8])[H:9],CCCN,-6.62 +[C:1]([C:2]([O:3][H:11])[C@:4]([N:5]([H:13])[H:14])([C:6]([O:7][H:17])([H:15])[H:16])[H:12])([H:8])([H:9])[H:10]>>[C:1](/[C:2]([O:3][H:11])=[C:4](/[C:6]([O:7][H:17])([H:15])[H:16])[H:12])([H:8])([H:9])[H:10].[N:5]([H:13])[H:14],CCCN,-3.66 
+[C:1]([C:2]([C:3]([C:4]([C:5]([H:13])([H:14])[H:15])=[C:6]([H:16])[H:17])([H:11])[H:12])=[O:7])([H:8])([H:9])[H:10]>>[C:1]([C:2]12[C:3]([H:11])([H:12])[C:4]([C:5]([H:13])([H:14])[H:15])([C:6]1([H:16])[H:17])[O:7]2)([H:8])([H:9])[H:10],CCC#N,-2.71 +[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]12[C:4]([H:12])([H:13])[C:5]([H:14])=[C:6]([H:15])[C:7]2([H:16])[H:17]>>[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]12[C:4]([H:13])=[C:5]([H:14])[C:6]([H:15])=[C:7]2[H:17].[H:12][H:16],CCC#N,-1.13 +[O:1]=[C:2]1[C:3]([H:7])([H:8])[C:4][C:5]([H:10])([H:11])[C:6]1([H:9])[H:12]>>[O:1]=[C:2]1[C:3]([H:7])([H:8])[C@@:4]2([H:9])[C:5]([H:10])([H:11])[C@@:6]12[H:12],CCC#N,1.31 +[n:1]1([H:6])[c-:2][n+:3]([H:7])[c:4]([H:8])[n:5]1>>[n:1]1([H:6])[c:2]([H:7])[n:3][c:4]([H:8])[n:5]1,OCCO,6.19 +[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])([O:4][C:5](=[O:6])[H:14])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]([H:11])[H:13])[O:4][C:5](=[O:6])[H:14])([H:7])([H:8])[H:9].[H:10][H:12],OCCO,-0.74 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C@:4]1([C:5]([C:6](=[O:7])[H:17])([H:15])[H:16])[H:14])([H:8])([H:9])[H:10]>>[C:1]1([H:8])([H:10])[C@:2]2([H:11])[C:3]([H:12])([H:13])[C@:4]2([H:14])[C:5]([H:15])([H:16])[C@@:6]1([O:7][H:9])[H:17],OCCO,-4.23 +[C:1]([O:2][C:3]([C:4]([H:11])([H:12])[H:13])=[C:5]=[N:6][H:10])([H:7])([H:8])[H:9]>>[C:1]([O:2][C@@:3]([C:4]([H:11])([H:12])[H:13])([C:5]#[N:6])[H:10])([H:7])([H:8])[H:9],CCCC(C)C,0.34 +[C:1]([C@:2]1([H:10])[N:3]([H:11])[C@@:5]([O:6])([H:14])[C:4]1([H:12])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C@:2]([N:3][H:11])([C:4]([C:5](=[O:6])[H:14])([H:12])[H:13])[H:10])([H:7])([H:8])[H:9],CCCC(C)C,0.03 +[C:1]([C:2]([C:3]([C:4]([C:5]([H:16])([H:17])[H:18])([H:14])[H:15])([N+:7]#[C-:6])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([C:4]([C:5]([H:16])([H:17])[H:18])([H:14])[H:15])([C:6]#[N:7])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10],CCCC(C)C,0.42 +[C:1](=[C:2]([H:9])[H:10])([H:6])[H:8].[C:3](=[C:4]([O:5][H:7])[H:13])([H:11])[H:12]>>[C:1]([C:2]([C:3]([C:4](=[O:5])[H:13])([H:11])[H:12])([H:9])[H:10])([H:6])([H:7])[H:8],CCCC(C)=O,1.75 +[C:1]([O:2][C:3]([C:4](/[N:5]=[C:6](/[O:7][H:15])[H:16])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([O:2][C:3]([C:4]([N:5]([C:6](=[O:7])[H:16])[H:15])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],CCCC(C)=O,1.69 +[C:1]([C:2](=[O:3])[C:4]([H:10])([H:11])[H:13])([H:7])([H:8])[H:9].[C:5]([O:6][H:14])[H:12]>>[C:1]([C:2](=[O:3])[C:4]([C:5]([O:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],CCCC(C)=O,6.74 +[C:1]1([H:7])([H:9])[N:2]2[C@:3]1([H:10])[C:4]([H:11])=[C:5]([H:12])[C:6]2([H:8])[H:13]>>[C:1]([n:2]1[c:3]([H:10])[c:4]([H:11])[c:5]([H:12])[c:6]1[H:13])([H:7])([H:8])[H:9],CCCN(=O)=O,-0.71 +[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[N:4]([H:14])[C@:5]1([C:6]#[N:7])[H:15])([H:8])([H:9])[H:10]>>[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[N:4]([H:14])[C:5]([H:15])=[C:6]=[N:7]1)([H:8])([H:9])[H:10],CCCN(=O)=O,-0.44 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])=[C:4]([H:13])[N:5]([H:14])[C:6]1=[O:7])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])=[C-:4][N:5]([H:14])[C+:6]1[O:7][H:13])([H:8])([H:9])[H:10],CCCN(=O)=O,2.45 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[O:4][C@:5]([C:6]([H:15])([H:16])[H:17])([H:14])[O:7]1)([H:8])([H:9])[H:10]>>[C:1]([C:2]([O:7][C:5](=[O+:4][C-:3]([H:12])[H:13])[C:6]([H:15])([H:16])[H:17])([H:11])[H:14])([H:8])([H:9])[H:10],CC(=O)CC(C)C,0.58 
+[N:1]1=[C-:2][C@@:3]2([H:8])[O+:4]=[C:5]([H:9])[C@@:7]2([H:12])[C:6]1([H:10])[H:11]>>[N:1]#[C:2][C@@:3]1([H:8])[O:4][C@@:5]2([H:9])[C:6]([H:10])([H:11])[C@@:7]12[H:12],CC(=O)CC(C)C,0.95 +[C:1]([O:2][C:3]([C:4]([O:5][C:6]([H:14])([H:15])[H:16])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C-:1]([O+:2]=[C:3]([H:10])[H:11])([H:7])[H:9].[C:4]([O:5][C:6]([H:14])([H:15])[H:16])([H:8])([H:12])[H:13],CC(=O)CC(C)C,0.71 +[C:1](/[C:2](=[C:3](\[C:4]([C:5](=[C:6]=[C:7]([H:16])[H:17])[H:15])([H:13])[H:14])[H:12])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10],CC(C)OC(C)C,-0.5 +[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]1([C:4]([C@@:5]1([H:14])[C:6]([H:15])([H:16])[C:7]1)([H:12])[H:13])[H:17]>>[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]12[C:4]([H:12])([H:13])[C@@:5]1([H:14])[C:6]([H:15])([H:16])[C@@:7]21[H:17],CC(C)OC(C)C,0.08 +[C:1]([n:2]1[c:3]([H:11])[c:4]([H:12])[c:5]([C:6]([H:13])([H:14])[H:15])[n:7]1)([H:8])([H:9])[H:10]>>[C:1]([N:2]1[C:3]([H:11])=[C+:4][C@@:5]([C:6]([H:13])([H:14])[H:15])([H:12])[N-:7]1)([H:8])([H:9])[H:10],CC(C)OC(C)C,-1.99 +[C:1]([C@@:2]1([O:3][H:11])[C:4]([H:12])([H:13])[C@@:5]2([H:14])[O:6][C@@:7]12[H:15])([H:8])([H:9])[H:10]>>[C:1]([C:2]([O:3][H:11])=[C:4]([H:12])[H:13])([H:8])([H:9])[H:10].[C:5]([O-:6])(=[C+:7][H:15])[H:14],CC(C)OC(C)=O,-3.03 +[C:1]([O:2][C:3]([C:4]([C:5]([H:13])([H:14])[H:15])([C:6]([H:16])([H:17])[H:18])[C:7]([H:19])([H:20])[H:21])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([O+:2]=[C-:3][H:12])([H:8])([H:9])[H:10].[C:4]([C:5]([H:13])([H:14])[H:15])([C:6]([H:16])([H:17])[H:18])([C:7]([H:19])([H:20])[H:21])[H:11],CC(C)OC(C)=O,-0.77 +[C:1](=[C:2]1[C:3]([H:11])=[C:4]([N:5]([H:12])[H:13])[C:6]([H:10])([H:14])[N:7]1[H:15])([H:8])[H:9]>>[C:1]([c:2]1[c:3]([H:11])[c:4]([N:5]([H:12])[H:13])[c:6]([H:14])[n:7]1[H:15])([H:8])([H:9])[H:10],CC(C)OC(C)=O,0.61 +[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C-:4]([H:15])[O+:5]=[C:6]([H:16])[N:7]1[H:14])([H:8])([H:9])[H:10]>>[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C:4]([H:14])([H:15])[O:5][C:6]([H:16])=[N:7]1)([H:8])([H:9])[H:10],Cc1cccc(C)c1,2.48 +[C:1]([C@@:2]1([H:9])[N:3]([H:10])[N:4]1[C:5]([C:6]([H:13])([H:14])[H:15])([H:11])[H:12])([H:7])[H:8]>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[N:3]([H:10])[N@@:4]12.[C:5]([C:6]([H:13])([H:14])[H:15])([H:11])[H:12],Cc1cccc(C)c1,0.17 +[N:1](=[C:2]=[C:3]([N:4]=[C:5]([C:6](=[O:7])[H:11])[H:10])[H:9])[H:8]>>[N:1](=[c:2]1/[c:3]([H:9])[n:4][c:5]([H:10])[c:6]([H:11])[o:7]1)\[H:8],Cc1cccc(C)c1,0.52 +[C:1]([O:6]/[C:5](=[N:2]/[C:3](=[O:4])[H:11])[C:7]([H:12])([H:13])[H:14])([H:8])([H:9])[H:10]>>[C:1]([N:2]([C:3](=[O:4])[H:11])[C:5](=[O:6])[C:7]([H:12])([H:13])[H:14])([H:8])([H:9])[H:10],Cc1cccc(O)c1,-1.69 +[C:1]([N:2]1[C:3]([H:11])([H:12])[C@:4]1([C:5](=[O:6])[C:7]([H:14])([H:15])[H:16])[H:13])([H:8])([H:9])[H:10]>>[C:1]([C@@:4]([N:2]=[C:3]([H:11])[H:12])([C:5](=[O:6])[C:7]([H:14])([H:15])[H:16])[H:13])([H:8])([H:9])[H:10],Cc1cccc(O)c1,0.38 +[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C@:4]1([C:5]([N:7]=[C:6]([H:15])[H:16])([H:14])[H:17])[H:13])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C@:4]1([C@@:5]1([H:14])[C:6]([H:15])([H:16])[N:7]1[H:17])[H:13])([H:8])([H:9])[H:10],Cc1cccc(O)c1,-0.97 +[O:1]([C@@:2]1([H:8])[C:3]([H:9])([H:10])[C@:4]1([C:5]#[C:6][H:12])[H:11])[H:7]>>[O:1]([C:2]([C:3]([C-:4]=[C:5]=[C+:6][H:12])([H:9])[H:10])([H:8])[H:11])[H:7],Brc1ccccc1,-0.45 
+[C:1]([O:2][C@@:3]1([H:10])[C-:4]=[O+:5][C:6]1([H:13])[H:14])([H:7])([H:8])[H:9].[H:11][H:12]>>[C:1]([O:2][C:3]1([H:10])[C:4]([H:11])([H:12])[O:5][C:6]1([H:13])[H:14])([H:7])([H:8])[H:9],Brc1ccccc1,-0.49 +[N:1](=[C:2]1/[C:3]([H:8])=[C:4]([H:9])[C:5]([H:10])([H:11])[O:6]1)\[H:7]>>[N:1]1([H:7])[C-:2]=[O+:6][C:5]([H:10])([H:11])[C:4]([H:9])=[C:3]1[H:8],Brc1ccccc1,-0.31 +[C:1]([C:2](=[O:3])[C:4]([C:5]([C:6](=[C:7]([H:18])[H:19])[H:16])([H:14])[H:15])([H:12])[H:13])([H:8])([H:9])[H:10].[H:11][H:17]>>[C:1]([C:2]1([O:3][H:11])[C:4]([H:12])([H:13])[C:5]([H:14])([H:15])[C:6]([H:16])([H:17])[C:7]1([H:18])[H:19])([H:8])([H:9])[H:10],CC1CCCCC1,-1.62 +[C:1]([C:2]([C:3]([C:4]([C:5]([O:6][C:7]([H:19])([H:20])[H:21])([H:17])[H:18])([H:15])[H:16])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([H:8])([H:9])([H:10])[H:14].[C:2](=[C:3]([C:4]([C:5]([O:6][C:7]([H:19])([H:20])[H:21])([H:17])[H:18])([H:15])[H:16])[H:13])([H:11])[H:12],CC1CCCCC1,0.02 +[C:1]([C:2]([N:3]([C:4]([O:5][H:13])[H:12])[H:11])([H:9])[H:10])([H:6])([H:7])[H:8]>>[C:1]([C:2]([N:3]([C:4]([O:5][H:13])([H:6])[H:12])[H:11])([H:9])[H:10])([H:7])[H:8],CC1CCCCC1,-0.23 +[O:1]([C@@:2]1([H:9])[C@@:3]2([H:10])[C@@:4]3([H:11])[C:5]([H:12])([H:13])[C@:6]1([H:14])[N:7]23)[H:8]>>[O:1]([C@@:2]1([H:9])[C@@:3]2([H:10])[C:4][C:5]([H:12])([H:13])[C@:6]1([H:14])[N:7]2[H:11])[H:8],Cc1ccccc1,-0.06 +[C:1](=[C:2]=[C:3]1[C:4]([H:9])([H:10])[O:5][C:6]1([H:11])[H:12])([H:7])[H:8]>>[C:1](#[C:2][C:3]1([H:8])[C:4]([H:9])([H:10])[O:5][C:6]1([H:11])[H:12])[H:7],Cc1ccccc1,-1.99 +[C:1]([C:2](/[C:3](=[N:4]/[H:11])[H:10])=[N+:6]=[C-:5][H:12])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[c:3]([H:10])[n:4]([H:11])[c:5]([H:12])[n:6]1)([H:7])([H:8])[H:9],Cc1ccccc1,0.44 +[C:1]([O:2][C:3]([C:4]([C:5](=[O:6])[H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]1([H:7])([H:8])[O:2][C:3]([H:10])([H:11])[C:4]([H:12])([H:13])[C@:5]1([O:6][H:9])[H:14],Clc1ccccc1,-1.27 +[C:1]1([H:7])([H:8])[C@:2]2([H:9])[C@:3]3([H:10])[C:4]([H:11])([H:12])[C@@:5]1([H:13])[C@:6]23[H:14]>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C@:3]([C:4]([H:11])([H:12])[H:13])([H:10])[C@@:6]2([H:14])[C:5]1,Clc1ccccc1,-1.49 +[C:1]([C-:2]/[N+:6](=[C:5](\[C:4]#[N:3])[H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[n:3][c:4][c:5]([H:10])[n:6]1[H:11])([H:7])([H:8])[H:9],Clc1ccccc1,0.23 +[C:1]([n:2]1[c:3]([H:10])[c:4]([H:11])[n:5][n:6]1)([H:7])([H:8])[H:9]>>[C:1]([C@:3]1([H:10])[N:2]=[N:6][N:5]=[C:4]1[H:11])([H:7])([H:8])[H:9],OC1CCCCC1,-0.98 +[C:1]([C@@:3]1([H:11])[N:2]=[C:6]([O:7][H:14])[C:5]([H:13])=[C:4]1[H:12])([H:8])([H:9])[H:10]>>[C:1]([n:2]1[c:3]([H:11])[c:4]([H:12])[c:5]([H:13])[c:6]1[O:7][H:14])([H:8])([H:9])[H:10],OC1CCCCC1,2.87 +[C:1]([C:2]([C:3]([C@:4]([N:5]([H:15])[H:16])([C:6]#[N:7])[H:14])([H:12])[H:13])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C@:2]1([H:11])[C:3]([H:12])([H:13])[C@@:4]1([C:6]#[N:7])[H:14])([H:8])([H:9])[H:10].[N:5]([H:15])[H:16],OC1CCCCC1,-0.76 +[C:1]([N:2]1[C@@:3]2([H:11])[C@@:4]([C:7][H:16])([H:12])[C:5]([H:13])([H:14])[C@@:6]12[H:15])([H:8])([H:9])[H:10]>>[C:1]([N:2]1[C@@:3]2([H:11])[C@@:4]3([H:12])[C:5]([H:13])([H:14])[C@:6]1([H:15])[C@@:7]23[H:16])([H:8])([H:9])[H:10],O=C1CCCCC1,0.38 +[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5](=[O:6])[H:12])[H:11])([H:7])([H:8])[H:9]>>[C:1]([O:3][C@:4]([C:2][H:10])([C:5](=[O:6])[H:12])[H:11])([H:7])([H:8])[H:9],O=C1CCCCC1,0.66 +[O:1]=[C:2]([C:3]([O:4][C:5](=[O:6])[H:10])([H:7])[H:8])[H:9]>>[O:1]=[C:2]1[C:3]([H:7])([H:8])[O:4][C:5]([H:9])([H:10])[O:6]1,O=C1CCCCC1,-0.27 
+[C:1]1([H:7])([H:8])[N:2]([H:9])[O:6][C:5]([H:12])([H:13])[C:4]([H:11])=[C:3]1[H:10]>>[C:1]1([H:7])([H:8])[N:2]([H:9])[C@:3]1([C@@:4]1([H:11])[C:5]([H:12])([H:13])[O:6]1)[H:10],Cc1ccccn1,-0.55 +[C:1]([C:2]([N:3]1[C:4]([H:12])([H:13])[C@:5]1([C:6]([H:15])([H:16])[H:17])[H:14])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([N:3]1[C:4]([H:12])=[C:5]1[H:14])([H:10])[H:11])([H:7])([H:8])[H:9].[C:6]([H:13])([H:15])([H:16])[H:17],Cc1ccccn1,-1.47 +[O:1]([C:2]([C:3]([C:4]#[C:5][H:12])([C:6]#[C:7][H:13])[H:11])([H:9])[H:10])[H:8]>>[O:1]([C:2]([C:3](=[C:4]=[C:5]([H:11])[H:12])[C:6]#[C:7][H:13])([H:9])[H:10])[H:8],Cc1ccccn1,-2.31 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[O:6][C:5]([H:15])([H:16])[C:4]([H:14])[C:7]1([H:17])[H:18])([H:8])([H:9])[H:10]>>[C:1]([C:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([C:5]([O:6])([H:15])[H:16])([H:14])[C:7]1([H:17])[H:18])([H:8])([H:9])[H:10],CCCOC(C)=O,-0.32 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C@:4]([C:5]([H:15])([H:16])[H:17])([H:14])[C@:6]1([C:7]([H:19])([H:20])[H:21])[H:18])([H:8])([H:9])[H:10]>>[C:1]([H:8])([H:9])([H:10])[H:11].[C:2]1[C:3]([H:12])([H:13])[C@:4]([C:5]([H:15])([H:16])[H:17])([H:14])[C@:6]1([C:7]([H:19])([H:20])[H:21])[H:18],CCCOC(C)=O,-1.5 +[C:1]([N:2]([C:3]([C:4]#[N:5])([C:6]#[N:7])[H:12])[H:11])([H:8])([H:9])[H:10]>>[C:1]([N+:2]([C-:3]([C:4]#[N:5])[C:6]#[N:7])([H:11])[H:12])([H:8])([H:9])[H:10],CCCOC(C)=O,-4.3 +[N:1]([C:2](=[O:3])/[N:4]=[C:5](/[O:6][H:9])[H:10])([H:7])[H:8]>>[N:1]([C:2](=[O:3])[N:4]([C:5](=[O:6])[H:10])[H:9])([H:7])[H:8],CCCCC,0 +[C:1]([C:2]1=[N:5][C@@:6]1(/[N:3]=[N:4]/[H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[n:3][n:4]([H:10])[n:5][c:6]1[H:11])([H:7])([H:8])[H:9],CCCCC,-0.46 +[C:1]([C@@:2]1([C+:5]=[N:6][H:10])[C-:3]([H:11])[C:4]1([H:12])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([C:5]#[N:6])[C:3]([H:10])([H:11])[C:4]1([H:12])[H:13])([H:7])([H:8])[H:9],CCCCC,0.25 +[C:1]([N:2]([C:3]([C:4]([C:5](=[C:6]([N:7][H:17])[H:16])[H:15])([H:14])[H:18])([H:12])[H:13])[H:11])([H:8])([H:9])[H:10]>>[C:1]([N:2]([C:3](/[C:4](=[C:5](/[C:6]([N:7]([H:17])[H:18])[H:16])[H:15])[H:14])([H:12])[H:13])[H:11])([H:8])([H:9])[H:10],CCCCCl,0.14 +[C:1](/[C:2](=[C:3](/[C:4]([C:5]([H:14])([H:15])[H:17])([H:12])[H:13])[H:11])[C:6](=[O:7])[H:16])([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[C:3]([H:11])[C:4]([H:12])([H:13])[C:5]([H:14])([H:15])[C@:6]1([O:7][H:17])[H:16])([H:8])([H:9])[H:10],CCCCCl,-1.29 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[N:4]([H:12])[C@:5]1([C:6](=[O:7])[H:14])[H:13])[H:8]>>[O:1]([C:3]([C@@:2]1([H:9])[N:4]([H:12])[C@:5]1([C:6](=[O:7])[H:14])[H:13])([H:10])[H:11])[H:8],CCCCCl,-0.66 +[C:1]([C:2]([C:3]#[C:4][C:5](=[O:6])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]1([H:7])([H:9])[C:2]([H:10])([H:11])[C:3]1=[C:4]([C:5](=[O:6])[H:12])[H:8],CCCCN,-0.98 +[N:1](=[C:2]1/[C:3]([H:8])=[C:4]([H:9])[C:5]([H:10])([H:11])[O:6]1)\[H:7]>>[N:1]([c:2]1[c:3]([H:8])[c:4]([H:9])[c:5]([H:11])[o:6]1)([H:7])[H:10],CCCCN,0.71 +[O:1]([C:2]1=[C:3]([H:6])[O:4][C:5]1([H:8])[H:9])[H:7]>>[O:1]=[C:2]1[C:3]([H:6])([H:7])[O:4][C:5]1([H:8])[H:9],CCCCN,0.73 +[C:1]([C:2](=[C:3]([H:11])[H:13])[H:10])([H:7])([H:8])[H:9].[C:4](=[C:5]=[C:6]([H:12])[H:16])([H:14])[H:15]>>[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])([C:4]([C:5]#[C:6][H:16])([H:14])[H:15])[H:10])([H:7])([H:8])[H:9],CCCC#N,-0.69 +[O:1]([C:2]([O+:4]=[C-:3][C:5]#[C:6][H:10])([H:8])[H:9])[H:7]>>[O:1]([C:2]([C:3](=[O:4])[C:5]#[C:6][H:10])([H:8])[H:9])[H:7],CCCC#N,-1.54 
+[C:1]([C:2](=[C:3]=[C:4]([H:10])[H:11])[H:9])(/[C:6](=[N:5]\[H:12])[H:13])([H:7])[H:8]>>[C:1]1([H:7])([H:8])[C:2]([H:9])=[C:3]([H:10])[C@@:4]2([H:11])[N:5]([H:12])[C@@:6]12[H:13],CCCC#N,-1.42 +[C:1]([C:5]([C:4](=[C:3]=[C:2]([H:8])[H:9])[H:11])([H:12])[H:13])([H:6])([H:7])[H:10]>>[C:1]1([H:6])([H:7])[C:2]([H:8])([H:9])[C:3]([H:10])=[C:4]([H:11])[C:5]1([H:12])[H:13],COCCO,-8.49 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C@:4]1([C:5]#[N:6])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C:2][C:3]([C:4]([C:5]#[N:6])([H:10])[H:13])([H:11])[H:12])([H:7])([H:8])[H:9],COCCO,-3.15 +[O:1]([C:2]([C:3]1([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:8])[H:9])[H:7]>>[C:5](=[C:6]([H:15])[H:16])([H:13])[H:14].[O:1](/[C:2](=[C:3](/[C:4]([H:8])([H:11])[H:12])[H:10])[H:9])[H:7],COCCO,-2.21 +[O:1]([C@@:2]([C:3]([C:4]#[C:5][H:12])([H:10])[H:11])([C:6]#[C:7][H:13])[H:9])[H:8]>>[O:1]1[C@@:2]([C:6]#[C:7][H:13])([H:9])[C:3]([H:10])([H:11])[C:4]1=[C:5]([H:8])[H:12],C1CCOC1,-0.7 +[C:1]([C:2]1[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[N:5]=[C:6]([H:15])[O:7]1)([H:8])([H:9])[H:10]>>[C:1]([C@:2]12[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[N:5]1[C:6]([H:15])[O:7]2)([H:8])([H:9])[H:10],C1CCOC1,0.45 +[N:1](=[C:2]1/[O:3][C:4]([H:9])([H:10])[C-:5]=[N+:6]1[H:8])\[H:7]>>[N:1](=[C:2](/[O:3][C:4]([C:5]#[N:6])([H:9])[H:10])[H:8])\[H:7],C1CCOC1,2.89 +[C:1]1([H:7])([H:8])[C:2]([H:9])=[C:3]([H:10])[C@@:6]1([C:5](=[O:4])[H:11])[H:12]>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C@:3]3([H:10])[O:4][C@@:5]2([H:11])[C@:6]13[H:12],c1ccsc1,-0.95 +[C:1]([C@@:2]1([H:10])[N:3]([H:11])[C@:4]1([C:5]([O:6][H:15])([H:13])[H:14])[H:12])([H:7])([H:8])[H:9]>>[C:1]([C:2](/[N:3]=[C:4](/[C:5]([O:6][H:15])([H:13])[H:14])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9],c1ccsc1,-0.81 +[C:1]([O:2][C:3](=[O:4])[C:5]#[C:6][H:10])([H:7])([H:8])[H:9]>>[C:1](=[O:2])([H:7])[H:9].[c:3]1(=[O:4])[c:5]([H:8])[c:6]1[H:10],c1ccsc1,0.29 +[N:1]1([H:7])/[C:2](=[N:6]\[H:11])[C:3]1([H:8])[H:9].[N:4]#[C:5][H:10]>>[N:1](=[C:2]1\[C:3]([H:8])([H:9])[N:4]=[C:5]([H:10])[N:6]1[H:11])\[H:7],CCCCCC,-0.1 +[C:1]([C@@:2]1([C:5](=[O:6])[N:7]([H:14])[H:15])[C:3]([H:11])([H:12])[N:4]1[H:13])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]([N+:4](=[C-:3][H:11])[H:13])([C:5](=[O:6])[N:7]([H:14])[H:15])[H:12])([H:8])([H:9])[H:10],CCCCCC,0.15 +[C:1]([C@@:2]12[O:3][C@@:4]3([H:11])[C:5]([H:12])([H:13])[C@:6]1([H:14])[C@@:7]23[H:15])([H:8])([H:9])[H:10]>>[C:1]([C@:2]1([H:14])[O:3][C@@:4]2([H:11])[C:5]([H:12])([H:13])[C:6][C@@:7]12[H:15])([H:8])([H:9])[H:10],CCCCCC,-0.66 +[C-:1]1([H:8])[N+:2]([H:7])([H:9])[C:3]12[C:4]([H:10])([H:11])[O:5][C:6]2([H:12])[H:13]>>[C:1]1([H:7])([H:8])[N:2]([H:9])[C:3]12[C:4]([H:10])([H:11])[O:5][C:6]2([H:12])[H:13],CCOCCO,9.67 +[O:1]([C@@:2]1([H:7])[C:3]([H:8])([H:9])[C@:4]1([O:5][H:11])[H:10])[H:6]>>[C:4]([O:5][H:11])[H:10].[O:1]([C:2](=[C:3]([H:8])[H:9])[H:7])[H:6],CCOCCO,-5.74 +[C:1]([C@@:2]12[C:3]([H:11])([H:12])[C@@:4]3([H:13])[O:5][C@:6]1([H:14])[C@@:7]23[H:15])([H:8])([H:9])[H:10]>>[C:1]([C@@:7]1([H:15])[C@@:4]2([H:13])[C:3]([H:11])([H:12])[C:2][C@:6]1([H:14])[O:5]2)([H:8])([H:9])[H:10],CCOCCO,-1.56 +[C:1]([O:2][C:3]([C:6]([C:4]([C:5]([H:14])([H:15])[H:16])([H:13])[H:17])=[O:7])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6](=[O:7])[H:17])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10],C1CCCCC1,-0.89 +[C:1]([C@@:2]([C:3]([O:4][H:12])([H:10])[H:11])([C:5][H:14])[O:6][H:13])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([C:3]([O:4][H:12])([H:10])[H:11])[C:5]([H:13])([H:14])[O:6]1)([H:7])([H:8])[H:9],C1CCCCC1,0.88 
+[C:1]([H:7])([H:8])([H:9])[H:10].[N:2]=[C:3]([C:4]([N:5]([C:6]([H:15])([H:16])[H:17])[H:14])([H:12])[H:13])[H:11]>>[C:1]([N:2][C:3]([C:4]([N:5]([C:6]([H:15])([H:16])[H:17])[H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],C1CCCCC1,-0.89 +[C:1]([O:2][C:3]([N:4][C:5]#[C:6][H:11])=[C:7]([H:12])[H:13])([H:8])([H:9])[H:10]>>[C:1]([O:2][C:3]1=[N:4][C:5]=[C:6]([H:11])[C:7]1([H:12])[H:13])([H:8])([H:9])[H:10],C1=CCCCC1,-0.3 +[C:1]([C@:2]1([H:9])/[C:3](=[N:5]/[H:11])[O:4]1)([H:6])([H:7])[H:8].[H:10][H:12]>>[C:1]([C:2]([C:3](=[O:4])[N:5]([H:11])[H:12])([H:9])[H:10])([H:6])([H:7])[H:8],C1=CCCCC1,-2.48 +[C:1](=[C:2]([c:3]1[c:4]([H:10])[c:5]([H:11])[c:6]([H:12])[n:7]1[H:13])[H:9])[H:8]>>[C-:1](=[C:2]([C:3]1=[C:4]([H:10])[C:5]([H:11])([H:12])[C+:6][N:7]1[H:13])[H:9])[H:8],C1=CCCCC1,-0.44 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C@:6]1([O:7][H:17])[H:16])([H:8])([H:9])[H:10]>>[C:1](=[C:2](/[C:3](=[C:4]([C:5](=[C:6](/[O:7][H:17])[H:16])\[H:15])/[H:14])[H:12])[H:11])([H:8])[H:10].[H:9][H:13],c1ccncc1,-0.73 +[N:1]([C:2]1=[N:7][N+:6](=[N-:5])[C:4]([H:10])=[N:3]1)([H:8])[H:9]>>[N:1]([c:2]1[n:3][c:4]([H:10])[n:5][n:6][n:7]1)([H:8])[H:9],c1ccncc1,3.58 +[N:1]#[C:2][C@@:3]1([H:8])[C:4]([H:9])([H:10])[N:5]=[C:6]([H:11])[N:7]1[H:12]>>[N:1]#[C:2][C@@:3]1([H:8])[C:4]([H:9])=[N:5][C-:6]([H:11])[N+:7]1([H:10])[H:12],c1ccncc1,-3.2 +[C:1]([C@@:2]1([C:5](=[O:6])[N:7]([H:13])[H:14])[C:3]([H:11])([H:12])[O:4]1)([H:8])([H:9])[H:10]>>[C:1]([O:6]/[C:5](=[C:2]1\[C:3]([H:11])([H:12])[O:4]1)[N:7]([H:13])[H:14])([H:8])([H:9])[H:10],C1CCNCC1,-0.6 +[C:1]([C:2]([C:3]([C:4]([N+:6]#[C-:5])([H:14])[H:15])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3]([C:4]([C:5]#[N:6])([H:14])[H:15])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],C1CCNCC1,0.96 +[N:1]([c:2]1[n:3][o:4][c:5]([H:9])[c:6]1[H:10])([H:7])[H:8]>>[C:5](#[C:6][H:10])[H:9].[N:1]([C:2]#[N+:3][O-:4])([H:7])[H:8],C1CCNCC1,-1.74 +[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])([C:4](=[O:5])[N:6]([H:14])[H:15])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C@@:4]([N:6]([H:14])[H:15])([H:10])[O:5]1)([H:7])([H:8])[H:9],CCCCCCO,-5.32 +[C-:6]#[O+:7].[C:1]([C:2]([C:3]([O:4][H:14])=[C:5]([H:15])[H:16])([H:11])[H:12])([H:8])([H:9])[H:10].[H:13][H:17]>>[C:1]([C:2]([C@@:3]([O:4][H:14])([C:5]([C:6](=[O:7])[H:17])([H:15])[H:16])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10],CCCCCCO,-3.93 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C@@:4]2([O:5][H:12])[C:6]([H:13])([H:14])[C@@:7]12[H:15])[H:8]>>[O:1]([C:6]([C@:4]1([O:5][H:12])[C:3]([H:10])([H:11])[C:2]([H:9])=[C:7]1[H:15])([H:13])[H:14])[H:8],CCCCCCO,-0.62 +[C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([N:4]([C:5]([H:15])([H:16])[H:17])[C:6](=[O:7])[H:18])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([H:11])[H:18])([H:8])([H:9])[H:10].[N:4]([C:5]([H:15])([H:16])[H:17])=[C:6]=[O:7],CCCOCCC,-0.66 +[C:1]([C@:5]1([H:10])[C-:4]=[N+:3]=[C:2][N:6]1[H:11])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[n:3][c:4][c:5]([H:10])[n:6]1[H:11])([H:7])([H:8])[H:9],CCCOCCC,2.53 +[H:7][H:8].[O:1]=[C:2]([C:3](=[O:4])[H:9])[C:5]#[N:6]>>[O:1]([C@@:2]([C:3](=[O:4])[H:9])([C:5]#[N:6])[H:8])[H:7],CCCOCCC,-3.42 +[C:1]([C:2]([N:3]([C:4](=[N:5][H:14])[N+:7]#[C-:6])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([N:3](/[C:4](=[N:5]/[H:14])[C:6]#[N:7])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10],OCCOCCO,0.84 +[O:1]=[c:2]1[n:3]([H:7])[c:4]([H:8])[c:5]([H:9])[o:6]1>>[O:1]=[C:2]1[N:3]([H:7])[C:4]([H:8])([H:9])[C-:5]=[O+:6]1,OCCOCCO,-4.7 
+[C:6](=[N:7][H:13])([H:11])[H:12].[N:1]([C:2](=[O:3])[C:4](=[O:5])[H:10])([H:8])[H:9]>>[N:1]([C:2](=[O:3])[C:4]([O:5]/[C:6](=[N:7]/[H:13])[H:12])([H:10])[H:11])([H:8])[H:9],OCCOCCO,-2.41 +[O:1]=[C:2]1[C:3]([H:7])([H:8])[C:4]([H:9])=[C:5]([H:10])[C:6]1([H:11])[H:12]>>[O:1]([C:2]1=[C:6]([H:11])[C:5]([H:10])=[C:4]([H:9])[C:3]1([H:7])[H:8])[H:12],CCCCCCCC,0.15 +[O:1](/[C:2](=[C:3](/[C:4]#[N:5])[H:7])[H:6])[H:8]>>[O:1]=[C:2]([C:3]([C:4]#[N:5])([H:7])[H:8])[H:6],CCCCCCCC,0.39 +[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])=[C:4]1[C:5]([H:14])([H:15])[C:6]([H:16])[C:7]1([H:17])[H:18])([H:8])([H:9])[H:10]>>[C:1](=[C:2]([C:3]([H:11])([H:12])[H:13])[C:4]1[C:5]([H:14])([H:15])[C:6]([H:10])([H:16])[C:7]1([H:17])[H:18])([H:8])[H:9],CCCCCCCC,-0.08 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C@:4]1([C:5]([O:6][H:16])([H:14])[H:15])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3]([C@@:4]1([H:13])[C:5]([H:14])([H:15])[O:6]1)([H:11])[H:12])([H:10])[H:16])([H:7])([H:8])[H:9],N#CCCCCC#N,-6.8 +[C:1]([c:2]1[c:3]([H:10])[n:4]([H:11])[c:5]([H:12])[n:6]1)([H:7])([H:8])[H:9]>>[C:1]([N+:6]#[C:2][C-:3]([N+:4](=[C-:5][H:12])[H:11])[H:10])([H:7])([H:8])[H:9],N#CCCCCC#N,-2.96 +[C:1]([c:2]1[c:3]([H:11])[n:4]([H:12])[c:5]([O:6][H:13])[n:7]1)([H:8])([H:9])[H:10]>>[C:1](=[C:2]1[C:3]([H:11])=[N:4][C:5]([O:6][H:13])=[N:7]1)([H:8])[H:9].[H:10][H:12],N#CCCCCC#N,0.39 +[C:1]1([H:6])([H:7])[O:2][C:3]([H:8])([H:9])[C:4]([H:10])=[C:5]1[H:11]>>[C:1]([C:5]#[C:4][H:10])([H:6])([H:7])[H:11].[O:2]=[C:3]([H:8])[H:9],CCCCCCCO,-5.43 +[C:1]([C:2](=[O:3])[C:4]1([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14])([H:7])([H:8])[H:9]>>[C:1]1([H:7])([H:9])[C@:2]([C:4]2([H:10])[C:5]([H:11])([H:12])[C:6]2([H:13])[H:14])([H:8])[O:3]1,CCCCCCCO,-1.89 +[C:2](=[C:3]([O:4][C:5]1([H:12])[C:6]([H:13])([H:14])[C:7]1([H:15])[H:16])[H:10])[H:9].[O:1]([H:8])[H:11]>>[O:1]([C:2]([C:3]([O:4][C:5]1([H:12])[C:6]([H:13])([H:14])[C:7]1([H:15])[H:16])([H:10])[H:11])[H:9])[H:8],CCCCCCCO,-0.4 +[C:1]([C@:2]12[C:3]([H:10])([H:11])[C@@:4]1([H:12])[C:5]([H:13])([H:14])[O:6]2)([H:7])([H:8])[H:9]>>[C:1]([C@:2]1([O:6][H:14])[C:3]([H:10])([H:11])[C@:4]1([C:5][H:13])[H:12])([H:7])([H:8])[H:9],OCCOCCCC,-1.22 +[N:1](=[C:2]=[C:3]1[C:4]([H:8])([H:9])[C:5]([H:10])([H:11])[N:6]1[H:12])[H:7]>>[N:1]#[C:2][C@@:3]1([H:7])[C:4]([H:8])([H:9])[C:5]([H:10])([H:11])[N:6]1[H:12],OCCOCCCC,-1.49 +[C:1]([N:2]([C:3](=[O:4])[C:5]([C:6]([H:13])([H:14])[H:15])([C:7]([H:16])([H:17])[H:18])[H:12])[H:11])([H:8])([H:9])[H:10]>>[C:1](=[N:2][H:11])([H:9])[H:10].[C:3](=[O:4])([C:5]([C:6]([H:13])([H:14])[H:15])([C:7]([H:16])([H:17])[H:18])[H:12])[H:8],OCCOCCCC,-0.72 +[N+:1](#[C-:2])[C@@:3]1([H:8])[O:4][C@@:5]2([H:9])[C:6]([H:10])([H:11])[C@@:7]12[H:12]>>[N:1]#[C:2][C@@:3]1([H:8])[O:4][C@@:5]2([H:9])[C:6]([H:10])([H:11])[C@@:7]12[H:12],CCCCCCCCC,0.25 +[C:1]([C:2]([C:3]([C:4]1=[N:6][O:5]1)([H:11])[H:12])([H:10])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C:4](=[O:5])[N:6]1[H:13])([H:7])([H:8])[H:9],CCCCCCCCC,-0.13 +[C-:2]1=[O+:7][C:6]([H:15])[N:5]=[C:4]([H:14])[C:3]1([H:11])[H:12].[C:1]([H:8])([H:9])([H:10])[H:13]>>[C:1]([C:2]1[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[N:5]=[C:6]([H:15])[O:7]1)([H:8])([H:9])[H:10],CCCCCCCCC,-1.21 +[C:1]1([H:7])([H:8])[O:2][C@:3]([C:4]([H:11])([H:12])[H:13])([H:10])[C:5]1([H:14])[H:15].[O:6]([H:9])[H:16]>>[C:1]([O:2][C@@:3]([C:4]([H:11])([H:12])[H:13])([C:5]([O:6][H:16])([H:14])[H:15])[H:10])([H:7])([H:8])[H:9],CCCCNCCCC,-0.91 
+[C:1]([C@@:2]([O:3][C:4](=[C:5]([H:13])[H:14])[H:11])([O:6][H:12])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([H:10])[O:3][C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[O:6]1)([H:7])([H:8])[H:9],CCCCNCCCC,2.11 +[C:1]([C@@:2]1([O:3][H:10])[C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C:2]([O:3][H:10])([C:4]([H:11])([H:12])[H:14])[C:5](=[O:6])[H:13])([H:7])([H:8])[H:9],CCCCNCCCC,5.49 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C@:4]2([H:12])[C:5]([H:13])([H:14])[C@@:6]1([H:15])[O:7]2)[H:8]>>[O:1]=[C:2]([C@@:6]([C:5]([C:4](=[C:3]([H:10])[H:11])[H:12])([H:13])[H:14])([O:7][H:8])[H:15])[H:9],CCCCCCCCCCCC,-0.68 +[N:1](=[C:2]1/[C:3]([H:9])([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[C:6]([H:15])([H:16])[O:7]1)\[H:8]>>[N:1](=[C:2](/[C:3]([C:4]([C:5]([C:6](=[O:7])[H:15])([H:13])[H:14])([H:11])[H:12])([H:9])[H:10])[H:16])\[H:8],CCCCCCCCCCCC,0.02 +[O:1]([C:2][C:3]([C:4]([C:5](=[O:6])[H:8])([H:11])[H:12])([H:9])[H:10])[H:7]>>[O:1]([C@@:2]1([H:8])[C:3]([H:9])([H:10])[C:4]([H:11])([H:12])[C:5]1=[O:6])[H:7],CCCCCCCCCCCC,-0.84 +[C:1]([C@@:2]1([H:11])[N:3]=[C:4]1[H:13])([H:8])([H:9])[H:10].[C:5]1([H:14])=[N:7][C:6]1([H:15])[H:16].[H:12][H:17]>>[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C@:4]1([C@@:5]1([H:14])[C:6]([H:15])([H:16])[N:7]1[H:17])[H:13])([H:8])([H:9])[H:10],O=C1CCCC1,-1.7 +[N:1]#[C:2][C:3]1([H:8])[C:4]([H:9])([H:10])[C:5]([H:11])=[C:6]([H:12])[C:7]1([H:13])[H:14]>>[N:1](=[C:2]=[C:3]1[C:4]([H:9])([H:10])[C:5]([H:11])=[C:6]([H:12])[C:7]1([H:13])[H:14])[H:8],O=C1CCCC1,-0.14 +[C:1]1([H:6])([H:7])[C:2]([H:8])([H:9])[C:3]([H:10])([H:11])[O:4][C:5]1([H:12])[H:13]>>[C:1](=[C:5]([H:12])[H:13])([H:6])[H:7].[C:2]1([H:8])([H:9])[C:3]([H:10])([H:11])[O:4]1,O=C1CCCC1,-0.08 +[O:1]([C:2]([C@:3]1([H:11])[C:4]([H:12])=[C:5]([O:6][H:13])[C:7]1([H:14])[H:15])([H:9])[H:10])[H:8]>>[O:1]([C:2]([C:3]1([H:11])[C:4]([H:12])([H:13])[C:5](=[O:6])[C:7]1([H:14])[H:15])([H:9])[H:10])[H:8],CCN(CC)CC,2.16 +[C:1](/[C:2](=[C:4](/[C:5]([H:15])([H:16])[H:17])[H:14])[H:10])([H:7])([H:8])[H:9].[C:3]([O:6][H:18])([H:11])([H:12])[H:13]>>[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])([C@@:4]([C:5]([H:15])([H:16])[H:17])([O:6][H:18])[H:14])[H:10])([H:7])([H:8])[H:9],CCN(CC)CC,1.48 +[C:1]([O:2][C:3]1=[C:7]=[C:6]([H:12])[N:5]([H:11])[N:4]1[H:13])([H:8])([H:9])[H:10]>>[C:1]([O:2][c:3]1[n:4][n:5]([H:11])[c:6]([H:12])[c:7]1[H:13])([H:8])([H:9])[H:10],CCN(CC)CC,1.22 +[N:1]1=[C:2]2[C@@:3]3([H:8])[C:4]([H:9])([H:10])[C@@:6]([H:12])([C@@:5]12[H:11])[C:7]3([H:13])[H:14]>>[N:1]#[C:2][C:3]1([H:8])[C:4]([H:9])([H:10])[C:5]([H:11])=[C:6]([H:12])[C:7]1([H:13])[H:14],CC(C)CCO,-16.73 +[N:1]([c:2]1[c:3]([H:9])[o:4][c:5]([H:10])[n:6]1)([H:7])[H:8]>>[N:1]([c:2]1[c:3]([H:9])[o+:4][c-:5][n:6]1[H:10])([H:7])[H:8],CC(C)CCO,-2.04 +[C:1]([C@:2]12[C:3]([H:10])([H:11])[C@@:4]1([H:12])[C:5]([H:13])([H:14])[C:6]2([H:15])[H:16])([H:7])([H:8])[H:9]>>[C:1]([C@:2]1([H:11])[C@:4]([C:3][H:10])([H:12])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:7])([H:8])[H:9],CC(C)CCO,-6.59 +[C:1]([H:7])([H:8])([H:9])[H:11].[C:2](=[O:3])=[C:4]([C:5]([O:6][H:14])([H:12])[H:13])[H:10]>>[C:1]([C:2](=[O:3])[C:4]([C:5]([O:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],CC(CC(C)=O)=O,-1.89 +[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C:4]([H:14])([H:15])[O:5]/[C:6]1=[N:7]\[H:16])([H:8])([H:9])[H:10]>>[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C-:4]([H:15])[O+:5]=[C:6]1[N:7]([H:14])[H:16])([H:8])([H:9])[H:10],CC(CC(C)=O)=O,0.86 
+[N+:1]([C:2]1=[C:6]=[C:5]([H:10])[O:4][C-:3]1[H:9])([H:7])([H:8])[H:11]>>[N:1]([c:2]1[c:3]([H:9])[o:4][c:5]([H:10])[c:6]1[H:11])([H:7])[H:8],CC(CC(C)=O)=O,10.97 +[O:1]=[C:2]1[N:3]([H:8])[C:4]([H:9])([H:10])[C@@:5]2([H:11])[O:6][C@@:7]12[H:12]>>[O:1]=[C:2]([N+:3](=[C:4]([H:9])[H:10])[H:8])[C:7](=[C:5]([O-:6])[H:11])[H:12],C1CCNC1,-2.2 +[C:1]([C:3]1([O:2][H:8])[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14])[H:7]>>[C:1]1([H:7])([H:8])[O:2][C:3]12[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[C:6]2([H:13])[H:14],C1CCNC1,8.8 +[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[N:4]([H:14])[C:5]([H:15])([H:16])[C:6]1=[O:7])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])=[N:4][H:14])([H:8])([H:9])[H:10].[C:5](=[C:6]=[O:7])([H:15])[H:16],C1CCNC1,-1.63 +[C:5](#[N:6])[H:12].[O:1]([C@@:2]1([C:7]([H:9])([H:13])[H:14])[C:3]([H:10])([H:11])[O:4]1)[H:8]>>[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[O:4][C:5]([H:12])=[N:6][C:7]1([H:13])[H:14])[H:8],CCCCOC(C)=O,-2.68 +[C:1]([C:2]([C@@:3]([C:4]([H:13])([H:14])[H:15])([C:5]([C:6][O:7][H:19])([H:16])[H:17])[H:18])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C@@:3]1([C:4]([H:13])([H:14])[H:15])[C:5]([H:16])([H:17])[C@:6]1([O:7][H:19])[H:18])([H:11])[H:12])([H:8])([H:9])[H:10],CCCCOC(C)=O,1.68 +[N:1](=[C:2](/[O:3][C:4](=[C:5]=[N:6][H:10])[H:9])[H:8])\[H:7]>>[N:1](=[C:2](/[O:3][C:4]([C:5]#[N:6])([H:9])[H:10])[H:8])\[H:7],CCCCOC(C)=O,-0.22 +[N:1](=[C:2]1\[O:3][C:4]([H:9])([H:10])[C@@:5]2([H:11])[C:6]([H:12])([H:13])[C@@:7]12[H:14])\[H:8]>>[H:10][H:14].[N-:1]([C:2]1=[C:7]2[C@@:5]([H:11])([C:4]([H:9])=[O+:3]1)[C:6]2([H:12])[H:13])[H:8],C1COCCO1,-0.77 +[C:1]1([H:7])([H:8])[O:2][C:3]12[C:4]([H:9])([H:10])[O:5][C:6]2([H:11])[H:12]>>[C:1]1([H:7])([H:8])[O+:2]=[C-:3][C:4]1([H:9])[H:10].[O:5]=[C:6]([H:11])[H:12],C1COCCO1,-4.77 +[C:1]([N:2]1[C:3]([H:10])=[C+:4][N:5]([H:11])[N-:6]1)([H:7])([H:8])[H:9]>>[C:1]([n:2]1[c:3]([H:10])[c:4]([H:11])[n:5][n:6]1)([H:7])([H:8])[H:9],C1COCCO1,1.74 +[C:1]([C:2](=[C:3]([H:11])[H:12])[H:10])(/[C:7](=[N:6]\[C:5](=[O:4])[H:13])[H:14])([H:8])[H:9]>>[C:1]1([H:8])([H:9])[C@@:2]2([H:10])[C:3]([H:11])([H:12])[O:4][C:5]([H:13])=[N:6][C@@:7]12[H:14],CCCCCCCCCC,-0.16 +[O:1]([C@@:2]1([H:7])[C:3]([H:8])([H:9])[C@:4]1([O:5][H:11])[H:10])[H:6]>>[C:4]([O:5][H:11])[H:10].[O:1]([C:2](=[C:3]([H:8])[H:9])[H:7])[H:6],CCCCCCCCCC,0.29 +[N:1]([c:2]1[n:3][n:4]([H:10])[c:5]([O:6][H:11])[n:7]1)([H:8])[H:9]>>[N:1]([C@@:5]([N:4]([N-:3])[H:10])([O:6][H:11])[N+:7]#[C:2])([H:8])[H:9],CCCCCCCCCC,-0.73 +[C:1](/[C:2](=[N:3]\[H:11])[H:9])([H:6])([H:7])[H:8].[C:4]=[N:5][H:10]>>[C:1]([C@@:2]([N:3]([H:10])[H:11])([C:4]#[N:5])[H:9])([H:6])([H:7])[H:8],ClC(Cl)=C(Cl)Cl,-6.67 +[O:1]([C:2]([C@@:3]([C:4]#[C:5][H:12])([C:6]#[N:7])[H:11])([H:9])[H:10])[H:8]>>[O:1]([C:2]([C@@:3]([C:5](=[C:4])[H:12])([C:6]#[N:7])[H:11])([H:9])[H:10])[H:8],ClC(Cl)=C(Cl)Cl,-0.59 +[O:1]=[C:2]([C:3]#[N+:4][N-:5][H:7])[H:6]>>[O-:1][C:2](=[C:3]([N+:4]#[N:5])[H:7])[H:6],ClC(Cl)=C(Cl)Cl,1.58 +[C:1]([C:2]([C:3]([C:4]([C:5]#[C:6][H:16])([H:14])[H:15])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]([H:12])[H:13])[H:10])([H:7])([H:8])[H:9].[C:4](=[C:5]=[C:6]([H:11])[H:16])([H:14])[H:15],CC(=O)N(C)C,0 +[O:1]=[C:2]([C@@:3]1([H:8])[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[O:6]1)[H:7]>>[O:1]=[C:2]([C:4]1([H:9])[C:3]([H:8])([H:10])[O:6][C:5]1([H:11])[H:12])[H:7],CC(=O)N(C)C,-5.48 +[O+:1](=[C-:2][H:7])[C:3]([O:4][C:5](=[O:6])[H:10])([H:8])[H:9]>>[O:1]=[C:2]([C:3]([O:4][C:5](=[O:6])[H:10])([H:8])[H:9])[H:7],CC(=O)N(C)C,-5.11 
+[N+:1]([C:2]1=[C:6]=[N:5][N:4]([H:9])[N-:3]1)([H:7])([H:8])[H:10]>>[N:1]([c:2]1[n:3][n:4]([H:9])[n:5][c:6]1[H:10])([H:7])[H:8],CCC(C)CO,17.95 +[C-:1]1([H:9])[C@@:2]2([H:10])[C@@:3]3([H:11])[C:4]([H:12])([H:13])[C@:5]([H:14])([C:6]3([H:15])[H:16])[N+:7]12[H:8]>>[C:1]1([H:8])([H:9])[C@@:2]2([H:10])[C@@:3]3([H:11])[C:4]([H:12])([H:13])[C@:5]([H:14])([C:6]3([H:15])[H:16])[N:7]12,CCC(C)CO,7.75 +[N:1]([C+:2]1[C@@:3]([O:6][H:12])([H:10])[N:4]([H:11])[C-:5]=[C:7]1[H:13])([H:8])[H:9]>>[N:1]([c:2]1[c:3]([H:10])[n:4]([H:11])[c:5]([O:6][H:12])[c:7]1[H:13])([H:8])[H:9],CCC(C)CO,0.53 +[C:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C@:4]1([O:5][H:13])[H:12])([H:6])([H:7])[H:8]>>[C:1]([C:2]([C:3]([C:4][O:5][H:13])([H:10])[H:11])([H:9])[H:12])([H:6])([H:7])[H:8],NCCO,-2.95 +[O:1]([C-:2](/[C:3](=[N:4]/[H:9])[H:8])[N+:6]#[N:5])[H:7]>>[O:1]([c:2]1[c:3]([H:8])[n:4]([H:9])[n:5][n:6]1)[H:7],NCCO,-0.23 +[O:1]([C@@:2]1([H:9])[C@@:3]2([H:10])[C@:4]3([H:11])[C:5]([H:12])([H:13])[N:6]2[C@:7]13[H:14])[H:8]>>[H:8][H:9].[O:1]=[C:2]1[C@@:3]2([H:10])[C@:4]3([H:11])[C:5]([H:12])([H:13])[N:6]2[C@:7]13[H:14],NCCO,-4.06 +[C:1]1([H:7])=[C:6]([H:10])[C:5]([H:9])=[N+:4]2[C@:2]1([H:8])[N-:3]2>>[c:1]1([H:7])[c:2]([H:8])[n:3][n:4][c:5]([H:9])[c:6]1[H:10],CCOC(C)=O,0.22 +[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C:4]([H:14])([H:15])[N:5]([H:16])[C:6]1=[O:7])([H:8])([H:9])[H:10]>>[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C:6]([N:5]([C-:4]([H:14])[H:15])[H:16])=[O+:7]1)([H:8])([H:9])[H:10],CCOC(C)=O,0.18 +[C:1]1([H:8])([H:9])[C:2]([H:10])=[C:7]([H:13])[O:6][C@@:5]2([H:12])[O:3][C@@:4]12[H:11]>>[C:1]1([H:8])([H:9])[C@:2]2([H:10])[O:3][C@@:4]1([H:11])[C@:5]1([H:12])[O:6][C@:7]21[H:13],CCOC(C)=O,-3.06 +[C:1](#[C:2][C:3]([C:4]1([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14])([H:8])[H:9])[H:7]>>[C:1](=[C:2]=[C:3]([H:8])[H:9])([C:6]([C:4](=[C:5]([H:11])[H:12])[H:10])([H:13])[H:14])[H:7],CCCCCCC,0.17 +[O+:1]1=[C:2][N:7]([H:10])[C:5](=[O:6])[N:4]([H:9])[C-:3]1[H:8]>>[O:1]=[C:2]1[C:3]([H:8])[N:4]([H:9])[C:5](=[O:6])[N:7]1[H:10],CCCCCCC,0.99 +[C:1]([C:2]([O:3]/[C:4](=[N:5]/[H:13])[C:6]#[N:7])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([O+:3]=[C-:4][N:5]([N+:7]#[C-:6])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10],CCCCCCC,-2.14 +[C:1]([C:2]([C:3]1([C:4]([H:12])([H:13])[H:14])[C:5]([H:15])([H:16])[C:6]1([H:17])[H:18])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1](=[C:2]([H:10])[H:11])([H:7])[H:8].[C:3]1([C:4]([H:12])([H:13])[H:14])=[C:6]([H:17])[C:5]1([H:15])[H:16].[H:9][H:18],CCCCOCCCC,-0.85 +[O:1]([C:2]([C+:3]([N:5]([C-:4]([H:10])[H:11])[H:12])[H:9])([H:7])[H:8])[H:6]>>[O:1]([C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11])[N:5]1[H:12])([H:7])[H:8])[H:6],CCCCOCCCC,0.91 +[C:1]1([H:8])([H:9])[C:2]([H:10])([H:16])[C:3]([H:11])([H:12])[C@:4]2([H:13])[C:5]([H:14])([H:15])[C:6][C@:7]12[H:17]>>[C:1]1([H:8])([H:9])[C@@:2]2([H:10])[C:3]([H:11])([H:12])[C@:4]3([H:13])[C:5]([H:14])([H:15])[C@@:6]2([H:16])[C@:7]13[H:17],CCCCOCCCC,0.56 +[C:1]([C@@:2]1([H:10])[C@:3]([C:4]([H:12])([H:13])[H:14])([H:11])[C@:5]1([O:6][H:16])[H:15])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]([C:3]([C:4]([H:12])([H:13])[H:14])([H:11])[H:15])([C:5][O:6][H:16])[H:10])([H:7])([H:8])[H:9],OCCCCCCCCC,-3.34 +[N:1](=[C:2]1[C+:3]([H:9])[N:4]([H:10])[N-:5][C:6]1([H:7])[H:11])[H:8]>>[N:1]([c:2]1[c:3]([H:9])[n:4]([H:10])[n:5][c:6]1[H:11])([H:7])[H:8],OCCCCCCCCC,-1.63 +[O:1]([C:2][C:3]([C:4]1([O:5][H:12])[C:6]([H:13])([H:14])[C:7]1([H:9])[H:15])([H:10])[H:11])[H:8]>>[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C@@:4]2([O:5][H:12])[C:6]([H:13])([H:14])[C@@:7]12[H:15])[H:8],OCCCCCCCCC,3.18 
+[O:1]([C@@:2]1([H:8])[C:3]([H:9])([H:10])[N:4]([H:11])[C:5]1=[O:6])[H:7]>>[O:1]([C@@:2]1([H:8])[C:5]([N:4]([C-:3]([H:9])[H:10])[H:11])=[O+:6]1)[H:7],ClC=CCl,0.39 +[O:1]([C@@:2]1([H:8])[C:3]([H:9])([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])=[C:6]1[H:14])[H:7]>>[H:7][H:8].[O:1]=[C:2]1[C:3]([H:9])([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])=[C:6]1[H:14],ClC=CCl,-2.09 +[C-:2]([O+:3]=[C:4]([C:5]([O:6][C:7]([H:17])([H:18])[H:19])([H:15])[H:16])[H:13])([H:11])[H:12].[C:1]([H:8])([H:9])([H:10])[H:14]>>[C:1]([C:2]([O:3][C:4]([C:5]([O:6][C:7]([H:17])([H:18])[H:19])([H:15])[H:16])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],ClC=CCl,-0.53 +[C:1]([N+:2]([C:3](=[C:4]=[N:5][H:11])[H:10])=[N-:6])([H:7])([H:8])[H:9]>>[C:1]([n:2]1[c:3]([H:10])[c:4]([H:11])[n:5][n:6]1)([H:7])([H:8])[H:9],C1CCCC1,-0.76 +[C:1]([N:2]([C:3](=[O:4])[C:5]([H:10])([H:11])[H:12])[H:9])([H:6])([H:7])[H:8]>>[C:1]([N:2]=[C:3]=[O:4])([H:6])([H:7])[H:8].[C:5]([H:9])([H:10])([H:11])[H:12],C1CCCC1,0.85 +[O:1]=[C:2]([C:3]([H:7])([H:8])[H:9])[O:6][C:5](=[C:4]([H:10])[H:11])[H:12]>>[O:1]=[C:2]([C:3]([C:4]([C:5](=[O:6])[H:12])([H:10])[H:11])([H:8])[H:9])[H:7],C1CCCC1,-0.95 +[C:1]([C@:2]1([H:14])[N:3]([H:11])[C@:4]1([C:5](=[C:6]=[O:7])[H:13])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C@:2]12[N:3]([H:11])[C@@:4]1([H:12])[C:5]([H:13])([H:14])[C:6]2=[O:7])([H:8])([H:9])[H:10],Fc1c(F)c(F)c(F)c(F)c1F,-2.49 +[C:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C-:4]=[C:5]([H:13])[C+:6]1[H:14])([H:7])([H:8])[H:12]>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C:3]([H:10])([H:11])[C:4]([H:12])=[C:5]([H:13])[C@@:6]12[H:14],Fc1c(F)c(F)c(F)c(F)c1F,1.84 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C:4](=[O:5])[N:6]1[H:13])([H:7])([H:8])[H:9]>>[C:1]([C-:2]([N:6]([C:4]1=[O+:5][C:3]1([H:11])[H:12])[H:13])[H:10])([H:7])([H:8])[H:9],Fc1c(F)c(F)c(F)c(F)c1F,1.63 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C@:4]2([C:5]([H:14])([H:15])[O:6]2)[C:7]1([H:16])[H:17])([H:8])([H:9])[H:10]>>[C:1]([C:2](=[C:3]([H:12])[H:13])[H:11])([H:8])([H:9])[H:10].[C:4]1(=[C:7]([H:16])[H:17])[C:5]([H:14])([H:15])[O:6]1,Fc1ccccc1,0.11 +[C:1]1([H:7])([H:8])[N:2]([H:9])[C:3]12[C:4]([H:10])([H:11])[O:5][C:6]2([H:12])[H:13]>>[C:1](=[N:2][H:9])([H:7])[H:8].[C:3]1([H:11])=[C:4]([H:10])[O:5][C:6]1([H:12])[H:13],Fc1ccccc1,-2.95 +[C:1]([C@@:2]([O:3][H:11])([C:4]([C:5](=[O:6])[H:14])([H:12])[H:13])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[O:3])[H:10])([H:7])([H:8])[H:9].[C:4](=[C:5]([O:6][H:11])[H:14])([H:12])[H:13],Fc1ccccc1,0.71 +[N:1]([C:2](=[O:3])[C:4]([N:5]1[C:6]([H:12])([H:13])[C:7]1([H:14])[H:15])([H:10])[H:11])([H:8])[H:9]>>[N:1](=[C:2](/[O:3][H:9])[C:4]([N:5]1[C:6]([H:12])([H:13])[C:7]1([H:14])[H:15])([H:10])[H:11])\[H:8],CCCCl,1.31 +[C:1]([C@:2]1([N:3]([H:11])[H:12])[C:4]([H:13])([H:14])[C:5]([H:8])([H:15])[C:6]([H:16])([H:17])[O:7]1)([H:9])[H:10]>>[C:1]([C@:2]1([N:3]([H:11])[H:12])[C:4]([H:13])([H:14])[C:5]([H:15])[C:6]([H:16])([H:17])[O:7]1)([H:8])([H:9])[H:10],CCCCl,0.41 +[C:1]([O:2][C@@:3]([C:4]([H:11])([H:12])[H:13])([C:5]([O:6][H:16])([H:14])[H:15])[H:10])([H:7])([H:8])[H:9]>>[C:1]([O:6][C:5]([C@@:3]([O:2][H:16])([C:4]([H:11])([H:12])[H:13])[H:10])([H:14])[H:15])([H:7])([H:8])[H:9],CCCCl,-0.54 +[C:1](=[C:2]([C@@:3]1([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]=[C:7]1[H:17])[H:11])([H:8])[H:10].[H:9][H:16]>>[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10],CCCCCCCl,-1.87 
+[C:1]([C@@:2]([O+:3]=[C-:4][H:12])([C:5]([C:6]([H:14])([H:15])[H:16])([C:7]([H:17])([H:18])[H:19])[H:13])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[O:3][C:4]([H:12])([H:13])[C:5]1([C:6]([H:14])([H:15])[H:16])[C:7]([H:17])([H:18])[H:19])([H:8])([H:9])[H:10],CCCCCCCl,0.16 +[O:1]=[c:2]1[c:3]([H:8])[c:4]([H:9])[n:5]([H:10])[c:6]([H:11])[n:7]1>>[O+:1]1=[C:2]([H:8])[N:7]=[C:6]([H:11])[N:5]([H:10])[C:4]([H:9])=[C-:3]1,CCCCCCCl,1.66 +[C:1]([H:7])([H:8])([H:9])[H:12].[C:2]1([H:10])=[C:3]([H:11])[C:4]([H:13])([H:14])[C@:5]1([O:6][H:16])[H:15]>>[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C@:5]1([O:6][H:16])[H:15])([H:7])([H:8])[H:9],CCCCCCCCCCCCCCCC,-1.28 +[C:1]([C:2]([C@:3]([N:4][H:14])([C:5](=[O:6])[O:7][H:15])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2](/[N+:4](=[C:3](\[C:5]([O-:6])[O:7][H:15])[H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],CCCCCCCCCCCCCCCC,0.54 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:13])([H:16])[O:7]1)([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:16])[O:7]1)([H:8])([H:9])[H:10],CCCCCCCCCCCCCCCC,0.28 +[C:1](/[C:2](=[C:6](/[C:5]1([H:15])[C:3]([H:11])([H:12])[C:4]1([H:13])[H:14])[H:16])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]1[H:16])([H:7])([H:8])[H:9],ClC(Cl)(Cl)Cl,0.11 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])[N:4]([H:11])[C:5]([H:12])([H:13])[C@@:6]1([O:7][H:15])[H:14])[H:8]>>[O:1]([C@@:2](/[C:3](=[N+:4](\[C-:5][H:13])[H:11])[H:10])([C:6]([O:7][H:15])([H:12])[H:14])[H:9])[H:8],ClC(Cl)(Cl)Cl,-0.84 +[C:1]([N:2]([C:3](=[O:4])[C:5]([H:10])([H:11])[H:12])[H:9])([H:6])([H:7])[H:8]>>[C:1]([O:4]/[C:3](=[N:2]/[H:9])[C:5]([H:10])([H:11])[H:12])([H:6])([H:7])[H:8],ClC(Cl)(Cl)Cl,1.28 +[N:1]1([H:7])[C@@:2]2([H:8])[C@:3]1([H:9])[C@@:4]1([H:10])[N:5]([H:11])[C@@:6]21[H:12]>>[N:1]1([H:7])[C@@:2]([C@@:6]2([H:12])[C:4][N:5]2[H:11])([H:8])[C:3]1([H:9])[H:10],OCC(O)CO,0.42 +[C:2]1([H:9])([H:10])[C:3]([H:11])([H:12])[C@@:4]2([H:13])[C@:5]1([H:15])[O:6][C:7]2([H:16])[H:17].[O:1]([H:8])[H:14]>>[O:1]([C:2]([C:3]([C:4]1([H:13])[C:5]([H:14])([H:15])[O:6][C:7]1([H:16])[H:17])([H:11])[H:12])([H:9])[H:10])[H:8],OCC(O)CO,2.25 +[O:1]([C:2]([C:3]1([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:8])[H:9])[H:7]>>[O:1]([C:2]([C@@:3]([C:4]([H:11])([H:12])[H:13])([C:6]([C:5][H:14])([H:15])[H:16])[H:10])([H:8])[H:9])[H:7],OCC(O)CO,-5.82 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[O:4][C:5]([H:12])=[N:6][C:7]1([H:13])[H:14])[H:8]>>[O:1]([C:2](=[C:3]([H:10])[H:11])[H:9])[H:8].[O:4]=[C:5]([N:6]=[C:7]([H:13])[H:14])[H:12],Cc1cccnc1C,2.19 +[C:1]([C:2]([C:3]1=[C:5]([C:6]([H:16])([H:17])[H:18])[C:4]1([H:14])[H:15])([H:11])[H:12])([H:8])([H:9])[H:10].[O:7]([H:13])[H:19]>>[C:1]([C:2]([C@@:3]1([H:13])[C:4]([H:14])([H:15])[C@:5]1([C:6]([H:16])([H:17])[H:18])[O:7][H:19])([H:11])[H:12])([H:8])([H:9])[H:10],Cc1cccnc1C,0.21 +[C:1]([O:2][C@@:3]1([H:11])[C:4]([H:12])([H:13])[C@:5]1([C:6]([O:7][H:17])([H:15])[H:16])[H:14])([H:8])([H:9])[H:10]>>[C:1]([O:2][C@@:3]1([H:11])[C:4]([H:12])([H:13])[C:5]1=[C:6]([H:15])[H:16])([H:8])([H:9])[H:10].[O:7]([H:14])[H:17],Cc1cccnc1C,-3.52 +[C:1](=[C:2]([H:11])[H:12])([H:9])[H:10].[C:3](=[C:4]([O:5][H:8])[C:6](=[O:7])[H:15])([H:13])[H:14]>>[C:1]([C:2]([C:3]([C:4](=[O:5])[C:6](=[O:7])[H:15])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],CCC(O)CC,3.87 
+[C:1]([C:2]([N:3][N:4]([C:5]([H:13])([H:14])[H:15])[C:6]([C:7]([H:18])([H:19])[H:20])([H:16])[H:17])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([N:3]([N:4]([C:5]([H:13])([H:14])[H:15])[C:6]([C:7]([H:18])([H:19])[H:20])([H:16])[H:17])[H:12])[H:11])([H:8])([H:9])[H:10],CCC(O)CC,0.24 +[C:1]([C@@:2]1([O:3][H:11])[C:4]([H:12])([H:13])[N:5]2[C:6]([H:14])([H:15])[C@@:7]12[H:16])([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[C:7]2[N:5]([C:4]1([H:12])[H:13])[C:6]2([H:14])[H:15])([H:8])([H:9])[H:10].[O:3]([H:11])[H:16],CCC(O)CC,-8.86 +[O:1]([C:3](=[C:2]=[N:6][C:5](=[N:4][H:9])[H:10])[H:8])[H:7]>>[O:1]([c:2]1[c:3]([H:8])[n:4]([H:9])[c:5]([H:10])[n:6]1)[H:7],Ic1ccccc1,-0.1 +[N:1]#[C:2][C:3]([C@@:4]1([H:9])[C:5]([H:10])([H:11])[O:6]1)([H:7])[H:8]>>[N:1]#[C:2][C:3]([C-:4]=[O+:6][C:5]([H:9])([H:10])[H:11])([H:7])[H:8],Ic1ccccc1,-0.96 +[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C@:4]1([C@@:5]1([H:14])[C:6]([H:15])([H:16])[O:7]1)[H:13])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[N:3]([H:12])[O:7][C:6]([H:15])([H:16])[C:5]([H:14])=[C:4]1[H:13])([H:8])([H:9])[H:10],Ic1ccccc1,-1.04 +[C:1]1([H:7])([H:8])[O:2][C@@:3]2([H:9])[C:4]([H:10])([H:11])[O:5][C@@:6]12[H:12]>>[C:1]1([H:7])([H:8])[O:2][C:3]([H:9])=[C:6]([H:12])[O:5][C:4]1([H:10])[H:11],CCCCC(C)=O,1.4 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C@:4]2([H:12])[C:5]([H:13])([H:14])[C@@:6]1([H:15])[O:7]2)[H:8]>>[C:3]1([H:10])([H:11])[C@@:4]2([H:12])[C:5]([H:13])([H:14])[C@:6]1([H:15])[O:7]2.[O:1]([C:2][H:9])[H:8],CCCCC(C)=O,-2.14 +[C:1]([C:2]([N:3]([H:11])[H:13])=[O:4])([H:8])([H:9])[H:10].[C:5]1([H:12])=[C:6]([H:14])[C:7]1([H:15])[H:16]>>[C:1](/[C:2](=[N:3]/[H:11])[O:4][C:5]1([H:12])[C:6]([H:13])([H:14])[C:7]1([H:15])[H:16])([H:8])([H:9])[H:10],CCCCC(C)=O,2.67 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4][C:6]([H:15])([H:16])[C@@:7]1([O:5][H:14])[H:17])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C@@:4]2([O:5][H:14])[C:6]([H:15])([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10],C=CCCCC,1.07 +[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C@:3]2([H:12])[C:4]([H:13])([H:14])[C@@:5]1([H:15])[C:6]([H:16])=[C:7]2[H:17])([H:8])([H:9])[H:10],C=CCCCC,-0.08 +[C:1]([C@@:2]1([H:11])[C:3](=[C:4]([H:12])[H:13])[N:5]([H:14])[C:6]1=[O:7])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])=[C:4]([H:13])[N:5]([H:14])[C:6]1=[O:7])([H:8])([H:9])[H:10],C=CCCCC,-4.27 +[O:1]([C:2][C:3]([O:4]/[C:5](=[N:6]\[C:7]([H:9])([H:13])[H:14])[H:12])([H:10])[H:11])[H:8]>>[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[O:4][C:5]([H:12])=[N:6][C:7]1([H:13])[H:14])[H:8],CCOCC,0.31 +[N+:1](#[C-:2])[C@:3]1([H:6])[C:4]([H:7])[C:5]1([H:8])[H:9]>>[N:1]#[C:2][C@:3]1([H:6])[C:4]([H:7])[C:5]1([H:8])[H:9],CCOCC,0.73 +[C:1](=[C:2]([H:11])[H:12])([H:8])[H:9].[C:3]1(=[C:6]=[C:7]([H:10])[H:15])[C:4]([H:13])([H:14])[O:5]1>>[C:1]([C:2]([C@@:3]1([C:6]#[C:7][H:15])[C:4]([H:13])([H:14])[O:5]1)([H:11])[H:12])([H:8])([H:9])[H:10],CCOCC,-0.55 +[O:1]([c:2]1[n:3][n:4]([H:8])[c:5]([H:9])[c:6]1[H:10])[H:7]>>[O:1]([C:2]1=[C:6]([H:10])[C@@:5]2([H:9])[N:3]1[N:4]2[H:8])[H:7],Nc1ccccc1,2.87 +[N:1]1([H:8])[C-:2]=[O+:7][N:6]=[C:5]([H:11])[C:4]([H:10])=[C:3]1[H:9]>>[N:1](=[c:2]1/[c:3]([H:9])[c:4]([H:10])[c:5]([H:11])[n:6][o:7]1)\[H:8],Nc1ccccc1,2.84 +[O:1]=[C:2]([c:3]1[n:4][c:5]([H:9])[c:6]([H:10])[o:7]1)[H:8]>>[O:1]=[C:2]([C:3]([N:4]=[C:5]=[C:6]([H:9])[H:10])=[O:7])[H:8],Nc1ccccc1,-1.75 
+[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5]([O:6][H:14])([H:12])[H:13])[H:11])([H:7])([H:8])[H:9]>>[C:1]([H:7])([H:8])([H:9])[H:12].[C@@:2]12([H:10])[O:3][C@:4]1([H:11])[C@:5]2([O:6][H:14])[H:13],CCCC(=O)OC,-2.26 +[O:1]=[C:2]([C@@:3]1([H:9])[C@@:4]2([H:10])[C:5]([H:11])([H:12])[C:6]([H:13])([H:14])[N:7]12)[H:8]>>[O:1]=[C:2](/[C:3](=[N:7]/[C:4]1([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14])[H:9])[H:8],CCCC(=O)OC,-1.47 +[N:1]([c:2]1[n:3][n:4]([H:10])[c:5]([O:6][H:11])[n:7]1)([H:8])[H:9]>>[N:1]([c:2]1[n:3]([H:10])[n:4][c:5]([O:6][H:11])[n:7]1)([H:8])[H:9],CCCC(=O)OC,0.67 +[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C:4]([O:5][C:7]([H:14])([H:15])[H:16])=[C:6]1[H:13])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C:4](=[O:5])[C@:6]1([C:7]([H:14])([H:15])[H:16])[H:13])([H:8])([H:9])[H:10],CCCCCOC(C)=O,-0.52 +[C:1]([C@@:2]1([H:10])[O:3][C:4]1=[C:5]([H:12])[H:13])([H:7])([H:8])[H:9].[O:6]([H:11])[H:14]>>[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5]([O:6][H:14])([H:12])[H:13])[H:11])([H:7])([H:8])[H:9],CCCCCOC(C)=O,-1 +[C:1]([O:2][H:12])([H:8])([H:9])[H:10].[C:3](=[O:4])=[C:5]([C:6]([O:7][H:15])([H:13])[H:14])[H:11]>>[C:1]([O:2][C:3](=[O:4])[C:5]([C:6]([O:7][H:15])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],CCCCCOC(C)=O,2.3 +[O:1]([C:2]1([H:9])[C:3]([H:10])([H:11])[N:4]([C:6](=[O:7])[H:14])[C:5]1([H:12])[H:13])[H:8]>>[C:2]1([H:9])=[C:3]([H:10])[N:4]([C:6](=[O:7])[H:14])[C:5]1([H:12])[H:13].[O:1]([H:8])[H:11],CCCCCC#N,0.16 +[C:1]1([H:4])([H:5])[C:2]([H:6])([H:7])[O:3]1>>[C:1]1([H:4])([H:5])[C-:2]=[O+:3]1.[H:6][H:7],CCCCCC#N,-0.34 +[C:1]([C@@:2]1([H:11])[N:3]([H:12])[N:6]([O:7][H:15])[C:5]1=[C:4]([H:13])[H:14])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C:4]([H:13])([H:14])/[C:5]1=[N:6]\[O:7][H:15])([H:8])([H:9])[H:10],CCCCCC#N,-1.61 +[N:1](=[C:2]1/[C:3]([H:8])=[C:4]([C:5]([H:9])([H:10])[H:11])[O:6]1)\[H:7]>>[N:1](=[C:2]1/[C:3]([H:8])=[C:4]([H:9])[C:5]([H:10])([H:11])[O:6]1)\[H:7],CCO,-13.72 +[O:1]=[C:2]([C@:6]1([H:14])[C:5]([H:12])([H:13])[C@:4]2([H:11])[C:3]([H:8])([H:10])[N:7]21)[H:9]>>[O:1]([C@@:2]1([H:9])[C@@:3]2([H:10])[C@@:4]3([H:11])[C:5]([H:12])([H:13])[C@:6]1([H:14])[N:7]23)[H:8],CCO,-1.76 +[O:1]([C:2]([c:3]1[n:4][o:5][c:6]([H:11])[c:7]1[H:12])([H:9])[H:10])[H:8]>>[O:1]([C:2]([C:3]1=[C:7]=[C:6]([H:11])[O:5][N:4]1[H:12])([H:9])[H:10])[H:8],CCO,-6.14 +[C:1](/[C:4]([C:3]([C:2]([H:9])[H:10])([H:11])[H:12])=[N:5]\[H:13])([H:6])([H:7])[H:8]>>[C:1]([C:2]([C:3]([C:4]=[N:5][H:13])([H:11])[H:12])([H:9])[H:10])([H:6])([H:7])[H:8],CC(=O)O,1.77 +[N:1]([C:2]1=[N:7][N:6]2[C@@:3]1([H:10])[N:4]=[C:5]2[H:11])([H:8])[H:9]>>[N:1]([c:2]1[c:3]([H:10])[n:4][c:5]([H:11])[n:6][n:7]1)([H:8])[H:9],CC(=O)O,-0.89 +[c:1]1([H:7])[c:2]([H:8])[n:3][n:4][c:5]([H:9])[c:6]1[H:10]>>[C:1]1([H:7])=[C:6]=[C:5]([H:9])[N-:4][N:3]([H:10])[C+:2]1[H:8],CC(=O)O,4.45 +[C:1]([C@:2]1([H:11])[C@:5]([O+:4]=[C-:3][H:12])([H:13])[C@:6]1([O:7][H:15])[H:14])([H:8])([H:9])[H:10]>>[C:1]([C@:2]12[C:3]([H:11])([H:12])[O:4][C@@:5]1([H:13])[C@:6]2([O:7][H:15])[H:14])([H:8])([H:9])[H:10],CO,2.54 +[C:1]([C@@:2]([O:3][H:12])([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]#[N:7])[H:13])[H:11])([H:8])([H:9])[H:10]>>[C:1](/[C:2]([O:3][H:12])=[C:4](/[C:5]([H:14])([H:15])[H:16])[C:6]#[N:7])([H:8])([H:9])[H:10].[H:11][H:13],CO,-4.7 +[C:1]([C@@:2]([C@@:3]([C:4]([H:12])([H:13])[H:14])([O:6][H:16])[H:11])([C:5][H:15])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C@:3]([C:4]([H:12])([H:13])[H:14])([H:11])[C@:5]1([O:6][H:16])[H:15])([H:7])([H:8])[H:9],CO,2.07 
+[C:1]([C@@:2]12[C:3]([H:11])([H:12])[C@:4]1([O:5][H:13])[C:6]([H:14])([H:15])[O:7]2)([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[O+:7][C-:6]([H:15])[C@:4]1([C:3]([H:11])([H:12])[H:14])[O:5][H:13])([H:8])([H:9])[H:10],CC(C)O,1.08 +[C:1]([C:2]1=[C:3]([H:11])[C:4]([H:12])([H:13])[C@@:5]2([H:14])[O:6][C@@:7]12[H:15])([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[C:7]2[C@@:5]([H:14])([C:4]([H:12])([H:13])[C:3]1([H:11])[H:15])[O:6]2)([H:8])([H:9])[H:10],CC(C)O,0.33 +[O:1]([C:4](=[C:3]=[C:2]([H:8])[H:9])[C:5]([O:6][H:12])([H:10])[H:11])[H:7]>>[O:1]([C:2]([C:3]#[C:4][C:5]([O:6][H:12])([H:10])[H:11])([H:8])[H:9])[H:7],CC(C)O,-9.44 +[C:1]([N:2]([C:3](=[O:4])[N:5]([C-:6]=[N+:7]([H:13])[H:14])[H:12])[H:11])([H:8])([H:9])[H:10]>>[C:1]([N:2]([C:3](=[O:4])[N:5](/[C:6](=[N:7]/[H:14])[H:13])[H:12])[H:11])([H:8])([H:9])[H:10],CC(C)=O,3.89 +[O:1]([C:2]1([C:5](=[O:6])[H:12])[C:3]([H:8])([H:9])[C:4]1([H:10])[H:11])[H:7]>>[O:1]([C:2]([C:5]1([H:12])[C:3]([H:8])([H:9])[C:4]1([H:10])[H:11])=[O:6])[H:7],CC(C)=O,-5.58 +[N:1](=[C:2]=[C:3]([C@@:4]1([H:9])[C:5]([H:10])([H:11])[O:6]1)[H:8])[H:7]>>[N:1]#[C:2][C:3]([C@@:4]1([H:9])[C:5]([H:10])([H:11])[O:6]1)([H:7])[H:8],CC(C)=O,-2.45 +[C:1]([C:2]([C:4](=[O:5])[C:6](=[O:7])[H:15])([H:11])[H:13])([H:8])([H:9])[H:10].[O:3]([H:12])[H:14]>>[C:1]([C@@:2]([O:3][H:12])([C@@:4]([O:5][H:14])([C:6](=[O:7])[H:15])[H:13])[H:11])([H:8])([H:9])[H:10],ClC(Cl)Cl,-5.36 +[C:1]([C:2]([N:3]1[C:4]([H:11])([H:12])[C:5]1([H:13])[H:14])([H:9])[H:10])([H:6])([H:7])[H:8]>>[C:1]([C-:2]([N+:3]1=[C:5]([H:14])[C:4]1([H:11])[H:12])[H:10])([H:6])([H:7])[H:8].[H:9][H:13],ClC(Cl)Cl,-0.44 +[C:1]([C@@:2]12[C:3]([H:11])([H:12])[C@:4]1([O:5][H:13])[C:6]([H:14])([H:15])[O:7]2)([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[C:4]([O:5][H:13])[C:6]([H:14])([H:15])[O:7][C:3]1([H:11])[H:12])([H:8])([H:9])[H:10],ClC(Cl)Cl,-4.34 +[O:1]=[C:2]([C:3]#[C:4][C:5](=[O:6])[H:8])[H:7]>>[C-:3]#[C:4][C+:5]([O:6][H:7])[H:8].[O+:1]#[C-:2],CS(C)=O,0.36 +[C:1](=[C:2](/[C:3](=[N:4]\[C:5]([N:6]([H:12])[H:13])=[O:7])[H:11])[H:9])([H:8])[H:10]>>[C:1]([c:2]1[c:3]([H:11])[n:4][c:5]([N:6]([H:12])[H:13])[o:7]1)([H:8])([H:9])[H:10],CS(C)=O,-1.45 +[O:1]([N:2]=[C:3]=[C:4]([C:5]([C:6]([O:7][H:14])([H:9])[H:13])([H:11])[H:12])[H:10])[H:8]>>[O:1](/[N:2]=[C:3]1/[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[C@:6]1([O:7][H:14])[H:13])[H:8],CS(C)=O,1.13 +[C:1]([C@@:2]1([C:5](=[O:6])[H:13])[C:3]([H:10])([H:11])[N:4]1[H:12])([H:7])([H:8])[H:9]>>[C:1]([O:6]/[C:5](=[C:2]1/[C:3]([H:10])([H:11])[N:4]1[H:12])[H:13])([H:7])([H:8])[H:9],CN(C)C=O,-0.6 +[C:1]([C:2](=[O:3])[C:4]([C:5]([N:7]([H:18])[H:19])[H:14])([H:12])[H:13])([H:8])([H:9])[H:10].[C:6]([H:11])([H:15])([H:16])[H:17]>>[C:1]([C:2]([O:3][H:11])[C:4]([C@:5]([C:6]([H:15])([H:16])[H:17])([N:7]([H:18])[H:19])[H:14])([H:12])[H:13])([H:8])([H:9])[H:10],CN(C)C=O,-2.55 +[N-:1]=[C:2]=[C+:3][C:4]([C:5]([C:6]([H:7])([H:12])[H:13])([H:10])[H:11])([H:8])[H:9]>>[N:1]#[C:2][C:3]1([H:7])[C:4]([H:8])([H:9])[C:5]([H:10])([H:11])[C:6]1([H:12])[H:13],CN(C)C=O,-1.05 +[C:1]([C:2][C:3]([H:10])([H:11])[H:12])([H:7])([H:8])[H:9].[O:4]([C:5]([O:6][H:16])([H:14])[H:15])[H:13]>>[C:1]([C:2]([C:3]([H:10])([H:11])[H:12])([O:4][H:13])[C:5]([O:6][H:16])([H:14])[H:15])([H:7])([H:8])[H:9],CCCO,4.57 +[C:1]([C:2]1[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[N:5]=[C:6]([H:15])[O:7]1)([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([C:4]([H:13])[H:14])([H:11])[H:12])=[O:7])([H:8])([H:9])[H:10].[N:5]#[C:6][H:15],CCCO,-0.02 
+[C:1]([N:2]1[C:3]([H:10])([H:11])[C:4]1([C:5]([H:12])([H:13])[H:14])[C:6]([H:15])([H:16])[H:17])([H:7])([H:8])[H:9]>>[C:1]([N+:2]1([H:10])[C-:3]([H:11])[C:4]1([C:5]([H:12])([H:13])[H:14])[C:6]([H:15])([H:16])[H:17])([H:7])([H:8])[H:9],CCCO,-3.9 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C@@:4]2([O:5][H:14])[C:6]([H:15])([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]1([H:8])([H:9])[C@@:2]2([H:11])[C:3]([H:12])([H:13])[C@@:4]([O:5][H:14])([C:6]([H:10])([H:15])[H:16])[C@@:7]12[H:17],CCCCO,-0.3 +[C:1]1([H:7])=[C:2]=[C:3]([H:8])[O:4][C@@:5]1([C:6]([H:10])[H:11])[H:9]>>[C:1](=[C:2]=[C:3]([O:4][C:5](=[C:6]([H:10])[H:11])[H:9])[H:8])[H:7],CCCCO,5.96 +[C:1]([C@@:2]1([H:8])[C:3]([H:9])([H:10])[O:4]1)([H:5])([H:6])[H:7]>>[C:1](=[C:2]([H:6])[H:8])([H:5])[H:7].[C:3](=[O:4])([H:9])[H:10],CCCCO,-9.37 +[C:1]([C:2]([C:3]#[C:4][C:5]([O:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]=[C:4]([C:5]([O:6][H:14])([H:12])[H:13])[H:10])[H:11])([H:7])([H:8])[H:9],CCCCCO,-4.31 +[C:1]([C:2][C:3]([C:4]([C@@:5]1([H:15])[C:6]([H:16])([H:17])[O:7]1)([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C@:2]12[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C@@:5]1([H:15])[C:6]([H:16])([H:17])[O:7]2)([H:8])([H:9])[H:10],CCCCCO,1.86 +[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])([C:4](=[O:5])[N:6]([H:14])[H:15])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C@@:4]([N:6]([H:14])[H:15])([H:10])[O:5]1)([H:7])([H:8])[H:9],CCCCCO,-5.39 +[C:1]([C:2]([C@@:3]([O:4][H:14])([C@@:5]([C:6]([H:16])([H:17])[H:18])([O:7][H:19])[H:15])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2](/[C:3](=[C:5](\[C:6]([H:16])([H:17])[H:18])[O:7][H:19])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10].[O:4]([H:14])[H:15],c1ccccc1,-0.28 +[C:1]([N:2][C:3]([C:4]([N:5]([C:6]([H:15])([H:16])[H:17])[H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([N-:2][C:3]([C+:4][N:5]([C:6]([H:15])([H:16])[H:17])[H:14])([H:10])[H:11])([H:7])([H:8])[H:9].[H:12][H:13],c1ccccc1,-0.57 +[C:1]([O:6]/[C:5](=[C:2](\[C:3](=[O:4])[H:11])[H:10])[H:12])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3](=[O:4])[H:11])([C:5](=[O:6])[H:12])[H:10])([H:7])([H:8])[H:9],c1ccccc1,0.8 +[N:1]#[C:2][C@:3]1([H:6])[C:4]([H:7])[C:5]1([H:8])[H:9]>>[N:1]=[C:2]=[C:3]([C:4](=[C:5]([H:8])[H:9])[H:7])[H:6],CC(Cl)(Cl)Cl,-0.24 +[C:1]([O:2][C:3]([C@@:4]1([H:13])[C:5]([H:14])([H:15])[C@:6]1([O:7][H:17])[H:16])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([O:2][C:3]([C:4][C:5]([C:6]([O:7][H:17])([H:13])[H:16])([H:14])[H:15])([H:11])[H:12])([H:8])([H:9])[H:10],CC(Cl)(Cl)Cl,-0.72 +[C:1]1([H:6])=[C:2]=[C:3]([H:7])[C:4]([H:8])([H:9])[O:5]1>>[C:1](#[C:2][C@@:3]1([H:7])[C:4]([H:8])([H:9])[O:5]1)[H:6],CC(Cl)(Cl)Cl,3.17 +[C:1]([C@:2]([N:3]([H:11])[H:12])([C:4](=[C:5]([O:6])[H:14])[H:13])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@:2]([N:3][H:11])([C:4]([C:5](=[O:6])[H:14])([H:12])[H:13])[H:10])([H:7])([H:8])[H:9],CI,0.54 +[C:1]1([H:7])([H:8])[C:2]([H:9])([H:12])[C:6]1([C:5](=[C:4]=[C:3]([H:10])[H:11])[H:13])[H:14]>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C:3]([H:10])([H:11])[C:4]([H:12])=[C:5]([H:13])[C@@:6]12[H:14],CI,-4.55 +[O:1]([C@@:2]1([H:8])[N:3]([H:9])[C:4]([H:10])([H:11])[C:5]([H:12])=[C:6]=[C:7]1[H:14])[H:13]>>[O:1]=[C:2]([N:3]([C:4]([C:5]([C:6]#[C:7][H:14])([H:12])[H:13])([H:10])[H:11])[H:9])[H:8],CI,-0.46 +[C:1]([N:2]1[C:3]([H:10])([H:11])[C@:4]1([C:5](=[O:6])[H:13])[H:12])([H:7])([H:8])[H:9]>>[C:1](=[N+:2]1[C:3]([H:10])([H:11])[C:4]1=[C:5]([O-:6])[H:13])([H:7])[H:8].[H:9][H:12],CCBr,-0.57 
+[C:1](/[C:2](=[N:3]/[H:11])[O:4][C:5]1([H:12])[C:6]([H:13])([H:14])[C:7]1([H:15])[H:16])([H:8])([H:9])[H:10]>>[C:1](/[C:2](=[N:3]/[H:11])[O:4][C@:5]([C:6][H:13])([C:7]([H:14])([H:15])[H:16])[H:12])([H:8])([H:9])[H:10],CCBr,-1.56 +[O:1]=[C:2]([c:3]1[c:4]([H:9])[n:5][c:6]([H:10])[n:7]1[H:11])[H:8]>>[O:1]=[C:2]([c:3]1[c-:4][n:5]([H:9])[c+:6]([H:10])[n:7]1[H:11])[H:8],CCBr,-1.03 +[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]12[C:4]([H:12])([H:13])[C@@:5]1([H:14])[O:6][C@@:7]21[H:15]>>[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]1([C@@:7]1([H:15])[C@:5]([C:4][H:12])([H:14])[O:6]1)[H:13],CCC,-0.64 +[N:1]([C:2](=[O:3])[C:4]([O:5]/[C:6](=[N:7]/[H:13])[H:12])([H:10])[H:11])([H:8])[H:9]>>[N:1]([C:2](=[O:3])[C:4]([N:7]([C:6](=[O:5])[H:12])[H:13])([H:10])[H:11])([H:8])[H:9],CCC,0.18 +[O:1]([C+:2]([N:3]([C:4](=[C:5]=[N-:6])[H:10])[H:8])[H:7])[H:9]>>[O:1]=[C:2]([N:3]([C:4]([C:5]#[N:6])([H:9])[H:10])[H:8])[H:7],CCC,-0.57 +[C:1](/[C:2]([C:3](=[C:4]([H:12])[H:13])[H:11])=[C:7](/[C:5](=[O:6])[H:14])[H:15])([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[C:3]([H:11])[C:4]([H:12])([H:13])[C@@:5]2([H:14])[O:6][C@@:7]12[H:15])([H:8])([H:9])[H:10],CCI,-2.29 +[C:1]([C:2]([C@@:3]1([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])([H:16])[O:6]1)([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](/[C:3](=[C:4](/[C:5]([O:6][H:13])([H:15])[H:16])[H:14])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9],CCI,-0.43 +[C:1]([C@@:2]([O:3][H:11])([C@@:4]1([H:12])[C:5]([H:13])([H:14])[N:6]1[H:15])[H:10])([H:7])([H:8])[H:9]>>[C:1](/[C:2](=[C:4]1\[C:5]([H:13])([H:14])[N:6]1[H:15])[H:10])([H:7])([H:8])[H:9].[O:3]([H:11])[H:12],CCI,-0.85 +[C:1]([N:2]([C:3]([H:10])([H:11])[H:12])[C:4]([C:5]#[N:6])([H:13])[H:14])([H:7])([H:8])[H:9]>>[C:1]([N+:2]([C:3]([H:10])([H:11])[H:12])([C-:4]([C:5]#[N:6])[H:13])[H:14])([H:7])([H:8])[H:9],CCN,-2.78 +[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5]#[N:6])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]([O:3][C:5]#[N:6])([C:4][H:11])[H:10])([H:7])([H:8])[H:9],CCN,-2.71 +[C:1]([c:2]1[c:3]([H:10])[o:4][n:5][n:6]1)([H:7])([H:8])[H:9]>>[C:1]([C:2]1([C:3](=[O:4])[H:10])[N:5]=[N:6]1)([H:7])([H:8])[H:9],CCN,0.2 +[C:1]([c:2]1[n:3][c:4][c:5]([H:10])[n:6]1[H:11])([H:7])([H:8])[H:9]>>[C:1](/[C:2]([C:4]#[N:3])=[N+:6](/[C-:5][H:10])[H:11])([H:7])([H:8])[H:9],CC#N,0.07 +[C:1]([C:2]([N:3]([C:4](=[O:5])[H:12])[H:11])([H:9])[H:10])([H:6])([H:7])[H:8]>>[C:1]1([H:6])([H:7])[C:2]([H:9])([H:10])[N:3]([H:11])[C@@:4]1([O:5][H:8])[H:12],CC#N,-0.01 +[C:1]([C@:2]([O:3][H:11])([N:4]1[C:5]([H:12])[C:6]1([H:13])[H:14])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2]([O:3][H:11])[N:4]1[C:5]([H:10])([H:12])[C:6]1([H:13])[H:14])([H:7])([H:8])[H:9],CC#N,-1.94 +[C+:1](=[C:2]=[C-:3][H:9])[H:7].[C:4]1([H:8])([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14]>>[C:1](#[C:2][C:3]([C:4]1([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14])([H:8])[H:9])[H:7],CC=O,-0.75 +[C:1]([C@@:2]1([C:5]#[O+:6])[C:3]([H:10])([H:11])[C-:4]1[H:13])([H:7])([H:8])[H:9].[H:12][H:14]>>[C:1]([C:2]1([C:5](=[O:6])[H:14])[C:3]([H:10])([H:11])[C:4]1([H:12])[H:13])([H:7])([H:8])[H:9],CC=O,-1.16 +[C:1]([C:2](/[C:3](=[N:4]\[H:14])[C@@:5]([O:6])([O:7][H:15])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C@:3]([N:4][H:14])([C:5](=[O:6])[O:7][H:15])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10],CC=O,0.26 +[C:1](/[N:2]=[C:3](/[N:4]([C:5](=[O:6])[H:12])[H:11])[H:10])([H:7])([H:8])[H:9]>>[C-:5]#[O+:6].[C:1]([N:2](/[C:3](=[N:4]/[H:11])[H:10])[H:12])([H:7])([H:8])[H:9],ClCCl,-0.73 
+[C:1]([C:2](=[O:3])[C:4](=[O:5])[N:6]([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[O:3])[H:10])([H:7])([H:8])[H:9].[C:4](=[O:5])=[N:6][H:11],ClCCl,0.94 +[C:1]([C:2]1([C:6]#[N:7])[C:4]([H:13])([H:14])[C:5]1([H:15])[H:16])([H:8])([H:9])[H:10].[C:3]([H:11])[H:12]>>[C:1]([C:2]1([C:6]#[N:7])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C:5]1([H:15])[H:16])([H:8])([H:9])[H:10],ClCCl,-0.68 +[O:1]=[C:2]1[C:3]([H:6])([H:7])[C:4]([H:8])([H:9])[C:5]1([H:10])[H:11]>>[O:1]([C:2]1=[C:5]([H:10])[C:4]([H:8])([H:9])[C:3]1([H:6])[H:7])[H:11],S=C=S,0.31 +[C:1]([C:2]([C@@:3]1([H:13])[C:4]([H:14])([H:15])[C@@:5]2([H:16])[C:6]([H:17])([H:18])[C@@:7]12[H:19])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C@:2]1([H:11])[C@@:3]2([H:13])[C:4]([H:14])([H:15])[C@@:5]3([H:16])[C@:6]1([H:18])[C@@:7]23[H:19])([H:8])([H:9])[H:10].[H:12][H:17],S=C=S,-0.71 +[N:1]#[C:2]/[C:3](=[C:7](\[C:6]([C:5](=[O:4])[H:9])([H:10])[H:11])[H:12])[H:8]>>[N:1]#[C:2][C@@:3]1([H:8])[O:4][C@@:5]2([H:9])[C:6]([H:10])([H:11])[C@@:7]12[H:12],S=C=S,0.25 +[C:1](=[C:2]([C:3]([O:4][H:11])[C@:5]1([H:12])[C:6]([H:13])([H:14])[O:7]1)[H:10])([H:8])[H:9]>>[C:1](=[C:2](/[C:3]([O:4][H:11])=[C:5]1\[C:6]([H:13])([H:14])[O:7]1)[H:10])[H:8].[H:9][H:12],CSC,-0.7 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])=[C:4]([H:13])[N:5]([H:14])[C:6]1=[O:7])([H:8])([H:9])[H:10]>>[C:1](/[C:2](=[C:3]([C:4](=[N:5]/[H:14])/[H:13])\[H:12])[C:6](=[O:7])[H:11])([H:8])([H:9])[H:10],CSC,-3.01 +[C:1]([C@@:2]12[O:3][C@@:4]3([H:11])[C:5]([H:12])([H:13])[C@:6]1([H:14])[C@@:7]23[H:15])([H:8])([H:9])[H:10]>>[C:1]([C:2](=[O:3])[C@:6]1([H:14])[C:5]([H:12])([H:13])[C:4]([H:11])=[C:7]1[H:15])([H:8])([H:9])[H:10],CSC,-0.84 +[C:1]1([H:7])([H:8])[C@:2]2([H:9])[C@:3]3([H:10])[C:4]([H:11])([H:12])[C@@:5]1([H:13])[C@:6]23[H:14]>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C@:3]([C:4]([H:11])([H:12])[H:13])([H:10])[C@@:6]2([H:14])[C:5]1,BrC(Br)Br,-5.44 +[C:1](=[C:2]([C:3]([O:4][H:11])[C@:5]1([H:12])[C:6]([H:13])([H:14])[O:7]1)[H:10])([H:8])[H:9]>>[C:1](=[C:2]([C:3]([O:4][H:11])[C:5](=[C:6]([O:7][H:13])[H:14])[H:12])[H:10])([H:8])[H:9],BrC(Br)Br,-0.04 +[C:1]([C:2]([C:3]([C:4]([C:5]([H:16])([H:17])[H:18])([H:14])[H:15])([C:6](=[O:7])[H:19])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1](/[C:2](=[C:3](/[C:6](=[O:7])[H:19])[H:13])[H:12])([H:8])([H:9])[H:10].[C:4](=[C:5]([H:16])[H:18])([H:14])[H:15].[H:11][H:17],BrC(Br)Br,-1.92 +[C:1]([C:2]1([H:10])[C:3]([H:11])=[C:6]1[H:14])([H:7])([H:8])[H:9].[C:4](=[O:5])([H:12])[H:13]>>[C:1]([C@@:2]1([H:10])[C@@:3]2([H:11])[C:4]([H:12])([H:13])[O:5][C@@:6]12[H:14])([H:7])([H:8])[H:9],NC(C)C,-1.08 +[O:1]=[C:2]([C:3]1([H:8])[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14])[H:7]>>[O:1](/[C:2](=[C:3](\[C:6]1([H:14])[C:4]([H:9])([H:10])[C:5]1([H:11])[H:12])[H:8])[H:7])[H:13],NC(C)C,0.03 +[C:1](=[C:2]([C:6]([C:5]([C:4]([C:3]([H:7])([H:11])[H:12])([H:13])[H:14])([H:15])[H:16])([H:17])[H:18])[H:10])([H:8])[H:9]>>[C:1]([C:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])([H:16])[C:6]1([H:17])[H:18])([H:7])([H:8])[H:9],NC(C)C,-0.53 +[C:2]1([H:8])([H:9])[C:3]([H:10])([H:11])[O:4]/[C:5]1=[N:6]/[H:13].[O:1]([H:7])[H:12]>>[O:1]([C:2]([C:3]([O:4]/[C:5](=[N:6]/[H:13])[H:12])([H:10])[H:11])([H:8])[H:9])[H:7],CC(Cl)Cl,0.25 +[C:1]1([H:6])([H:7])[C:2]([H:8])([H:9])[C:3]([H:10])=[C:4]([H:11])[C:5]1([H:12])[H:13]>>[C:1]([C:2]([C:3](=[C:4])[H:10])([H:8])[H:9])([C:5]([H:11])([H:12])[H:13])([H:6])[H:7],CC(Cl)Cl,-1.76 
+[C:1]([C:2]([C:3]([C:4]([N:5]([H:15])[H:16])=[C:6]=[N:7])([H:12])[H:13])([H:11])[H:14])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([C@:4]([N:5]([H:15])[H:16])([C:6]#[N:7])[H:14])([H:12])[H:13])[H:11])([H:8])([H:9])[H:10],CC(Cl)Cl,1.09 +[N:1]([C:2](=[C:3]([N+:4]#[N:5])[H:9])[N-:6][H:10])([H:7])[H:8]>>[N:1]([c:2]1[c:3]([H:9])[n:4][n:5][n:6]1[H:10])([H:7])[H:8],C[N+](=O)[O-],0.08 +[C:1]([N:2]([c:3]1[n:4]([H:12])[n:5][c:6]([H:13])[c:7]1[H:14])[H:11])([H:8])([H:9])[H:10]>>[C:1]([N:2]([c:3]1[n:4][n:5]([H:12])[c:6]([H:13])[c:7]1[H:14])[H:11])([H:8])([H:9])[H:10],C[N+](=O)[O-],2.07 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[O:4][C:5]1=[C:6]=[C:7]([H:14])[H:15])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[O:4][C@:5]1([C:6]#[C:7][H:15])[H:14])([H:8])([H:9])[H:10],C[N+](=O)[O-],-5.36 +[O:1]([C:2]([C@@:3]1([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[O:6]1)([H:8])[H:9])[H:7]>>[O:1]([C:2](/[C:3](=[C:4](/[C:5]([O:6][H:12])([H:13])[H:14])[H:11])[H:10])([H:8])[H:9])[H:7],CC(C)(C)O,-0.13 +[C:1]([C:2]([N:3]([C:4](=[O:5])[C:6]([H:13])([H:14])[H:15])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([N:3]([C-:4]=[O+:5][C:6]([H:13])([H:14])[H:15])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9],CC(C)(C)O,1.45 +[C:1]1([H:7])([H:8])[O+:2]=[C-:3][C@@:4]2([H:11])[N:5]([H:12])[C@@:6]12[H:13].[H:9][H:10]>>[C:1]1([H:7])([H:8])[O:2][C:3]([H:9])([H:10])[C@@:4]2([H:11])[N:5]([H:12])[C@@:6]12[H:13],CC(C)(C)O,2.15 +[O:1]=[N:2][C:3]1=[C:6]([H:13])[C:5]([H:11])([H:12])[C:4]1([H:9])[H:10].[O:7]([H:8])[H:14]>>[O:1](/[N:2]=[C:3]1/[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[C@:6]1([O:7][H:14])[H:13])[H:8],CCC(C)C,-1.46 +[N+:1](=[C-:2][C:3]([N:4]=[C:5]([H:9])[H:10])([H:7])[H:8])=[C:6]([H:11])[H:12]>>[N:1]#[C:2][C:3]([N:4]1[C:5]([H:9])([H:10])[C:6]1([H:11])[H:12])([H:7])[H:8],CCC(C)C,-2.11 +[C:1](/[C:2]([C-:3]([N+:4]#[N:5])[H:10])=[N:6]\[H:11])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[c:3]([H:10])[n:4][n:5][n:6]1[H:11])([H:7])([H:8])[H:9],CCC(C)C,-0.08 +[C:1]([C@@:2]1([H:10])[O+:3]=[C-:4][C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9].[H:11][H:12]>>[C:1]([C@@:2]1([H:10])[O:3][C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9],CCC(O)C,2.84 +[C:1]1([H:7])([H:8])[C:2]([H:9])([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])=[C:5]([H:14])[C:6]1([H:15])[H:16]>>[C:1]1([H:7])([H:8])[C:2]([H:9])([H:10])[C:3]([H:11])([H:12])[C:4]1([C:5](=[C:6]([H:15])[H:16])[H:14])[H:13],CCC(O)C,0.34 +[C:1]([C@@:2]([O:3][H:11])([C+:4]=[C:5]([O-:6])[H:12])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5](=[O:6])[H:12])[H:11])([H:7])([H:8])[H:9],CCC(O)C,10.23 +[O:1]([C@@:2]12[C:3]([H:9])([H:10])[C@@:4]3([H:11])[C:5]([H:12])([H:13])[C@:6]1([H:14])[C@@:7]23[H:15])[H:8]>>[O:1](/[C:2](=[C:3](/[C@@:4]1([H:11])[C:5]([H:12])([H:13])[C:6]([H:14])=[C:7]1[H:15])[H:9])[H:10])[H:8],CCC(C)=O,-1.85 +[C:1]([C:2]([C:3]1=[C:6]=[C:7]([H:15])[C:4]([H:13])([H:14])[O:5]1)([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C@@:3]1([C:6]#[C:7][H:15])[C:4]([H:13])([H:14])[O:5]1)([H:11])[H:12])([H:8])([H:9])[H:10],CCC(C)=O,1.01 +[N:1]#[C:2][C:3]([C@@:4]1([H:9])[C:5]([H:10])([H:11])[N:6]1[H:12])([H:7])[H:8]>>[N:1]#[C:2][C:3]([C:4][N:6]([C:5]([H:9])([H:10])[H:11])[H:12])([H:7])[H:8],CCC(C)=O,-1.07 +[C:1]([N-:2][N+:7]#[C:6][H:15])([H:8])([H:9])[H:10].[C:3](#[C:4][C:5]([H:12])([H:13])[H:14])[H:11]>>[C:1]([n:2]1[c:3]([H:11])[c:4]([C:5]([H:12])([H:13])[H:14])[c:6]([H:15])[n:7]1)([H:8])([H:9])[H:10],COC(C)=O,0.2 
+[O:1]=[C:2]1[C:3]([H:7])([H:8])[C@@:4]2([H:9])[N:5]([H:10])[C@@:6]12[H:11]>>[O:1]([C:2]1=[C:3]([H:8])[C@@:4]2([H:9])[N:5]([H:10])[C@@:6]12[H:11])[H:7],COC(C)=O,1.21 +[O-:1][C:2](=[C:3]=[O+:6][C:5]([C:4]([H:8])([H:9])[H:10])([H:11])[H:12])[H:7]>>[O:1]=[C:2]([C@@:3]1([H:8])[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[O:6]1)[H:7],COC(C)=O,-1.06 +[C:1]([C@:2]([N:3][H:11])([C:4]([C:5](=[O:6])[H:14])([H:12])[H:13])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@:2]([N-:3][H:11])([C:4]([C+:5][O:6][H:14])([H:12])[H:13])[H:10])([H:7])([H:8])[H:9],CC[N+](=O)[O-],1.24 +[C:1]([C:2]([O:3][C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6](=[O:7])[H:17])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([H:8])([H:9])([H:10])[H:13].[C:2](=[O+:3][C:4]([C:5]([H:14])([H:15])[H:16])=[C:6]([O-:7])[H:17])([H:11])[H:12],CC[N+](=O)[O-],-1.25 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:16])[O:7]1)([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:9])([H:16])[O:7]1)([H:8])[H:10],CC[N+](=O)[O-],-0.46 +[C:1]([C:2]1([C:3]([O:4][H:12])([H:10])[H:11])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([O:4][H:12])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:7])([H:8])[H:9].[C:3]([H:10])[H:11],ClC(Cl)C(Cl)Cl,-0.46 +[C:1]([C:2]([N:3][N:4]([C:5]([H:13])([H:14])[H:15])[C:6]([C:7]([H:18])([H:19])[H:20])([H:16])[H:17])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([N:3]([C:2]([H:11])[H:12])[N:4]([C:5]([H:13])([H:14])[H:15])[C:6]([C:7]([H:18])([H:19])[H:20])([H:16])[H:17])([H:8])([H:9])[H:10],ClC(Cl)C(Cl)Cl,1.06 +[C:1]([C:2]([C:3](=[C:4]=[O:5])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10].[C:6](=[O:7])([H:14])[H:15]>>[C:1]([C:2]([C:3]([C:4](=[O:5])[C:6](=[O:7])[H:15])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],ClC(Cl)C(Cl)Cl,-4.59 +[C:1]([N:2]([C:3]([C:4]#[N:5])([H:10])[H:11])[H:9])([H:6])([H:7])[H:8]>>[C:1]([N:2]([C:3]([N+:5]#[C-:4])([H:10])[H:11])[H:9])([H:6])([H:7])[H:8],CC(C)[N+](=O)[O-],-3.83 +[C:1]([C+:2]1[N:3]([H:11])[C@:4]1([C-:5]=[N:6][H:10])[H:12])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[N:3]([H:11])[C@:4]1([C:5]#[N:6])[H:12])([H:7])([H:8])[H:9],CC(C)[N+](=O)[O-],0.54 +[O:1]([C:2]([C:3]([C:4]([C:5]#[C:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:8])[H:9])[H:7]>>[O:1]([C@:2]1([H:9])[C:3]([H:10])([H:11])[C:4]([H:12])([H:13])[C:5][C:6]1([H:8])[H:14])[H:7],CC(C)[N+](=O)[O-],-4.74 +[C:1]([C:2]([O:3][H:11])=[C:4]([H:12])[H:13])([H:8])([H:9])[H:10].[C:5](=[C:6]=[O:7])([H:14])[H:15]>>[C:1]([C@@:2]1([O:3][H:11])[C:4]([H:12])([H:13])[C:5]([H:14])([H:15])[C:6]1=[O:7])([H:8])([H:9])[H:10],c1ccc2ncccc2c1,1.23 +[C:1]([C:2]([C:3]([C@:4]([O:5][H:15])([C:6]([N:7]([H:18])[H:19])([H:16])[H:17])[H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([C:4]([O:5][H:15])[C:6]([N:7]([H:18])[H:19])([H:16])[H:17])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],c1ccc2ncccc2c1,0.79 +[C-:1]([O+:2]=[C:3]([C:4]([C:5](=[O:6])[H:13])([H:11])[H:12])[H:10])([H:7])[H:9].[H:8][H:14]>>[C:1]([O:2][C@@:3]1([H:10])[C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9],c1ccc2ncccc2c1,-4.68 +[C:1]([C@@:2]([O:3][C:4](=[O:5])[H:12])([C:7](=[C:6])[H:13])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]([O:3][C:4](=[O:5])[H:12])([C:6]#[C:7][H:13])[H:11])([H:8])([H:9])[H:10],Cc1ccccc1C,0.41 +[C:1]([C@@:2]([C:3](=[O:4])[H:11])([C:5]#[N:6])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]([C:3](=[O:4])[H:11])([N+:6]#[C-:5])[H:10])([H:7])([H:8])[H:9],Cc1ccccc1C,1.35 
+[C:1]([C@@:2]1([C:3]([C:4]#[N:5])([H:11])[H:12])[C:6]([H:13])([H:14])[N:7]1[H:15])([H:8])([H:9])[H:10]>>[C:1](/[C:2]([C:3]([C:4]1=[N:5][C:6]1([H:13])[H:14])([H:11])[H:12])=[N:7]\[H:15])([H:8])([H:9])[H:10],Cc1ccccc1C,0.29 +[O:1]([N:2]1[C:3]([H:9])([H:10])[C:4]([H:11])=[C:5][C:6]([H:12])([H:13])[C:7]1([H:14])[H:15])[H:8]>>[H:13].[O:1]([N:2]1[C:3]([H:9])([H:10])[C:4]([H:11])=[C:5]=[C:6]([H:12])[C:7]1([H:14])[H:15])[H:8],Clc1ccccc1Cl,0.13 +[C:1]([C@@:2]1([H:11])[O:3][C:4]([H:12])([H:13])[C@@:5]2([H:14])[C:6]([H:15])([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]1([H:9])([H:10])[C@@:2]2([H:11])[O:3][C:4]([H:12])([H:13])[C@:5]([C:6]([H:8])([H:15])[H:16])([H:14])[C@@:7]12[H:17],Clc1ccccc1Cl,-1.16 +[N:1]([C:2](=[O:3])[C:4]([O:5]/[C:6](=[N:7]/[H:13])[H:12])([H:10])[H:11])([H:8])[H:9]>>[N:1]([C:2]([O:3][C@@:6]1([H:12])[O:5][N:7]1[H:13])=[C:4]([H:10])[H:11])([H:8])[H:9],Clc1ccccc1Cl,-1.5 +[C:1]([C:2]([C@@:3]1([H:10])[C:4]([H:11])=[N:5]1)=[O:6])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[c:3]([H:10])[c:4]([H:11])[n:5][o:6]1)([H:7])([H:8])[H:9],Cc1ccc(C)c(C)c1,-0.54 diff --git a/chemprop-updated/tests/data/regression/rxn/descriptors.npz b/chemprop-updated/tests/data/regression/rxn/descriptors.npz new file mode 100644 index 0000000000000000000000000000000000000000..c112116630a424986657dc26d88462556c2575f8 --- /dev/null +++ b/chemprop-updated/tests/data/regression/rxn/descriptors.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b67d2f6e56f86a3de6f5756ba90119c648c8eacbe4796597fa2cf82f44312aba +size 1064 diff --git a/chemprop-updated/tests/data/regression/rxn/rxn.csv b/chemprop-updated/tests/data/regression/rxn/rxn.csv new file mode 100644 index 0000000000000000000000000000000000000000..9654358baadc8adbb104994c76f3cfc2703afdbd --- /dev/null +++ b/chemprop-updated/tests/data/regression/rxn/rxn.csv @@ -0,0 +1,101 @@ +smiles,ea +[O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:15])([H:13])[H:14])([H:11])[H:12])([H:9])[H:10])[H:8]>>[C:3](=[C:4]=[O:5])([H:11])[H:12].[C:6]([O:7][H:15])([H:8])([H:13])[H:14].[O:1]=[C:2]([H:9])[H:10],8.89893350229384 +[C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:4]3([H:11])[O:5][C@:6]1([H:12])[C@@:7]23[H:13]>>[C:1]1([H:8])([H:9])[O:2][C:3]([H:10])=[C:7]([H:13])[C@:6]1([O+:5]=[C-:4][H:11])[H:12],5.464327694301 +[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:16])=[C:7]1[H:17])([H:8])([H:9])[H:10],5.270552275670961 +[C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C-:1]([O+:2]=[C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])[H:12])([H:8])[H:10].[H:9][H:11],8.47300569018029 +[C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]=[C:4]([H:10])[H:11])[C:5](=[O:6])[H:12])([H:7])([H:8])[H:9],5.579036955502979 +[O:1]=[c:2]1[n:3]([H:7])[c:4]([H:8])[n:5][o:6]1>>[N:3]([C:4]#[N:5])([H:7])[H:8].[O:1]=[C:2]=[O:6],5.87179986296395 +[C:1](/[C:2](=[N:3]\[O:4][H:10])[C:5]#[C:6][H:11])([H:7])([H:8])[H:9]>>[C:1]([C@:2]12[N:3]([O:4][H:10])[C@@:6]1([H:11])[C:5]2)([H:7])([H:8])[H:9],6.249862206930191 +[O:1]=[C:2]1[N:3]([H:7])[C:4]([H:8])([H:9])[C:5]([H:10])([H:11])[O:6]1>>[C:5](=[O:6])([H:10])[H:11].[O:1]=[C:2]1[N:3]([H:7])[C:4]1([H:8])[H:9],10.554549809087401 
+[N:1]#[C:2][C@@:3]1([H:8])[C:4]([H:9])([H:10])[N:5]=[C:6]([H:11])[N:7]1[H:12]>>[N:1]1=[C:6]([H:11])[N:7]([H:12])[C@@:3]2([H:8])[C:2]1=[N:5][C:4]2([H:9])[H:10],12.321927083334499 +[N:1]([C:2]([C@@:3]([N:4]([H:12])[H:13])([C:5]#[N:6])[H:11])([H:9])[H:10])([H:7])[H:8]>>[N:1]([C:2]([C:3]([N:4]([H:12])[H:13])=[C:5]=[N:6][H:11])([H:9])[H:10])([H:7])[H:8],8.50568077785716 +[C:1]([C@@:2]([O:3][H:11])([C@@:4]1([H:12])[C:5]([H:13])([H:14])[O:6]1)[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[O:3])[C@@:4]1([H:12])[C:5]([H:13])([H:14])[O:6]1)([H:7])([H:8])[H:9].[H:10][H:11],8.63150208656758 +[C:1]([C:2]([O:3][C@@:4]1([H:13])[C:5]([H:14])([H:15])[C@:6]1([C:7]([H:17])([H:18])[H:19])[H:16])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([O:3][H:19])([H:11])[H:12])([H:8])([H:9])[H:10].[C@@:4]12([H:13])[C:5]([H:14])([H:15])[C@:6]1([H:16])[C:7]2([H:17])[H:18],10.6849610584282 +[N:1]#[C:2][C:3]([C@@:4]1([H:9])[C:5]([H:10])([H:11])[N:6]1[H:12])([H:7])[H:8]>>[N+:1](#[C-:2])[C:3]([C@@:4]1([H:9])[C:5]([H:10])([H:11])[N:6]1[H:12])([H:7])[H:8],5.98241138069479 +[N:1]([c:2]1[c:3]([O:4][H:10])[c:5]([H:11])[c:6]([H:12])[n:7]1[H:13])([H:8])[H:9]>>[N:1]([C:2]1=[N:7][C:6]([H:12])([H:13])[C:5]([H:11])=[C:3]1[O:4][H:10])([H:8])[H:9],4.05263477722536 +[O:1]([N:2]=[C:3]1[C:4]([H:8])([H:9])[O:5][C:6]1([H:10])[H:11])[H:7]>>[O:1](/[N:2]=[C:3](/[C:4]([O+:5]=[C-:6][H:10])([H:8])[H:9])[H:11])[H:7],7.9198700949550584 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[O:5][C:6]1([H:15])[H:16])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3](=[C:4]([H:13])[H:14])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9].[O:5]=[C:6]([H:15])[H:16],9.885120970662388 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])([H:15])/[C:5]1=[N:6]\[O:7][H:16])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])([H:15])[C@@:5]1([N:6])[O:7][H:16])([H:8])([H:9])[H:10],6.842806303982721 +[C:1]([C@@:2]1([H:11])[O:3][C@@:4]2([H:12])[C:5]([H:13])([H:14])[C@@:6]12[O:7][H:15])([H:8])([H:9])[H:10]>>[C:1](=[C:2]([C@@:6]1([O:7][H:15])[C@:4]([O:3][H:8])([H:12])[C:5]1([H:13])[H:14])[H:11])([H:9])[H:10],6.17621139932046 +[C:1]([O:2][C:3](=[O:4])[C:5]([N:6]([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([O+:2]=[C-:3][O:4][H:12])([H:7])([H:8])[H:9].[C:5](=[N:6][H:13])([H:10])[H:11],6.90879668948285 +[O:1]([C:2]([C@@:3]1([H:11])[C:4]([H:12])([H:13])[C:5]([H:14])([H:15])[C:6]1=[O:7])([H:9])[H:10])[H:8]>>[C:4](=[C:5]([H:14])[H:15])([H:12])[H:13].[O:1]([C:2]([C@@:3]1([H:11])[C-:6]=[O+:7]1)([H:9])[H:10])[H:8],11.083881155868001 +[C:1]([C@@:2]1([H:10])[O:3][C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9]>>[C-:2]1=[O+:3][C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13].[C:1]([H:7])([H:8])([H:9])[H:10],8.76558117959312 +[N:1]([C:2](=[O:3])[C:4]([C:5]#[N:6])([H:9])[H:10])([H:7])[H:8]>>[N:1]([C:2](=[O:3])[C:4]([N+:6]#[C-:5])([H:9])[H:10])([H:7])[H:8],6.054472281944199 +[C:1]([C:2]([C:3](=[O:4])[C:5]#[C:6][H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([O+:4]=[C-:3][C:5]#[C:6][H:12])([H:10])[H:11])([H:7])([H:8])[H:9],11.0582607831632 +[C:1]([C:2]([O:3][C:4]([C:5]#[N:6])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([H:7])([H:8])([H:9])[H:13].[C:2](=[O+:3][C:4](=[C:5]=[N-:6])[H:12])([H:10])[H:11],8.939102673241631 +[N:1]([c+:2]1[n-:3][c:4]([H:9])[n:5][o:6]1)([H:7])[H:8]>>[N:1]([C:2]([N:3]1[C:4]([H:9])=[N:5]1)=[O:6])([H:7])[H:8],6.465258149913429 +[O:1]([N:2]=[C:3]1[C:4]([H:8])([H:9])[N:5]([H:10])[C:6]1([H:11])[H:12])[H:7]>>[O:1](/[N:2]=[C:3](/[C:4]([N:5]([C:6][H:12])[H:10])([H:8])[H:9])[H:11])[H:7],7.08729333341998 
+[N:1]([C:2](=[O:3])[C:4]([O:5]/[C:6](=[N:7]/[H:13])[H:12])([H:10])[H:11])([H:8])[H:9]>>[N:1]([C:4]([C:2](=[O:3])[N:7]([C:6](=[O:5])[H:12])[H:13])([H:10])[H:11])([H:8])[H:9],6.8389747156693605 +[C:1]([C:2]([C:3]1([O:4][H:12])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3]1=[C:5]([H:14])[C:6]1([H:15])[H:16])([H:10])[H:11])([H:7])([H:8])[H:9].[O:4]([H:12])[H:13],7.8402621209318415 +[C:1]([C@@:2]1([H:10])[C@:3]([C:4]([H:12])([H:13])[H:14])([H:11])[N:5]1[C:6]([H:15])([H:16])[H:17])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C-:3]([H:11])[N+:5]1([C:4]([H:12])([H:13])[H:14])[C:6]([H:15])([H:16])[H:17])([H:7])([H:8])[H:9],10.753621206032602 +[N:1](=[C:2]1/[C:3]([H:9])([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[C:6]([H:15])([H:16])[O:7]1)\[H:8]>>[N:1]([C:2]1=[C:3]([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[C:6]([H:15])([H:16])[O:7]1)([H:8])[H:9],6.19361281168903 +[C:1]([c:2]1[c:3]([H:10])[n:4]([H:11])[c:5]([H:12])[c:6]1[H:13])([H:7])([H:8])[H:9]>>[C:1]([C:2]1=[C-:6][C:5]([H:12])([H:13])[N:4]([H:11])[C+:3]1[H:10])([H:7])([H:8])[H:9],7.12814220907606 +[C:1]([C@@:2]1([H:10])[N:3]([H:11])[C@:4]1([C:5](=[O:6])[H:13])[H:12])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[N:3]([H:11])[C@:4]1([C:5][O:6][H:13])[H:12])([H:7])([H:8])[H:9],8.06324514779744 +[C:1]([C:2]([n:3]1[c:4]([H:13])[c:5]([H:14])[n:6][c:7]1[H:15])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([N:3]1[C:4]([H:13])([H:14])[C-:5]=[N+:6]=[C:7]1[H:15])([H:11])[H:12])([H:8])([H:9])[H:10],7.644209569137141 +[C:1]([C:2]([C:3](=[O:4])[C:5]#[C:6][C:7]([H:13])([H:14])[H:15])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([H:8])([H:9])([H:10])[H:12].[C@:2]12([H:11])[C:3](=[O:4])[C:5]1=[C:6]2[C:7]([H:13])([H:14])[H:15],9.1644919104469 +[C:1]([C:2]([C:3]([C:4]([H:13])([H:14])[H:15])([C:5]([H:16])([H:17])[H:18])[C:6]#[N:7])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]1([H:8])([H:10])[C:2]([H:11])([H:12])[C:3]([C:4]([H:13])([H:14])[H:15])([C:5]([H:16])([H:17])[H:18])[C:6][N:7]1[H:9],9.56724442441322 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[N:4]([H:12])[C@:5]1([C:6](=[O:7])[H:14])[H:13])[H:8]>>[O:1]([C:3]([C@@:2]1([H:9])[N:4]([H:12])[C@:5]1([C:6](=[O:7])[H:14])[H:13])([H:10])[H:11])[H:8],9.558144644427909 +[C:1]([C@@:2]1([H:10])[N:3]([H:11])[C@:4]1([C:5]#[N:6])[H:12])([H:7])([H:8])[H:9]>>[C:1]([C:2]([N:3]([C+:4]=[C:5]=[N-:6])[H:11])([H:10])[H:12])([H:7])([H:8])[H:9],7.43925231218507 +[C:1](#[C:2][C:3]#[C:4][C@@:5]1([H:9])[C:6]([H:10])([H:11])[O:7]1)[H:8]>>[C:1]12([H:8])[C:2]3=[C:3]1[C:4]23[C@@:5]1([H:9])[C:6]([H:10])([H:11])[O:7]1,8.8875305817719 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[N:4]([H:12])[C:5]([H:13])([H:14])[C:6]1=[O:7])[H:8]>>[O:1]([O:7][C:6]1=[C:2]([H:9])[C:3]([H:10])([H:11])[N:4]([H:12])[C:5]1([H:13])[H:14])[H:8],10.939094127895299 +[C:1]([C:2]#[C:3][C:4]([H:8])([H:9])[H:10])([H:5])([H:6])[H:7]>>[C:1]([C:2](=[C:3]=[C:4]([H:8])[H:9])[H:10])([H:5])([H:6])[H:7],8.51817600177597 +[C:1]([C:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])([H:16])[C:6]1([H:17])[H:18])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:14])=[C:5]([H:15])[C:6]1([H:17])[H:18])([H:7])([H:8])[H:9].[H:13][H:16],11.890462046069599 +[O:1]=[C:2]1[C:3]([H:8])([H:9])[C:4]([H:10])([H:11])[C:5](=[O:6])[C:7]1([H:12])[H:13]>>[O:1]=[C:2]1[C:3]([H:8])([H:9])[C:4]([H:10])([H:11])[C:5]([O:6][H:13])=[C:7]1[H:12],6.4213837564242295 
+[C:1]([C:2]([C:3](=[O:4])[N:5]([C:6]([H:13])([H:14])[H:15])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]([O-:4])[N:5]([C+:6]([H:13])[H:14])[H:12])[H:11])([H:7])([H:8])[H:9].[H:10][H:15],9.33923135580394 +[C:1]([C:2]#[C:3][C:4]([C@@:5]1([H:13])[C:6]([H:14])([H:15])[N:7]1[H:16])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]/[C:3](=[C:4](\[C@@:5]1([H:13])[C:6]([H:14])([H:15])[N:7]1[H:16])[H:12])[H:11])([H:8])([H:9])[H:10],7.90054772381408 +[C:1]([C:2]1([C:3]([H:10])([H:11])[H:12])[C:4]([H:13])([H:14])[C@:5]1([O:6][H:16])[H:15])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([C:3]([H:10])([H:11])[H:12])[C:4]([H:13])=[C:5]1[H:15])([H:7])([H:8])[H:9].[O:6]([H:14])[H:16],8.05505987173428 +[C:1]([O:2][C:3]([C:4]([H:10])([H:11])[H:12])([C:5]([H:13])([H:14])[H:15])[H:9])([H:6])([H:7])[H:8]>>[C:1]([O:2][H:15])([H:6])([H:7])[H:8].[C:3]([C:4]([H:10])([H:11])[H:12])(=[C:5]([H:13])[H:14])[H:9],6.503993884214461 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[N:4]([H:13])[C:5]1=[O:6])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C@@:5]2([C:3]([H:11])([H:12])[N:4]2[H:13])[O:6]1)([H:7])([H:8])[H:9],10.3092592578813 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[O:4][C:5]([H:12])=[N:6][C:7]1([H:13])[H:14])[H:8]>>[O:1]([C@@:2]1([C:3]([H:9])([H:10])[H:11])[N:6]([C:5](=[O:4])[H:12])[C:7]1([H:13])[H:14])[H:8],6.7021388750542 +[C:1]([N:2]([C@@:3]([C:4]([O:5][H:15])([H:13])[H:14])([C:6]#[N:7])[H:12])[H:11])([H:8])([H:9])[H:10]>>[C:1]([N:2]([C:3](=[C:6]=[N:7][H:15])[H:12])[H:11])([H:8])([H:9])[H:10].[C:4](=[O:5])([H:13])[H:14],5.05460912630239 +[C:1]([n:2]1[c:3]([H:11])[c:4]([H:12])[c:5]([N:6]([H:13])[H:14])[c:7]1[H:15])([H:8])([H:9])[H:10]>>[C-:1]([N+:2]1([H:8])[C:3]([H:11])=[C:4]([H:12])[C:5]([N:6]([H:13])[H:14])=[C:7]1[H:15])([H:9])[H:10],9.458794004936673 +[C:1]([N:2]1[C:3]([H:10])([H:11])[C@:4]1([C:5]#[N:6])[H:12])([H:7])([H:8])[H:9]>>[C:1]([N+:2]1([C:5]#[N:6])[C:3]([H:10])([H:11])[C-:4]1[H:12])([H:7])([H:8])[H:9],8.10652473175005 +[C:1]([C@@:2]([O:3][H:12])([C:4](=[O:5])[C:6]#[C:7][H:13])[H:11])([H:8])([H:9])[H:10]>>[C:1](=[C:2]([O:3][H:12])[H:11])([H:8])[H:10].[C:4](=[O:5])=[C:6]=[C:7]([H:9])[H:13],5.6522318507892795 +[C:1]([C:2]([C@@:3]([O:4][H:13])([C:5]([O:6][H:16])([H:14])[H:15])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1](/[C:2](=[C:3](\[O:4][H:13])[H:12])[H:11])([H:7])([H:8])[H:9].[C:5](=[O:6])([H:14])[H:15].[H:10][H:16],8.905118208654372 +[C:1]([C:2](=[O:3])[N:4]([H:8])[H:9])([H:5])([H:6])[H:7]>>[C:1]([O+:3]=[C-:2][N:4]([H:8])[H:9])([H:5])([H:6])[H:7],10.6072245863762 +[C:1]1([H:7])([H:8])[C:2]([H:9])([H:10])[O:3][C:4]([H:11])=[N:5][C:6]1([H:12])[H:13]>>[C:1]([C:6](/[N:5]=[C:4](\[O+:3]=[C-:2][H:9])[H:11])([H:12])[H:13])([H:7])([H:8])[H:10],9.147762893239692 +[N:1]([c:2]1[c:3]([H:10])[o:4][n:5][c:6]1[N:7]([H:11])[H:12])([H:8])[H:9]>>[N-:1]([C:2]1=[C:3]([H:10])[O+:4]=[N:5][C@@:6]1([N:7]([H:11])[H:12])[H:8])[H:9],7.744038630612399 +[C:1]([n:2]1[c:3]([H:11])[c:4]([H:12])[c:5]([H:13])[c:6]1[O:7][H:14])([H:8])([H:9])[H:10]>>[C:1]([N:2]1[C+:3]([H:11])[C:4]([H:12])=[C-:5][C@@:6]1([O:7][H:14])[H:13])([H:8])([H:9])[H:10],7.03393272372824 +[C:1](/[N:2]=[C:3](\[N:4]([H:11])[H:12])[C:5](=[O:6])[N:7]([H:13])[H:14])([H:8])([H:9])[H:10]>>[C:1]([N:2]([C+:3]([N:4]([H:11])[H:12])[C+:5]([O-:6])[N-:7][H:14])[H:13])([H:8])([H:9])[H:10],2.012001777188 +[N:1](=[C:2]1\[O:3][C@@:4]2([H:9])[C:5]([H:10])([H:11])[C:6]([H:12])([H:13])[C@@:7]12[H:14])\[H:8]>>[N:1](=[C:2]1\[O:3][C@:4]([C:5]([C:6][H:12])([H:10])[H:11])([H:9])[C:7]1([H:13])[H:14])\[H:8],8.349162775142108 
+[O:1]([c:2]1[c:3]([H:8])[c:4]([H:9])[n:5][n:6]1[H:10])[H:7]>>[O:1]([C@@:2]1([H:10])[C:3]([H:8])=[C:4]([H:9])[N:5]=[N:6]1)[H:7],5.40886416542282 +[C:1]1([H:8])([H:9])[C@@:2]2([H:10])[C@@:3]3([H:11])[O:4][C:5]([H:12])([H:13])[C@:6]1([H:14])[C@@:7]23[H:15]>>[C:1]1([H:8])([H:9])[C@:2]2([H:10])[C:3]([H:11])([H:15])[O:4][C:5]([H:12])([H:13])[C@@:6]1([H:14])[C:7]2,6.6811223736896395 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]1[H:16])([H:7])([H:8])[H:9]>>[C:1]([C:2]1=[C:6]([H:16])[C:5]([H:10])([H:15])[C:4]([H:13])([H:14])[C:3]1([H:11])[H:12])([H:7])([H:8])[H:9],8.48233187657502 +[O:1]([N:2]=[C:3]1[C:4]([H:8])([H:9])[O:5][C:6]1([H:10])[H:11])[H:7]>>[O:1]([N:2]1[C:3](=[C:4]([H:8])[H:9])[C:6]([H:10])([H:11])[O:5]1)[H:7],8.162662393860531 +[C:1]([C@@:2]([O:3][H:12])([C@@:4]1([H:13])[C:5]([H:14])([H:15])[C:6]([H:16])([H:17])[O:7]1)[H:11])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]([O:3][H:12])([C:5]([C:6]([O+:7]=[C-:4][H:13])([H:16])[H:17])([H:14])[H:15])[H:11])([H:8])([H:9])[H:10],12.523488930972402 +[C:1]([C:2]([C:3]#[C:4][C:5]([O:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3](=[C:4])[C:5]([O:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],5.14514691462105 +[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5]#[N:6])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[O+:3][C:4](=[C:5]=[N-:6])[H:11])[H:10])([H:7])([H:8])[H:9],5.5145468136589 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C@:4]([O:5][H:14])([H:13])[C:6]1([H:15])[H:16])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:6]([H:15])[H:16])[H:10])([H:7])([H:8])[H:9].[C:3](=[C:4]([O:5][H:14])[H:13])([H:11])[H:12],9.04772087578722 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C@:4]2([H:12])[C:5]([H:13])([H:14])[C@@:6]1([H:15])[O:7]2)[H:8]>>[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C-:4]=[O+:7][C@@:6]1([C:5]([H:12])([H:13])[H:14])[H:15])[H:8],7.487274734764421 +[C:1]([C:2]([C@@:3]1([C:4]([H:12])([H:13])[H:14])[C:5]([H:15])([H:16])[O:6]1)([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C@:3]([C:4]([H:12])([H:13])[H:14])([O+:6]=[C-:5][H:16])[H:15])([H:10])[H:11])([H:7])([H:8])[H:9],7.77205306076843 +[C:1]([C:2]([C@@:3]([C:4]([H:12])([H:13])[H:14])([O:5][H:15])[H:11])([H:9])[H:10])([H:6])([H:7])[H:8]>>[C:1](=[C:2]([H:9])[H:10])([H:7])[H:8].[C:3]([C:4]([H:12])([H:13])[H:14])(=[O:5])[H:11].[H:6][H:15],8.54804185198168 +[O:1]=[c:2]1[n:3]([H:7])[c:4]([H:8])[c:5]([H:9])[o:6]1>>[O:1]([c:2]1[n:3][c:4]([H:8])[c:5]([H:9])[o:6]1)[H:7],5.452475451259422 +[O:1]([C@@:2]1([H:8])[C:3]([H:9])([H:10])[N:4]2[C:5]([H:11])([H:12])[C@@:6]12[H:13])[H:7]>>[H:7][H:8].[O:1]=[C:2]1[C:3]([H:9])([H:10])[N:4]2[C:5]([H:11])([H:12])[C@@:6]12[H:13],8.423323392042459 +[O:1]=[C:2]1[C:3]([H:8])([H:9])[C:4]([H:10])=[C:5]([H:11])[C:6]1=[O:7]>>[O:1]=[C:2]1[C:3]([H:8])([H:9])[C@@:4]1([C:5](=[C:6]=[O:7])[H:11])[H:10],8.18779353216338 +[C:1]([N:2]1[C:3]([H:10])([H:11])[C:4]([H:12])([H:13])[C:5]1=[O:6])([H:7])([H:8])[H:9]>>[C:1](=[N:2][C:3]([C:4]([C:5](=[O:6])[H:7])([H:12])[H:13])([H:10])[H:11])([H:8])[H:9],8.16424839016317 +[C:1]1([H:7])([H:8])[C:2]([H:9])=[C:3]([H:10])[C@@:4]2([H:11])[O:5][C@@:6]12[H:12]>>[C:1]1([H:7])([H:8])[C:2][C:3]([H:9])([H:10])[C@@:4]2([H:11])[O:5][C@@:6]12[H:12],6.932872801210389 +[N:1]([c:2]1[n:3][o:4][c:5]([H:9])[n:6]1)([H:7])[H:8]>>[C:5](#[N:6])[H:9].[N:1]([C:2]#[N+:3][O-:4])([H:7])[H:8],7.26403614761611 +[O:1]([C:2]([C:3]([C:4](=[O:5])[H:11])([H:9])[H:10])([H:7])[H:8])[H:6]>>[C:2](=[C:3]([C:4](=[O:5])[H:11])[H:10])([H:7])[H:8].[O:1]([H:6])[H:9],5.672914243715029 
+[O:1]([C:2]([C:3]([O:4][C:5](=[O:6])[H:12])([H:10])[H:11])([H:8])[H:9])[H:7]>>[C-:5]#[O+:6].[O:1]([C:2]([C:3]([O:4][H:12])([H:10])[H:11])([H:8])[H:9])[H:7],6.89586730282268 +[C:1]([C@@:2]([O:3][H:11])([C:4]([N:5]([H:14])[H:15])([H:12])[H:13])[C:6]#[N:7])([H:8])([H:9])[H:10]>>[C:1]([C@:2]([O:3][H:11])([C:6]#[N:7])[H:13])([H:8])([H:9])[H:10].[C:4]([N:5]([H:14])[H:15])[H:12],7.213900926218651 +[O:1]([c:2]1[c:3]([H:8])[n:4]([H:9])[n:5][n:6]1)[H:7]>>[O:1]([c:2]1[c:3]([H:8])[n:4][n:5]([H:9])[n:6]1)[H:7],4.47025769519276 +[C:1]([C@@:2]1([O:3][H:10])[C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9]>>[C:1](=[C:2]([O:3][H:10])[C:4]([C:5](=[O:6])[H:13])([H:11])[H:12])([H:7])[H:9].[H:8][H:14],6.09028387277675 +[C:1]([C@:2]12[N:3]([H:11])[C@@:4]1([H:12])[C:5]([H:13])([H:14])[C:6]2=[O:7])([H:8])([H:9])[H:10]>>[C-:6]#[O+:7].[C:1](/[C:2](=[N:3]/[H:11])[C:4](=[C:5]([H:13])[H:14])[H:12])([H:8])([H:9])[H:10],7.338289583240521 +[N:1]#[C:2][C@@:3]1([H:8])[C:4]([H:9])([H:10])[C@@:5]2([H:11])[C:6]([H:12])([H:13])[N:7]12>>[N:1]#[C:2]/[C:3](=[N:7]\[C:6]([C:5](=[C:4]([H:9])[H:10])[H:11])([H:12])[H:13])[H:8],5.848991692576071 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C@:4]1([C:5]([C:6]#[C:7][H:17])([H:15])[H:16])[H:14])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]([C:4](=[C:3]([H:12])[H:13])[H:14])([C:7](=[C:6]=[C:5]([H:15])[H:16])[H:17])[H:11])([H:8])([H:9])[H:10],7.843525163692592 +[O:1]=[C:2]([c:3]1[c:4]([H:9])[n:5][c:6]([H:10])[n:7]1[H:11])[H:8]>>[O:1]=[C:2]([c:3]1[c-:4][n:5]([H:9])[c+:6]([H:10])[n:7]1[H:11])[H:8],8.29205506684849 +[C:1]([C@@:2]1([H:11])[O:3][C@@:4]2([H:12])[C:5]([H:13])([H:14])[C@@:6]12[C:7]([H:15])([H:16])[H:17])([H:8])([H:9])[H:10]>>[C:1](/[C:2](=[C:6](/[C@@:4]1([H:12])[O:3][C:5]1([H:13])[H:14])[C:7]([H:15])([H:16])[H:17])[H:11])([H:8])([H:9])[H:10],9.58527585055238 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[N:4]([H:13])[C:5]1=[O:6])([H:7])([H:8])[H:9]>>[C-:1]([O+:6]=[C:5]1[C:2]([H:9])([H:10])[C:3]([H:11])([H:12])[N:4]1[H:13])([H:7])[H:8],10.7853171818489 +[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C@@:3]3([H:10])[O:4][C@:5]1([H:11])[C@@:6]23[H:12]>>[C:1]([C:2]1([H:9])[C:3]([H:10])=[C:6]1[H:12])([C:5](=[O:4])[H:11])([H:7])[H:8],4.03622656043101 +[O:1]([c+:2]1[n-:3][c:4]([H:8])[n:5][n:6]1[H:9])[H:7]>>[O:1]=[c:2]1[n:3]([H:7])[c:4]([H:8])[n:5][n:6]1[H:9],4.30770484353892 +[N:1]([C:2](=[O:3])[C:4]([C:5]([C:6]([O:7][H:16])([H:14])[H:15])([H:12])[H:13])([H:10])[H:11])([H:8])[H:9]>>[C:5]([C:6](=[O:7])[H:14])([H:12])([H:13])[H:15].[N:1]([C:2](=[O:3])[C:4]([H:10])([H:11])[H:16])([H:8])[H:9],9.57678082079487 +[O:1]=[C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11])[N:5]=[C:6]([H:12])[O:7]1)[H:8]>>[O:1]([C:2][C@@:3]1([H:9])[C:4]([H:10])([H:11])[N:5]=[C:6]([H:12])[O:7]1)[H:8],8.086546852034171 +[O:1]=[C:2]1[N:3]([H:8])[C:4]([H:9])([H:10])[C@@:5]2([H:11])[C:6]([H:12])([H:13])[N:7]12>>[C:4](=[C:5]([N:7]=[C:6]([H:12])[H:13])[H:11])([H:9])[H:10].[O:1]=[C:2]=[N:3][H:8],9.240672854865 +[C:1]([C:2]1([C:3]([H:10])([H:11])[H:12])[C:4]([H:13])([H:14])[C@:5]1([O:6][H:16])[H:15])([H:7])([H:8])[H:9]>>[C:1]([C:4]([C@:5]([C:2][C:3]([H:10])([H:11])[H:12])([O:6][H:16])[H:15])([H:13])[H:14])([H:7])([H:8])[H:9],9.630105332396308 +[N:1]#[C:2][c:3]1[c:4]([H:8])[o:5][c:6]([H:9])[c:7]1[H:10]>>[N:1]#[C:2][C:3]1=[C:4]([H:8])[O:5][C:7]1=[C:6]([H:9])[H:10],8.54712866156014 +[C:1](#[C:2][C:3]([C:4]([C:5]#[C:6][H:12])([H:10])[H:11])([H:8])[H:9])[H:7]>>[C:1](=[C:2]1[C:3]([H:8])([H:9])[C@@:4]1([C:5]#[C:6][H:12])[H:11])([H:7])[H:10],8.92566573467996 
+[C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([C:4]#[C:5][C:6](=[O:7])[H:15])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C:2](=[C:4]=[C:5]([C:3]([H:12])([H:13])[H:14])[C:6](=[O:7])[H:15])[H:11])([H:8])([H:9])[H:10],9.29566511350831 +[O:1]=[C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11])[N:5]=[C:6]([H:12])[O:7]1)[H:8]>>[O:1]([C@@:2]1([H:8])[C@@:3]2([H:9])[C:4]([H:10])=[N:5][C@:6]1([H:12])[O:7]2)[H:11],7.75344158332724 +[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])([H:17])[C:6]([H:16])=[C:7]12)([H:8])([H:9])[H:10],10.650215451201401 +[C:1]1([H:8])([H:9])[C@@:2]2([H:10])[N:3]1[C:4]([H:11])([H:12])[C:5]21[C:6]([H:13])([H:14])[C:7]1([H:15])[H:16]>>[C:1]1([H:8])([H:9])[C@@:2]2([C:7]([H:10])([H:15])[H:16])[N:3]1[C:4]([H:11])([H:12])[C:5]2=[C:6]([H:13])[H:14],10.1389447353643 +[C:1]([C@@:2]1([C:3]([C:4]([O:5][H:15])([H:13])[H:14])([H:11])[H:12])[C:6]([H:16])([H:17])[O:7]1)([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([C:4]([O:5][H:15])([H:13])[H:14])([H:11])[H:12])=[O+:7][C-:6]([H:16])[H:17])([H:8])([H:9])[H:10],6.97993447045958 diff --git a/chemprop-updated/tests/data/regression/test.npz b/chemprop-updated/tests/data/regression/test.npz new file mode 100644 index 0000000000000000000000000000000000000000..65762d371dc586da86cebc785dfe9bb762657c02 --- /dev/null +++ b/chemprop-updated/tests/data/regression/test.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f9b2d92f681644101bbb7172ebce2fd3901661c88604326132e0fcd31b42d5c +size 6663 diff --git a/chemprop-updated/tests/data/regression/test_true.csv b/chemprop-updated/tests/data/regression/test_true.csv new file mode 100644 index 0000000000000000000000000000000000000000..7d03a83f1ebe7e26bf342be35e58825874ebe8ea --- /dev/null +++ b/chemprop-updated/tests/data/regression/test_true.csv @@ -0,0 +1,11 @@ +smiles,logSolubility +CCOP(=S)(OCC)SC(CCl)N2C(=O)c1ccccc1C2=O,-6.34 +Brc1ccc(Br)cc1,-4.07 +Cn2c(=O)on(c1ccc(Cl)c(Cl)c1)c2=O,-2.82 +Oc1ccc(cc1)c2ccccc2,-3.48 +CC1=C(CCCO1)C(=O)Nc2ccccc2,-2.56 +CCOC=C,-0.85 +CCC#C,-1.24 +COc1ncnc2nccnc12,-1.11 +CCCCC(C)(O)CC,-1.6 +Clc1ccc(Cl)cc1,-3.27 diff --git a/chemprop-updated/tests/data/regression/weights.csv b/chemprop-updated/tests/data/regression/weights.csv new file mode 100644 index 0000000000000000000000000000000000000000..e160ad7b5289db68deb464624745de23bee5754a --- /dev/null +++ b/chemprop-updated/tests/data/regression/weights.csv @@ -0,0 +1,500 @@ +1 +2 +3 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 
+1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 \ No newline at end of file diff --git a/chemprop-updated/tests/data/smis.csv b/chemprop-updated/tests/data/smis.csv new file mode 100644 index 0000000000000000000000000000000000000000..c96748bd2dd531384e7e4a79b9fef8126fbc1eb1 --- /dev/null +++ b/chemprop-updated/tests/data/smis.csv @@ -0,0 +1,101 @@ +smiles +Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 +COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23 +COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl +OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3 +Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1 +OC1(CN2CCC1CC2)C#Cc3ccc(cc3)c4ccccc4 +COc1cc(OC)c(cc1NC(=O)CCC(=O)O)S(=O)(=O)NCc2ccccc2N3CCCCC3 +CNc1cccc(CCOc2ccc(C[C@H](NC(=O)c3c(Cl)cccc3Cl)C(=O)O)cc2C)n1 +COc1ccc(cc1)C2=COc3cc(OC)cc(OC)c3C2=O +Oc1ncnc2scc(c3ccsc3)c12 +CS(=O)(=O)c1ccc(Oc2ccc(cc2)C#C[C@]3(O)CN4CCC3CC4)cc1 +C[C@H](Nc1nc(Nc2cc(C)[nH]n2)c(C)nc1C#N)c3ccc(F)cn3 +O=C1CCCCCN1 +CCCSc1ncccc1C(=O)N2CCCC2c3ccncc3 +CC1CCCCC1NC(=O)c2cnn(c2NS(=O)(=O)c3ccc(C)cc3)c4ccccc4 +Nc1ccc(cc1)c2nc3ccc(O)cc3s2 +COc1ccc(cc1)N2CCN(CC2)C(=O)[C@@H]3CCCC[C@H]3C(=O)NCC#N +CCC(COC(=O)c1cc(OC)c(OC)c(OC)c1)(N(C)C)c2ccccc2 +COc1cc(ccc1N2CC[C@@H](O)C2)N3N=Nc4cc(sc4C3=O)c5ccc(Cl)cc5 +CO[C@H]1CN(CCN2C(=O)C=Cc3ccc(cc23)C#N)CC[C@H]1NCc4ccc5OCC(=O)Nc5n4 +CC(C)(CCCCCOCCc1ccccc1)NCCc2ccc(O)c3nc(O)sc23 +Clc1ccc(cc1)C(=O)Nc2oc(nn2)C(=O)Nc3ccc(cc3)N4CCOCC4 +COc1ccc(Oc2cccc(CN3CCCC(C3)N4C=C(C)C(=O)NC4=O)c2)cc1 +OC(=O)c1cccc(c1)N2CCC(CN3CCC(CC3)Oc4ccc(Cl)c(Cl)c4)CC2 +CNCC[C@@H](Oc1ccccc1C)c2ccccc2 +Clc1ccc(N2CCN(CC2)C(=O)CCCc3ccncc3)c(Cl)c1 +COc1cnc(nc1N(C)C)c2ccccn2 +C(CCCCNc1cc(nc2ccccc12)c3ccccc3)CCCNc4cc(nc5ccccc45)c6ccccc6 +CSc1c(cnn1c2ccc(cc2)C(=O)O)C(=O)NC3C4CC5CC(CC3C5)C4 +CNC1=Nc2ncccc2C(=NC1c3cccs3)c4occn4 +CS(=O)(=O)C1(CC1)c2cc(nc(n2)c3cccc4[nH]ccc34)N5CC6CCC(C5)O6 +CN([C@@H]1CCN(Cc2ccc(cc2)C(F)(F)F)C[C@@H]1F)C(=O)Cc3ccc(cc3)n4cnnn4 +CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C +CS(=O)(=O)c1ccccc1C(=O)NC[C@@H](O)CN2CCC(CC2)Oc3ccc(Cl)c(Cl)c3 +O=C(NCc1ccncc1)c2ccc(Oc3ccccc3C#N)cc2 +CN(C)c1ccnc2sc(C(=O)NCc3ccccc3)c(N)c12 +CN1CCN(CC1)c2ccc3N=CN(C(=O)c3c2)c4cc(NC(=O)c5cscn5)ccc4C +Cn1cncc1c2c3C(=O)N(CC4CC4)C(=O)N(CC5CC5)c3nn2Cc6ccnc7ccc(Cl)cc67 +COc1ccc2ncc(C#N)c(CCN3CCC(CC3)NCc4cc5SCOc5cn4)c2c1 +CNC(=O)C1(CCN(CC[C@H](CN(C)C(=O)c2c(OC)c(cc3ccccc23)C#N)c4ccc(Cl)c(Cl)c4)CC1)N5CCCCC5=O +OB1N(C(=O)Nc2ccccc12)c3ccccc3 +CC(C)N(CCC(C(=O)N)(c1ccccc1)c2ccccn2)C(C)C +NC(=NC#N)c1sc(Nc2ccccc2)nc1N +CCS(=O)(=O)c1ccc(c(C)c1)c2cc(ccc2O[C@H](C)C(=O)O)C(F)(F)F +OC(=O)COc1ccc(cc1c2cc(ccc2F)C#N)C(F)(F)F +COc1ccc(cn1)C2=Cc3c(C)nc(N)nc3N([C@@H]4CC[C@H](CC4)OCCO)C2=O +CC(Nc1ncnc2ccccc12)c3ccccc3 +CC(C)c1ccc2Oc3nc(N)c(cc3C(=O)c2c1)C(=O)O +O[C@@H](CNCCCOCCOCCc1cccc2ccccc12)c3ccc(O)c4NC(=O)Sc34 +COc1ccccc1Cn2c(C)nc3ccccc23 +OC(=O)c1ccc(NC(=O)c2cc(OCc3ccccc3F)cc(OCc4ccccc4F)c2)nc1 +NC(Cc1c[nH]c2ccccc12)C(=O)O +OC(=O)CCC[C@H]1[C@@H](Cc2ccccc12)NC(=O)c3cc4cc(F)ccc4[nH]3 +CCNC(=O)c1cc2c(c(cnc2[nH]1)c3cncc(c3)C(=O)O)n4ccc(n4)C(F)(F)F +C[C@H](NC(=O)c1c(C)nn(C2CCCC2)c1NS(=O)(=O)c3ccc(C)cc3)C(C)(C)C +N(c1ccccc1)c2cc(Nc3ccccc3)[nH]n2 
+COCCNC(=O)c1cccc(Nc2ncc3cc(ccc3n2)c4ccncc4)c1 +CCC(CC)NC(=O)c1cnn(C)c1NS(=O)(=O)c2ccc(C)cc2 +NC(=O)c1cc(F)cc(O[C@H]2C[C@H]3CC[C@@H](C2)N3Cc4ccccc4)c1 +O=C1NC(=NC(=C1C#N)c2ccccc2)SCCc3ccccc3 +OC(C(=O)OC1CN2CCC1CC2)(c3ccccc3)c4ccccc4 +Cc1ccccc1NC(=O)CCS(=O)(=O)c2ccc(Br)s2 +CC(C)n1c(C)ncc1c2nc(Nc3ccc(cc3)C(=O)N(C)C)ncc2F +COc1cccc(c1)c2c[nH]c(n2)c3ccccc3 +O=C(COc1ccccc1)c2ccccc2 +COc1cc2ncc(C(=O)N)c(Nc3ccc(F)cc3F)c2cc1NCCN(C)C +CO[C@@H]1CC[C@@]2(CC1)Cc3ccc(OCC(C)C)cc3C24N=C(C)C(=N4)N +COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN4CCCC4 +O=C1CCOc2cc(COc3ccccc3)ccc12 +Clc1cccc2cn[nH]c12 +CNC(=O)c1ccc(CC(=O)N(C)C2CCN(Cc3ccc(cc3)C(F)(F)F)CC2)cc1 +COCCNCc1ccc(CCNC[C@H](O)c2ccc(O)c3NC(=O)Sc23)cc1 +Cn1cncc1c2c3C(=O)N(CC#C)C(=O)N(CC4CC4)c3nn2Cc5ccnc6ccc(Cl)cc56 +C[C@H](NC(=O)c1cccnc1Oc2ccccc2)c3ccccc3 +Clc1ccc(CN2CC3CNCC(C2)O3)cc1C(=O)NCC45CC6CC(CC(C6)C4)C5 +COc1cc(NS(=O)(=O)c2ccc(N)cc2)nc(OC)n1 +Cc1cc(CCC2CCN(CC2)S(=O)(=O)CC3(CCOCC3)N(O)C=O)c(C)cn1 +C[C@H](Nc1ncc(F)c(Nc2cc([nH]n2)C3CC3)n1)c4ncc(F)cn4 +CC(=O)Nc1ccc2c(c1)c(cn2CCCO)c3cc(NC4CC4)n5ncc(C#N)c5n3 +CC1COc2c(N3CCN(C)CC3)c(F)cc4C(=O)C(=CN1c24)C(=O)O +CC1(CC1)c2nc(ncc2C(=O)N[C@@H]3C4CC5CC3C[C@@](O)(C5)C4)N6CCOCC6 +COC(=O)c1ccc(C)c(NS(=O)(=O)c2ccc3N(C)SC(=O)c3c2)c1 +COc1ccc(cc1)C2=COc3cc(O)cc(O)c3C2=O +CNCCCC12CCC(c3ccccc13)c4ccccc24 +Oc1cc(nc2ccnn12)c3ccccc3 +Fc1cc(cc(F)c1C2=CCN(CC2)C=O)N3C[C@H](COc4ccon4)OC3=O +CC(C#C)N1C(=O)N(CC2CC2)c3nn(Cc4ccnc5ccc(Cl)cc45)c(c3C1=O)c6cncn6C +C[C@H]1CN(Cc2cc(Cl)ccc2OCC(=O)O)CCN1C(=O)Cc3ccccc3 +COc1cc(Nc2nc(N[C@@H](C)c3ncc(F)cn3)ncc2Br)n[nH]1 +Cc1nc(C)c(nc1C(=O)N)c2ccc([C@@H]3CC[C@@H](CC(=O)O)CC3)c(F)c2 +COc1ccnc(CCc2nc3c(C)ccnc3[nH]2)c1 +Cc1cc(CCCOc2c(Cl)cc(cc2Cl)C3=NCCO3)on1 +CN(C)C(=O)c1ccc(CN2CCc3cc4nc(N)sc4cc3CC2)cc1 +COC(=O)[C@H]1[C@@H](O)CC[C@H]2CN3CCc4c([nH]c5ccccc45)[C@@H]3C[C@H]12 +CCN1CCN(CC1)c2ccc(Nc3cc(ncn3)N(C)C(=O)Nc4c(Cl)c(OC)cc(OC)c4Cl)cc2 +CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C@H]([C@H](O)[C@@H]2O)n3cnc4c(N)ncnc34 +CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)c3)c2n1)c4c(Cl)c(OC)cc(OC)c4Cl +CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)C(=O)c13 +COc1ccc(Cc2c(N)n[nH]c2N)cc1 +CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(=O)Nc3cccnc3 diff --git a/chemprop-updated/tests/data/spectra.csv b/chemprop-updated/tests/data/spectra.csv new file mode 100644 index 0000000000000000000000000000000000000000..3fd81c6aad01de70fdaaac5ee2ffb836c047f6ad --- /dev/null +++ b/chemprop-updated/tests/data/spectra.csv @@ -0,0 +1,201 @@ +"smiles",400,402,404,406,408,410 +"O=C(O)c1ccco1",0.001718021194011,0.001718021194011,0.001716797003396,0.001701030921568,0.001677361856277,0.001643664219237 +"O=C(O)c1ccco1",0.000814858567868,0.000814858567868,0.000814658731673,0.000821599292867,0.000841480209384,0.000869636808942 +"CCOP(=O)(OCC)C(O)[C@@H]1OC(C)(C)O[C@H]1[C@@H]1COC(C)(C)O1",0.00095975940077,0.00095975940077,0.00095869222154,0.000944127265653,0.000921543744199,0.000889734581559 +"CCOP(=O)(OCC)C(O)[C@@H]1OC(C)(C)O[C@H]1[C@@H]1COC(C)(C)O1",0.003490215139879,0.003490215139879,0.003489255010247,0.003467171609632,0.003426105091663,0.003371304442035 +"c1ccc(C2=NOC(c3ccccc3)C2)cc1",0.00052270050794,0.00052270050794,0.00052104532661,0.000525180733987,0.000558042717288,0.000607812473411 +"c1ccc(C2=NOC(c3ccccc3)C2)cc1",0.002354314549285,0.002354314549285,0.002352638290808,0.002313031156778,0.002241742882841,0.002152567395278 +"Clc1ccc(OCc2ccccc2)cc1",0.000810842444357,0.000810842444357,0.000809552075127,0.000797494015839,0.000787722398537,0.000781087910526 
+"Clc1ccc(OCc2ccccc2)cc1",0.001320808947919,0.001320808947919,0.001318776307599,0.001303206221412,0.001293945404753,0.001287487770948 +"Cc1ccc(OCc2ccccc2)cc1",0.00065840172601,0.00065840172601,0.000658059716896,0.000660817032034,0.000672467742524,0.000689709782268 +"Cc1ccc(OCc2ccccc2)cc1",0.002311699163512,0.002311699163512,0.00230898717372,0.00228016095968,0.002246088363437,0.002203490933737 +"c1ccc(P(CCP(c2ccccc2)c2ccccc2)c2ccccc2)cc1",0.003332960946086,0.003332960946086,0.00332042146637,0.003217893403366,0.003133245997156,0.003033806612718 +"c1ccc(P(CCP(c2ccccc2)c2ccccc2)c2ccccc2)cc1",0.001509156146077,0.001509156146077,0.001507610193323,0.001520821867592,0.001578102687275,0.001669170936526 +"CC(=O)Oc1ccc(C=O)cc1Br",0.000724402388643,0.000724402388643,0.000722504043091,0.000688800993576,0.000634126899912,0.000566945373484 +"CC(=O)Oc1ccc(C=O)cc1Br",0.001859697519601,0.001859697519601,0.001856053125476,0.001801014627982,0.00171752205043,0.001613157562645 +"CN=C(O)Oc1cccc(C(=O)O)c1",0.000833728357547,0.000833728357547,0.000833187241774,0.000831338884827,0.000834735160083,0.000841583837956 +"CN=C(O)Oc1cccc(C(=O)O)c1",0.002454620804169,0.002454620804169,0.002453797647828,0.002428621423434,0.002375550593143,0.00229918968797 +"CN=C(O)Oc1cccc(C(=O)OC)c1",0.000683722707528,0.000683722707528,0.000684397488895,0.000693601845895,0.000706472042492,0.000721941351579 +"CN=C(O)Oc1cccc(C(=O)OC)c1",0.001449498255641,0.001449498255641,0.001450402275959,0.00146115471988,0.001474975870809,0.001491953211684 +"COc1ccc(OCc2ccccc2)cc1",0.001168586792138,0.001168586792138,0.001167098893544,0.00116276184151,0.001170652662053,0.001180761792532 +"COc1ccc(OCc2ccccc2)cc1",0.00187424523919,0.00187424523919,0.001869902994179,0.00182610967031,0.001777420007798,0.001717747254148 +"COc1cc(/C=C/C(=O)O)ccc1OC(C)=O",0.000748475867829,0.000748475867829,0.00074777539367,0.000739575347073,0.00072915813677,0.000716381478054 +"COc1cc(/C=C/C(=O)O)ccc1OC(C)=O",0.001872254480516,0.001872254480516,0.001867656371957,0.001814070300071,0.00174291769209,0.001649459469235 +"CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)[C@@](C)(CO)[C@@H]5CC[C@]43C)[C@@H]2C1",0.000925660580061,0.000925660580061,0.000925149182147,0.000919817013521,0.000914304058597,0.000908836890005 +"CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)[C@@](C)(CO)[C@@H]5CC[C@]43C)[C@@H]2C1",0.003479867685431,0.003479867685431,0.0034699028362,0.003365943491687,0.003249261106792,0.003112495517379 +"CS(=O)(=O)OC1C2CC3CC(C2)C(=O)C1C3",0.000566294668051,0.000566294668051,0.000565900733051,0.000561280089324,0.000554703167765,0.000545142371854 +"CS(=O)(=O)OC1C2CC3CC(C2)C(=O)C1C3",0.000984637464029,0.000984637464029,0.000983018797351,0.000958989120183,0.000923439312541,0.000880071975387 +"O=C(O)[C@@H]1C[C@@H]2C=CC[C@H](C1)C2",0.000625654312298,0.000625654312298,0.000623982463689,0.000618667821929,0.000628416987333,0.000645396405794 +"O=C(O)[C@@H]1C[C@@H]2C=CC[C@H](C1)C2",0.0017941024604,0.0017941024604,0.001792234148276,0.001760999404394,0.001710831440675,0.001647138710888 +"OC1=NC2CC3CC(C2)CC1C3",0.001375942061624,0.001375942061624,0.001373785909499,0.001360153794491,0.00135670662078,0.001357301687633 +"OC1=NC2CC3CC(C2)CC1C3",0.001525946854972,0.001525946854972,0.001520763540364,0.001486197670848,0.001474704959552,0.001474739115898 +"ON=C1C2CC3CC(C2)CC1C3",0.001106720880744,0.001106720880744,0.001104863881011,0.001080706591626,0.001046051258337,0.001000516628517 
+"ON=C1C2CC3CC(C2)CC1C3",0.003135448864042,0.003135448864042,0.00313439589494,0.003089116297134,0.002991609157676,0.002858281532411 +"OC1C2CC3CC(C2)CC1C3",0.001562789367383,0.001562789367383,0.001559532867229,0.001534658302208,0.001517257045199,0.001497967028673 +"OC1C2CC3CC(C2)CC1C3",0.002150075447414,0.002150075447414,0.002145521096006,0.002083811575201,0.001993555534015,0.001876469753293 +"O=C(O)C1CC2CCCC(C2)C1",0.001341037932348,0.001341037932348,0.001338867547583,0.001314578115846,0.001283442456079,0.001242423385961 +"O=C(O)C1CC2CCCC(C2)C1",0.003744276484873,0.003744276484873,0.003743535751921,0.003723029510948,0.003683201519234,0.003630424961493 +"NC1C2CC3CC(C2)CC1C3",0.001199712916371,0.001199712916371,0.001199313817154,0.001190270315545,0.001173447219109,0.001150845414807 +"NC1C2CC3CC(C2)CC1C3",0.001467837890395,0.001467837890395,0.001464977162953,0.001440153362869,0.001414851371905,0.00137874257272 +"O=C(O)C12CC3CC(CC(C3)C1)C2",0.001152332162315,0.001152332162315,0.001152071621577,0.001143428667892,0.001126071710292,0.001103226286078 +"O=C(O)C12CC3CC(CC(C3)C1)C2",0.001615398037049,0.001615398037049,0.001615398037049,0.001597272715252,0.001572518098465,0.001548153095967 +"O=C(O)C12CC3CC(CC(C3)C1)C2",0.002134517520757,0.002134517520757,0.002133285602166,0.00212154199788,0.002110432292872,0.002099190727388 +"O/N=C1/CC2C[C@H]3CC1C[C@@H](C2)C3",0.001243662931256,0.001243662931256,0.001238035303857,0.001185792256904,0.001135315641163,0.001078919928777 +"O/N=C1/CC2C[C@H]3CC1C[C@@H](C2)C3",0.003137976147899,0.003137976147899,0.003134698735437,0.003084031520585,0.003000987961505,0.002886258043889 +"O=C(O)CC1C[C@H]2CCC[C@@H](C1)C2",0.000919363720206,0.000919363720206,0.000921039474642,0.000941929944071,0.000968723026689,0.000999237610327 +"O=C(O)CC1C[C@H]2CCC[C@@H](C1)C2",0.003252326473162,0.003252326473162,0.003242679122482,0.003153227070565,0.003061135781429,0.002945080771062 +"ClC(Cl)[C@]12C[C@@H]3C[C@H](C1)C[C@@](C(Cl)Cl)(C3)C2",0.00534845173677,0.00534845173677,0.005343782329951,0.005291911881391,0.005224859663568,0.00513414348467 +"ClC(Cl)[C@]12C[C@@H]3C[C@H](C1)C[C@@](C(Cl)Cl)(C3)C2",0.001945640809474,0.001945640809474,0.001942987714868,0.001919738862473,0.001899780873369,0.001879028550369 +"O=C(O)C12CC3CC(CC(CBr)(C3)C1)C2",0.000916370311921,0.000916370311921,0.000916482405903,0.000916772814012,0.000916250198849,0.000915929494363 +"O=C(O)C12CC3CC(CC(CBr)(C3)C1)C2",0.007858812308727,0.007858812308727,0.007852310122188,0.007762536593329,0.007634486461968,0.007476892821513 +"CC(O)=NC12CC3CC(CC(C3)C1)C2",0.00161158093703,0.00161158093703,0.001612359938931,0.001601063351429,0.001563688507875,0.001512924253318 +"CC(O)=NC12CC3CC(CC(C3)C1)C2",0.003741126095613,0.003741126095613,0.00373433803993,0.003651674846398,0.003538834963848,0.003391395319184 +"CC(O)=NC1(C)C2CC3CC(C2)CC1C3",0.002618478953991,0.002618478953991,0.002615725666243,0.002592869533172,0.002571911936553,0.002543254268616 +"CC(O)=NC1(C)C2CC3CC(C2)CC1C3",0.000893786366761,0.000893786366761,0.000893795311505,0.000893818480477,0.000893776781004,0.000893751181174 +"CCCCCCCCCC1CCCCC1",0.00334670096896,0.00334670096896,0.003343590820542,0.003296713317767,0.003224588616061,0.003131906766355 +"CCCCCCCCCCC1CCCCC1",0.006340314839409,0.006340314839409,0.006340498215315,0.006278594629684,0.006126584700986,0.005922089429307 +"c1ccc2c(c1)Cc1cc3ccccc3cc1-2",0.001114625848908,0.001114625848908,0.001112721022853,0.001101521731849,0.00110150219775,0.001108222616505 
+"c1ccc2c(c1)Cc1cc3ccccc3cc1-2",0.003596873343085,0.003596873343085,0.003592651122822,0.003520922404186,0.003399436134198,0.003234602239806 +"O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1",0.00057790463162,0.00057790463162,0.000576775593398,0.000568041722842,0.000561798345277,0.000554927895698 +"O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1",0.000801589964393,0.000801589964393,0.00080186919873,0.000802975186025,0.000800929261023,0.000795683091934 +"Cc1ccc(/C=C/C(=O)O)o1",0.001737047861411,0.001737047861411,0.001734981293203,0.001714651911826,0.001693055636889,0.001667313852754 +"Cc1ccc(/C=C/C(=O)O)o1",0.002403074396331,0.002403074396331,0.00240138076174,0.002372487682542,0.002325768587323,0.002266614549837 +"CCCCNc1ccccc1",0.001448557231584,0.001448557231584,0.00144780897147,0.001452239033618,0.001473340300037,0.001503993482388 +"CCCCNc1ccccc1",0.001395728285315,0.001395728285315,0.001396301467008,0.001395168959663,0.001382050527161,0.001357968830524 +"NCCNCCN",0.001301158513397,0.001301158513397,0.001299248800555,0.001280077319527,0.001259397847293,0.001235077097568 +"COC(=O)/C=C/c1ccco1",0.0006680060611,0.0006680060611,0.000667152636274,0.000658060221716,0.000648497505044,0.00063908627888 +"COC(=O)/C=C/c1ccco1",0.000495688605345,0.000495688605345,0.000494683202909,0.000486304382877,0.000478167807101,0.00046634033155 +"COC(=O)c1ccc(C(C)=O)o1",0.000437024622862,0.000437024622862,0.000435900468775,0.000431960427333,0.000439234183464,0.00045513935411 +"COC(=O)c1ccc(C(C)=O)o1",0.003290130195429,0.003290130195429,0.003289035887059,0.003285587606594,0.003291887745201,0.003302257448652 +"COC(=O)/C=C/c1ccc(C)o1",0.000355106417068,0.000355106417068,0.000355576192307,0.000363368185437,0.000375731528014,0.000391180201359 +"COC(=O)/C=C/c1ccc(C)o1",0.001408683538118,0.001408683538118,0.001406781729215,0.001389552782119,0.00137414614952,0.00135830917763 +"OCc1cccs1",0.001246981971416,0.001246981971416,0.001246430066127,0.001231250066406,0.001200760429801,0.001158371504403 +"Cc1ccc(C=O)s1",0.002489728664158,0.002489728664158,0.002487585792396,0.002494911596595,0.002540700165362,0.002605171776548 +"Cc1ccc(C=O)s1",0.000658200227859,0.000658200227859,0.000657930690997,0.000659913974168,0.000668579611328,0.000681328571521 +"Cc1ccc(C=O)s1",0.000875323720374,0.000875323720374,0.000875816823539,0.000889239356529,0.0009136490722,0.00094251224163 +"c1cscn1",0.000998241037688,0.000998241037688,0.000998427145578,0.000999186783385,0.000997778982712,0.000994009967793 +"C#CCCO",0.001643112620979,0.001643112620979,0.001640688282696,0.001612580628741,0.001577227307734,0.00153409570317 +"CC/C=C/CC",0.004916553689412,0.004916553689412,0.004911941961109,0.004861441768465,0.004800464218471,0.004724956106222 +"C/C=C/C(CC)CC",0.00274067330068,0.00274067330068,0.002741975036957,0.002789781185404,0.00289129940497,0.003033654734951 +"CCCCCC(C)C(C)C",0.004202068981141,0.004202068981141,0.004196401141147,0.004117398926314,0.004002498663441,0.003857989138471 +"CCCCCC(C)(C)CC",0.003472157975402,0.003472157975402,0.003462863433988,0.003377112873657,0.003291519489775,0.003187764407276 +"CC1(C)C(=O)[C@]2(C)CC[C@H]1C2",0.003577895131815,0.003577895131815,0.003581344870233,0.003608946910462,0.0036266480917,0.00363888982431 +"CCCCCCC(C)(C)C",0.005722584947602,0.005722584947602,0.005721429212805,0.005667275162823,0.005553997348974,0.005407709938593 +"CCC(C)CCCC(C)C",0.005745379979516,0.005745379979516,0.00573877598369,0.005613385765233,0.005398374012888,0.005120185452244 
+"Cc1cc(C)c2ccccc2c1",0.002301083694001,0.002301083694001,0.002301523769017,0.002313828853115,0.002337856288365,0.002369894114871 +"C=CCCC(C)=O",0.000908943007738,0.000908943007738,0.000907540885199,0.000895325901391,0.000884913417855,0.000874061876574 +"C=CCCC(C)=O",0.000859837077208,0.000859837077208,0.000862324096253,0.000887498621312,0.000912852970538,0.000938220738353 +"OC/C=C/c1ccccc1",0.000252673524188,0.000252673524188,0.000252191760701,0.000245647412717,0.000236203694218,0.000224208876858 +"OC/C=C/c1ccccc1",0.001044371847333,0.001044371847333,0.001044709335114,0.001025652899677,0.000972238129473,0.000895359451555 +"OC/C=C/c1ccccc1",0.000430908276566,0.000430908276566,0.000430101543328,0.000423122932184,0.000415358210613,0.000402981709727 +"OC/C=C/c1ccccc1",0.002683496806127,0.002683496806127,0.002683496806127,0.002618518763381,0.00249045810214,0.002333891535566 +"C/C=C/C=O",0.001692808975099,0.001692808975099,0.001691644113279,0.001670698975495,0.001634382188318,0.001584953843554 +"C/C=C/C=O",0.001414324753441,0.001414324753441,0.001410311637082,0.001356654807834,0.001282861991468,0.001194902945053 +"Clc1cc(Cl)cc(Cl)c1",0.001110120979772,0.001110120979772,0.001110370012591,0.001117979858102,0.001133072649557,0.001153078165939 +"Clc1cc(Cl)cc(Cl)c1",0.001100341638378,0.001100341638378,0.00110462622788,0.001153042176604,0.001212993584033,0.001287639803331 +"Clc1cc(Cl)cc(Cl)c1",0.001095249777361,0.001095249777361,0.001094503807592,0.001086137647633,0.001075831201719,0.001063078978437 +"C#CCO",0.00058441750236,0.00058441750236,0.000583636109991,0.00057694925639,0.000571384658625,0.000565545499676 +"C#CCO",0.001177642618512,0.001177642618512,0.001176739898586,0.001162562139579,0.0011391844742,0.001106895214881 +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1",0.001258785286516,0.001258785286516,0.001258526554426,0.001265999002663,0.001286263405937,0.001311340606229 +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1",0.000885758280606,0.000885758280606,0.000885316721161,0.000900572377555,0.000943338429253,0.001001806787228 +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1",0.002390863978396,0.002390863978396,0.002389385075432,0.002345663608308,0.002260096760039,0.002148284885808 +"BrC(Br)c1cccc(C(Br)Br)c1",0.000590317533026,0.000590317533026,0.000591686331479,0.000602879275115,0.000611499237075,0.000620744191319 +"BrC(Br)c1cccc(C(Br)Br)c1",0.000636209163384,0.000636209163384,0.000636006843913,0.000635050894586,0.000636836585259,0.000642513976296 +"BrC(Br)c1cccc(C(Br)Br)c1",0.004042107521661,0.004042107521661,0.004036783885137,0.00397427922163,0.003896694215722,0.003805833716808 +"CCSC#N",0.003910657833755,0.003910657833755,0.003905707289248,0.003828503427476,0.00370523952941,0.003542392466382 +"CCOC(=O)CC(C(=O)OCC)C(C(=O)OCC)C(=O)OCC",0.002277592362436,0.002277592362436,0.002276414796252,0.002261461377192,0.002240773249748,0.002214276421924 +"CCCCCCCCCCCCCCCCCCCCCC",0.004994984462008,0.004994984462008,0.004989466382052,0.004923812713151,0.004836282746394,0.004722534573558 +"CCCCCCCCCCCCCCCCCCCCCC",0.003929900476866,0.003929900476866,0.003929739664487,0.003910543024829,0.00386756188552,0.003812486739059 +"CC(C)CCC#N",0.000969720221234,0.000969720221234,0.000967864269898,0.000961642510016,0.000973083625161,0.000995616079035 +"CC(C)CCC#N",0.003244411988843,0.003244411988843,0.003232110486425,0.003324855043373,0.003610427694565,0.00386829697764 +"C=CCOC(=O)CCC(=O)OCC=C",0.001470167487509,0.001470167487509,0.001467587142741,0.001436342580937,0.001394808566463,0.001342346141865 
+"C=CC1CC=CCC1",0.000562749956281,0.000562749956281,0.000563970290969,0.000568396694315,0.000560271748922,0.000542604792027 +"C=CC1CC=CCC1",0.000767878383041,0.000767878383041,0.000765940433071,0.000753520725764,0.000752016816537,0.000759027582971 +"CCc1ccccn1",0.004409861887012,0.004409861887012,0.004400725560892,0.004281393167948,0.004111545227854,0.003892314902875 +"CCCCCCCC/C=C\CCCCCCCC(=O)OCCCC",0.000776615776034,0.000776615776034,0.000778643756685,0.000795913000439,0.00080879429736,0.000819325343349 +"C#CC(O)c1ccccc1",0.000620778333046,0.000620778333046,0.000620428076424,0.000608552149248,0.000583924153737,0.000550343998713 +"CC(N)CN",0.001924668669902,0.001924668669902,0.001923010622307,0.001900711893441,0.001867862171346,0.001824175756107 +"CCOC(=O)C(=O)C(C)C(=O)OCC",0.000805514213129,0.000805514213129,0.000805136190559,0.000799862414442,0.000792436353711,0.0007836011027 +"O=S(=O)(Cl)c1ccccc1",0.000464934288403,0.000464934288403,0.000465605340513,0.000472154420445,0.000478509094289,0.000484924890707 +"C=CCc1ccc(O)c(OC)c1",0.001850196251208,0.001850196251208,0.001851533135608,0.001875871520544,0.001916496246382,0.001968326004411 +"C=CCc1ccc(O)c(OC)c1",0.001543725936885,0.001543725936885,0.00154178843564,0.0015190564819,0.00149085229164,0.00145781239583 +"ClCC(Cl)CCl",0.002701306496333,0.002701306496333,0.002699788526744,0.002690416137517,0.002688403842426,0.002689004776899 +"C=CCc1ccc(OC(C)=O)c(OC)c1",0.000623804770522,0.000623804770522,0.000622247524325,0.000606862816259,0.000588980919232,0.00056474208899 +"CCc1ccccc1[N+](=O)[O-]",0.001123020103993,0.001123020103993,0.001120753642306,0.001086114064694,0.001034173086507,0.000971041005104 +"CCc1ccccc1[N+](=O)[O-]",0.002548625192808,0.002548625192808,0.002546804920817,0.002507940984867,0.002439432724395,0.002352405155428 +"Cc1ccccc1",0.003027615937978,0.003027615937978,0.003020276589253,0.002926049354785,0.002791305316685,0.002613708788309 +"CC(=O)OC1CCCCC1",0.000508351036366,0.000508351036366,0.000509099010623,0.000515766117555,0.000521588169948,0.00052762642869 +"CC1(C)CC(O)CC(C)(C)N1[O]",0.001456558638968,0.001456558638968,0.001453106489428,0.001420316444776,0.001383219017534,0.001331604967575 +"CC1(C)CC(O)CC(C)(C)N1[O]",0.002062916952092,0.002062916952092,0.002061752091391,0.002037043066483,0.001992084392317,0.001931917938831 +"C/C=C/C=C/C(=O)O",0.000638912210565,0.000638912210565,0.000637998034308,0.000639559146041,0.000656612557764,0.000683754033542 +"C/C=C/C=C/C(=O)O",0.001408764499532,0.001408764499532,0.001407706097217,0.001394786530306,0.001377306077887,0.001354719573875 +"O=C(O)CCN1C(=O)c2ccccc2C1=O",0.000955020306053,0.000955020306053,0.000957064647584,0.000996924937896,0.001066609421572,0.001158227169969 +"O=C(O)CCN1C(=O)c2ccccc2C1=O",0.00066013516606,0.00066013516606,0.000662304048604,0.000687102131517,0.000719663662809,0.000763932700238 +"CCCCCCCCCCCCCCCCC(=O)OC",0.00408949853518,0.00408949853518,0.004082324203529,0.003969672203026,0.003792776989075,0.003566511282459 +"CCCCCCCCCCCCCCCCC(=O)OC",0.001457749049853,0.001457749049853,0.001455477357252,0.001419446005857,0.001359380382703,0.001276393968015 +"O=C1c2ccccc2C(=O)c2cc(Cl)ccc21",0.00082210696046,0.00082210696046,0.000819278493129,0.0008045293702,0.000807011029494,0.00081658486468 +"O=C1c2ccccc2C(=O)c2cc(Cl)ccc21",0.003643428769101,0.003643428769101,0.003641126761025,0.003597628739648,0.003524758073951,0.003433401932937 +"O=C1c2ccccc2C(=O)c2ccccc21",0.002393759466363,0.002393759466363,0.002387549137707,0.002314532589091,0.002221070981982,0.002106251577641 
+"O=C1c2ccccc2C(=O)c2ccccc21",0.001363319981965,0.001363319981965,0.001361544242838,0.00133751527733,0.001302973122143,0.001259086181017 +"CCCCCCc1c2ccccc2cc2ccccc12",0.004935719541282,0.004935719541282,0.004926118043482,0.004838101990824,0.004749489588371,0.004638510548077 +"CCCCCCc1c2ccccc2cc2ccccc12",0.000447649326779,0.000447649326779,0.00045031624782,0.000468960155242,0.000479136882499,0.000491236088445 +"FC1(F)C(F)(F)C(F)(F)C2(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C2(F)C1(F)F",0.001506751653308,0.001506751653308,0.001506562979692,0.001498098357858,0.001478181337091,0.001447701012395 +"CSC1=CC(=O)C=CC1=O",0.000680334744945,0.000680334744945,0.000682601528949,0.000706206461363,0.000732086948856,0.00076153828672 +"NCc1ccccc1",0.001092425906411,0.001092425906411,0.001091860026198,0.001083129051778,0.001069798314772,0.001053195682325 +"NCc1ccccc1",0.000108653789486,0.000108653789486,0.000108761935631,0.000107854165763,0.000104037227273,9.84311719646078E-05 +"N=c1nc(O)c2[nH]c(=O)cnc2[nH]1",0.001099641038101,0.001099641038101,0.001099641038101,0.001092135969085,0.001071222889446,0.00104393405244 +"N=c1nc(O)c2[nH]c(=O)cnc2[nH]1",0.000624460599789,0.000624460599789,0.000623718467253,0.000627242458678,0.000645762124857,0.000672352650184 +"CCCCCCCCCCCc1c2ccccc2cc2ccccc12",0.002934491741835,0.002934491741835,0.002929721576638,0.002864769498483,0.002770435423108,0.002649543747599 +"CCCCCCCCCCCc1c2ccccc2cc2ccccc12",0.002238921272485,0.002238921272485,0.002240783746441,0.002237826941303,0.002203289309605,0.002148993733298 +"CC(CC(=O)O)c1ccccc1",0.000652429788979,0.000652429788979,0.000651998751927,0.000639206908178,0.000614009842144,0.00058083283661 +"CC(CC(=O)O)c1ccccc1",0.001482328962704,0.001482328962704,0.001480589679768,0.001450931277174,0.001403469486115,0.00134441324605 +"CC(CC(=O)O)c1ccccc1",0.002659047324208,0.002659047324208,0.002655461742804,0.002602062168189,0.002520710964502,0.00241691668491 +"COC(=O)[C@@H]1CCC[C@H]1C(=O)OC",0.001001450445642,0.001001450445642,0.001000736314585,0.00098937284165,0.000970544362528,0.000944584651821 +"CC(=O)n1cnc(/C=C2\N=C(c3ccccc3)OC2=O)c1",0.00135240208531,0.00135240208531,0.001349499873797,0.001322716564314,0.001294392861261,0.001256649074795 +"CC(=O)n1cnc(/C=C2\N=C(c3ccccc3)OC2=O)c1",0.004171883538026,0.004171883538026,0.004165772091148,0.004092425579288,0.00399345740674,0.003864014917082 +"COC(=O)/C=C/c1ccc([N+](=O)[O-])s1",0.001201276992511,0.001201276992511,0.001198042710525,0.0011546269286,0.001089022564254,0.000999812579256 +"COC(=O)/C=C/c1ccc([N+](=O)[O-])s1",0.003582717070019,0.003582717070019,0.003579002460625,0.003521023742011,0.003431539030957,0.003319251424274 +"COC(=O)/C=C/c1ccc(Br)s1",0.000935722616009,0.000935722616009,0.000934810776445,0.000936175414041,0.000952615255936,0.000978553209962 +"COC(=O)/C=C/c1ccc(Br)s1",0.007763918244485,0.007763918244485,0.00775576965242,0.007639252563432,0.00747176203788,0.007270127390359 +"O=C(O)/C=C/c1ccc([N+](=O)[O-])s1",0.000620214389512,0.000620214389512,0.000618581387968,0.000598902496643,0.000573369091056,0.000542270705958 +"O=C(O)/C=C/c1ccc([N+](=O)[O-])s1",0.001873009041489,0.001873009041489,0.001865665095909,0.001798360571644,0.001731497194345,0.001650531358074 +"O=C(O)/C=C/c1ccc(Cl)s1",0.002575923417334,0.002575923417334,0.002572500301067,0.002521705204504,0.002439830527684,0.002326258705307 +"O=C(O)/C=C/c1ccc(Cl)s1",0.004765302802949,0.004765302802949,0.004757410728739,0.004683418332195,0.004611057513713,0.004529205487703 
+"O=C(O)C=Cc1ccc(Br)s1",0.002159911677497,0.002159911677497,0.00215562438673,0.002098904538199,0.002017060157172,0.00191053927471 +"O=C(O)C=Cc1ccc(Br)s1",0.001711612555351,0.001711612555351,0.001711622994992,0.001698446770849,0.001665894396997,0.001620968157293 +"C/C=C/C(=O)OC(C)(C)C",0.000545599990522,0.000545599990522,0.000544310639453,0.000526270055461,0.000498540940207,0.000461140224687 +"O=C(O)[C@@H]1CC[C@H]1C(=O)O",0.001258387118705,0.001258387118705,0.001258497202538,0.001271927290944,0.001303903445908,0.00134906368119 +"O=C(O)[C@@H]1CC[C@H]1C(=O)O",0.001347510688857,0.001347510688857,0.001348113796452,0.001350182047046,0.001346387333535,0.001338789705495 +"Cc1ccc(C=NO)s1",0.001951859605456,0.001951859605456,0.001949151324343,0.0019265135138,0.001905590642066,0.001877206867095 +"Cc1ccc(C=NO)s1",0.001206283257257,0.001206283257257,0.001205416539329,0.001193600173009,0.001177158981928,0.001157526274622 +"Cc1ccc(Br)s1",0.002958940397577,0.002958940397577,0.002955725089221,0.002887856137041,0.002767603160027,0.002613051533751 +"O=[N+]([O-])c1cccs1",0.00121976090843,0.00121976090843,0.001219451392425,0.001204299650943,0.001172339055533,0.001130890798451 +"CCCCCCCCCCC(C)C",0.003679026398294,0.003679026398294,0.00367470646763,0.003623919127817,0.003556715308429,0.003469161729441 +"CCCCCCCC1CCCCC1",0.00523019493678,0.00523019493678,0.005224737390558,0.005159683858506,0.005076219349197,0.004974621655032 +"c1ccc(CC2CCNCC2)cc1",0.002003307668452,0.002003307668452,0.002001778671835,0.001974037345974,0.001927542088939,0.001867731340188 +"C/C=C\Cl",0.000256313554212,0.000256313554212,0.000255960825317,0.000250821597679,0.000243158438802,0.000233527124157 +"N#Cc1ccc2c(Cl)cccc2n1",0.001825744162353,0.001825744162353,0.001824845405623,0.001811574061474,0.001792363320881,0.001769694961224 +"N#Cc1ccc2c(Cl)cccc2n1",0.002192530310623,0.002192530310623,0.002186500405607,0.002139620093867,0.002110795281417,0.002090768614637 +"Cc1nc(-c2ccccc2)c(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1",0.004719585457034,0.004719585457034,0.004714412622066,0.004628797677538,0.004484975890453,0.004288701315043 +"Cc1nc(-c2ccccc2)c(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1",0.000644355887184,0.000644355887184,0.000644979958832,0.000647129789641,0.000643187548324,0.000635278356872 +"O=c1c(O[C@@H]2O[C@@H](CO)[C@H](O)[C@H]2O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12",0.000146866708032,0.000146866708032,0.000147734787553,0.000161598561242,0.000184279585597,0.000214835127367 +"O=c1c(O[C@@H]2O[C@@H](CO)[C@H](O)[C@H]2O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12",0.000813923805172,0.000813923805172,0.000813649790502,0.000818602335527,0.000834283226656,0.000856039816226 +"CC1(C)CC1C(=O)OCC(=O)c1ccc(-c2ccccc2)cc1",0.001600427562802,0.001600427562802,0.001600256641187,0.001589812946977,0.001567339982101,0.001538327116728 +"CC1(C)CC1C(=O)OCC(=O)c1ccc(-c2ccccc2)cc1",0.003495395407337,0.003495395407337,0.003488279343857,0.003407485665784,0.003300844256072,0.003157242089638 +"CCOC(=O)CCc1ccccc1",0.001801679591141,0.001801679591141,0.001798456757132,0.001760929958604,0.001713657476351,0.001656223795507 +"CCCN(CCC)C(=O)c1ccccc1",0.002921779086415,0.002921779086415,0.002916724536848,0.00287884440293,0.002857322696033,0.002842301414084 +"O=C(Cl)CCl",0.000442319724511,0.000442319724511,0.000444359085458,0.000465648171886,0.000489471109914,0.000517689492641 +"O=C(Cl)CCl",0.001028435476674,0.001028435476674,0.001025172672375,0.000979074868873,0.000909751498723,0.000820260607825 
+"O=C(Cl)CCl",0.000234633280642,0.000234633280642,0.000234122469318,0.000229617919318,0.000226696275458,0.00022597057443 +"O=C(Cl)CCl",0.000514462517725,0.000514462517725,0.000512457728144,0.000490162362369,0.000460856892773,0.000421270375364 +"Oc1cccnc1O",0.000523957679607,0.000523957679607,0.000525146683315,0.000543381074787,0.000571826906485,0.000608765153379 +"Oc1cccnc1O",0.001376451598925,0.001376451598925,0.001376090840527,0.00137473855941,0.001377235383874,0.001383186710387 +"Oc1cccnc1O",0.001572651018218,0.001572651018218,0.001566950854991,0.001496223625128,0.001399299531239,0.00127587134396 +"CCC=C(CC)CC",0.004331797155989,0.004331797155989,0.004321571254328,0.004213072228435,0.004088980725559,0.003942602777491 +"CCCC(CC)CCC",0.003998977942541,0.003998977942541,0.003996853114573,0.003967274317822,0.003925669539396,0.00387614831022 +"CCC(CC)C(CC)CC",0.004792548774896,0.004792548774896,0.004793293237769,0.004785947419391,0.004756865263184,0.004713649987006 +"CCC(C)CCC(C)CC",0.002001793736694,0.002001793736694,0.001996255606332,0.001956546171054,0.001931924288507,0.001903482293456 +"CCCCC(C)(C)CCC",0.002778685707643,0.002778685707643,0.002774251771334,0.002744298378059,0.002725097737185,0.002694624214078 +"CCCCC(C)(CC)CC",0.00332617481953,0.00332617481953,0.003327885281865,0.003365976144788,0.003432510276172,0.003513539894214 +"CCCCCCCC(C)C",0.00781520215437,0.00781520215437,0.00779797128071,0.007621110676782,0.007430400486176,0.00721619563018 +"CCCCCCC(C)CC",0.004129425306313,0.004129425306313,0.004126485602405,0.004076343290657,0.003995268174461,0.00389260968071 diff --git a/chemprop-updated/tests/data/spectra/exclusions.csv b/chemprop-updated/tests/data/spectra/exclusions.csv new file mode 100644 index 0000000000000000000000000000000000000000..dd962fa410db0652116349bde25712e982dfe41d --- /dev/null +++ b/chemprop-updated/tests/data/spectra/exclusions.csv @@ -0,0 +1,201 @@ +"smiles",400,402,404,406,408,410 +"O=C(O)c1ccco1",0.001718021194011,,0.001716797003396,0.001701030921568,0.001677361856277,0.001643664219237 +"O=C(O)c1ccco1",0.000814858567868,0.000814858567868,,0.000821599292867,0.000841480209384,0.000869636808942 +"CCOP(=O)(OCC)C(O)[C@@H]1OC(C)(C)O[C@H]1[C@@H]1COC(C)(C)O1",0.00095975940077,0.00095975940077,,0.000944127265653,0.000921543744199,0.000889734581559 +"CCOP(=O)(OCC)C(O)[C@@H]1OC(C)(C)O[C@H]1[C@@H]1COC(C)(C)O1",0.003490215139879,0.003490215139879,,0.003467171609632,0.003426105091663,0.003371304442035 +"c1ccc(C2=NOC(c3ccccc3)C2)cc1",0.00052270050794,,0.00052104532661,0.000525180733987,0.000558042717288,0.000607812473411 +"c1ccc(C2=NOC(c3ccccc3)C2)cc1",0.002354314549285,,0.002352638290808,0.002313031156778,0.002241742882841,0.002152567395278 +"Clc1ccc(OCc2ccccc2)cc1",0.000810842444357,,0.000809552075127,0.000797494015839,0.000787722398537,0.000781087910526 +"Clc1ccc(OCc2ccccc2)cc1",0.001320808947919,0.001320808947919,,0.001303206221412,0.001293945404753,0.001287487770948 +"Cc1ccc(OCc2ccccc2)cc1",0.00065840172601,0.00065840172601,0.000658059716896,,0.000672467742524,0.000689709782268 +"Cc1ccc(OCc2ccccc2)cc1",0.002311699163512,0.002311699163512,0.00230898717372,,0.002246088363437,0.002203490933737 +"c1ccc(P(CCP(c2ccccc2)c2ccccc2)c2ccccc2)cc1",0.003332960946086,0.003332960946086,0.00332042146637,,0.003133245997156,0.003033806612718 +"c1ccc(P(CCP(c2ccccc2)c2ccccc2)c2ccccc2)cc1",0.001509156146077,0.001509156146077,0.001507610193323,,0.001578102687275,0.001669170936526 
+"CC(=O)Oc1ccc(C=O)cc1Br",0.000724402388643,0.000724402388643,,0.000688800993576,0.000634126899912,0.000566945373484 +"CC(=O)Oc1ccc(C=O)cc1Br",0.001859697519601,0.001859697519601,,0.001801014627982,0.00171752205043,0.001613157562645 +"CN=C(O)Oc1cccc(C(=O)O)c1",0.000833728357547,0.000833728357547,0.000833187241774,0.000831338884827,,0.000841583837956 +"CN=C(O)Oc1cccc(C(=O)O)c1",0.002454620804169,0.002454620804169,0.002453797647828,0.002428621423434,,0.00229918968797 +"CN=C(O)Oc1cccc(C(=O)OC)c1",0.000683722707528,0.000683722707528,0.000684397488895,0.000693601845895,0.000706472042492, +"CN=C(O)Oc1cccc(C(=O)OC)c1",0.001449498255641,0.001449498255641,0.001450402275959,0.00146115471988,0.001474975870809, +"COc1ccc(OCc2ccccc2)cc1",0.001168586792138,0.001168586792138,0.001167098893544,0.00116276184151,0.001170652662053, +"COc1ccc(OCc2ccccc2)cc1",0.00187424523919,0.00187424523919,0.001869902994179,0.00182610967031,0.001777420007798, +"COc1cc(/C=C/C(=O)O)ccc1OC(C)=O",0.000748475867829,0.000748475867829,0.00074777539367,0.000739575347073,0.00072915813677, +"COc1cc(/C=C/C(=O)O)ccc1OC(C)=O",0.001872254480516,0.001872254480516,0.001867656371957,0.001814070300071,0.00174291769209, +"CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)[C@@](C)(CO)[C@@H]5CC[C@]43C)[C@@H]2C1",0.000925660580061,0.000925660580061,0.000925149182147,0.000919817013521,0.000914304058597, +"CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)[C@@](C)(CO)[C@@H]5CC[C@]43C)[C@@H]2C1",0.003479867685431,0.003479867685431,0.0034699028362,0.003365943491687,0.003249261106792, +"CS(=O)(=O)OC1C2CC3CC(C2)C(=O)C1C3",0.000566294668051,0.000566294668051,0.000565900733051,0.000561280089324,0.000554703167765, +"CS(=O)(=O)OC1C2CC3CC(C2)C(=O)C1C3",0.000984637464029,0.000984637464029,0.000983018797351,0.000958989120183,0.000923439312541, +"O=C(O)[C@@H]1C[C@@H]2C=CC[C@H](C1)C2",0.000625654312298,0.000625654312298,0.000623982463689,0.000618667821929,0.000628416987333, +"O=C(O)[C@@H]1C[C@@H]2C=CC[C@H](C1)C2",0.0017941024604,0.0017941024604,0.001792234148276,0.001760999404394,0.001710831440675, +"OC1=NC2CC3CC(C2)CC1C3",0.001375942061624,0.001375942061624,0.001373785909499,0.001360153794491,0.00135670662078, +"OC1=NC2CC3CC(C2)CC1C3",0.001525946854972,0.001525946854972,0.001520763540364,0.001486197670848,0.001474704959552, +"ON=C1C2CC3CC(C2)CC1C3",0.001106720880744,0.001106720880744,0.001104863881011,0.001080706591626,0.001046051258337, +"ON=C1C2CC3CC(C2)CC1C3",0.003135448864042,0.003135448864042,0.00313439589494,0.003089116297134,0.002991609157676, +"OC1C2CC3CC(C2)CC1C3",0.001562789367383,0.001562789367383,0.001559532867229,0.001534658302208,0.001517257045199, +"OC1C2CC3CC(C2)CC1C3",0.002150075447414,0.002150075447414,0.002145521096006,0.002083811575201,0.001993555534015, +"O=C(O)C1CC2CCCC(C2)C1",0.001341037932348,0.001341037932348,0.001338867547583,0.001314578115846,,0.001242423385961 +"O=C(O)C1CC2CCCC(C2)C1",0.003744276484873,0.003744276484873,0.003743535751921,0.003723029510948,,0.003630424961493 +"NC1C2CC3CC(C2)CC1C3",0.001199712916371,0.001199712916371,0.001199313817154,0.001190270315545,,0.001150845414807 +"NC1C2CC3CC(C2)CC1C3",0.001467837890395,0.001467837890395,0.001464977162953,0.001440153362869,,0.00137874257272 +"O=C(O)C12CC3CC(CC(C3)C1)C2",0.001152332162315,0.001152332162315,0.001152071621577,0.001143428667892,,0.001103226286078 +"O=C(O)C12CC3CC(CC(C3)C1)C2",0.001615398037049,0.001615398037049,0.001615398037049,0.001597272715252,,0.001548153095967 
+"O=C(O)C12CC3CC(CC(C3)C1)C2",0.002134517520757,0.002134517520757,0.002133285602166,0.00212154199788,,0.002099190727388 +"O/N=C1/CC2C[C@H]3CC1C[C@@H](C2)C3",0.001243662931256,0.001243662931256,0.001238035303857,0.001185792256904,,0.001078919928777 +"O/N=C1/CC2C[C@H]3CC1C[C@@H](C2)C3",0.003137976147899,,0.003134698735437,0.003084031520585,0.003000987961505,0.002886258043889 +"O=C(O)CC1C[C@H]2CCC[C@@H](C1)C2",0.000919363720206,,0.000921039474642,0.000941929944071,0.000968723026689,0.000999237610327 +"O=C(O)CC1C[C@H]2CCC[C@@H](C1)C2",0.003252326473162,,0.003242679122482,0.003153227070565,0.003061135781429,0.002945080771062 +"ClC(Cl)[C@]12C[C@@H]3C[C@H](C1)C[C@@](C(Cl)Cl)(C3)C2",0.00534845173677,,0.005343782329951,0.005291911881391,0.005224859663568,0.00513414348467 +"ClC(Cl)[C@]12C[C@@H]3C[C@H](C1)C[C@@](C(Cl)Cl)(C3)C2",0.001945640809474,,0.001942987714868,0.001919738862473,0.001899780873369,0.001879028550369 +"O=C(O)C12CC3CC(CC(CBr)(C3)C1)C2",0.000916370311921,,0.000916482405903,0.000916772814012,0.000916250198849,0.000915929494363 +"O=C(O)C12CC3CC(CC(CBr)(C3)C1)C2",0.007858812308727,,0.007852310122188,0.007762536593329,0.007634486461968,0.007476892821513 +"CC(O)=NC12CC3CC(CC(C3)C1)C2",0.00161158093703,,0.001612359938931,0.001601063351429,0.001563688507875,0.001512924253318 +"CC(O)=NC12CC3CC(CC(C3)C1)C2",0.003741126095613,,0.00373433803993,0.003651674846398,0.003538834963848,0.003391395319184 +"CC(O)=NC1(C)C2CC3CC(C2)CC1C3",0.002618478953991,,0.002615725666243,0.002592869533172,0.002571911936553,0.002543254268616 +"CC(O)=NC1(C)C2CC3CC(C2)CC1C3",0.000893786366761,,0.000893795311505,0.000893818480477,0.000893776781004,0.000893751181174 +"CCCCCCCCCC1CCCCC1",0.00334670096896,,0.003343590820542,0.003296713317767,0.003224588616061,0.003131906766355 +"CCCCCCCCCCC1CCCCC1",0.006340314839409,,0.006340498215315,0.006278594629684,0.006126584700986,0.005922089429307 +"c1ccc2c(c1)Cc1cc3ccccc3cc1-2",0.001114625848908,,0.001112721022853,0.001101521731849,0.00110150219775,0.001108222616505 +"c1ccc2c(c1)Cc1cc3ccccc3cc1-2",0.003596873343085,,0.003592651122822,0.003520922404186,0.003399436134198,0.003234602239806 +"O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1",0.00057790463162,,0.000576775593398,0.000568041722842,0.000561798345277,0.000554927895698 +"O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1",0.000801589964393,,0.00080186919873,0.000802975186025,0.000800929261023,0.000795683091934 +"Cc1ccc(/C=C/C(=O)O)o1",0.001737047861411,,0.001734981293203,0.001714651911826,0.001693055636889,0.001667313852754 +"Cc1ccc(/C=C/C(=O)O)o1",0.002403074396331,,0.00240138076174,0.002372487682542,0.002325768587323,0.002266614549837 +"CCCCNc1ccccc1",0.001448557231584,,0.00144780897147,0.001452239033618,0.001473340300037,0.001503993482388 +"CCCCNc1ccccc1",0.001395728285315,,0.001396301467008,0.001395168959663,0.001382050527161,0.001357968830524 +"NCCNCCN",,0.001301158513397,0.001299248800555,0.001280077319527,0.001259397847293,0.001235077097568 +"COC(=O)/C=C/c1ccco1",,0.0006680060611,0.000667152636274,0.000658060221716,0.000648497505044,0.00063908627888 +"COC(=O)/C=C/c1ccco1",,0.000495688605345,0.000494683202909,0.000486304382877,0.000478167807101,0.00046634033155 +"COC(=O)c1ccc(C(C)=O)o1",,0.000437024622862,0.000435900468775,0.000431960427333,0.000439234183464,0.00045513935411 +"COC(=O)c1ccc(C(C)=O)o1",,0.003290130195429,0.003289035887059,0.003285587606594,0.003291887745201,0.003302257448652 +"COC(=O)/C=C/c1ccc(C)o1",,0.000355106417068,0.000355576192307,0.000363368185437,0.000375731528014,0.000391180201359 
+"COC(=O)/C=C/c1ccc(C)o1",,0.001408683538118,0.001406781729215,0.001389552782119,0.00137414614952,0.00135830917763 +"OCc1cccs1",,0.001246981971416,0.001246430066127,0.001231250066406,0.001200760429801,0.001158371504403 +"Cc1ccc(C=O)s1",,0.002489728664158,0.002487585792396,0.002494911596595,0.002540700165362,0.002605171776548 +"Cc1ccc(C=O)s1",,0.000658200227859,0.000657930690997,0.000659913974168,0.000668579611328,0.000681328571521 +"Cc1ccc(C=O)s1",,0.000875323720374,0.000875816823539,0.000889239356529,0.0009136490722,0.00094251224163 +"c1cscn1",,0.000998241037688,0.000998427145578,0.000999186783385,0.000997778982712,0.000994009967793 +"C#CCCO",,0.001643112620979,0.001640688282696,0.001612580628741,0.001577227307734,0.00153409570317 +"CC/C=C/CC",,0.004916553689412,0.004911941961109,0.004861441768465,0.004800464218471,0.004724956106222 +"C/C=C/C(CC)CC",,0.00274067330068,0.002741975036957,0.002789781185404,0.00289129940497,0.003033654734951 +"CCCCCC(C)C(C)C",,0.004202068981141,0.004196401141147,0.004117398926314,0.004002498663441,0.003857989138471 +"CCCCCC(C)(C)CC",,0.003472157975402,0.003462863433988,0.003377112873657,0.003291519489775,0.003187764407276 +"CC1(C)C(=O)[C@]2(C)CC[C@H]1C2",0.003577895131815,0.003577895131815,,0.003608946910462,0.0036266480917,0.00363888982431 +"CCCCCCC(C)(C)C",0.005722584947602,0.005722584947602,,0.005667275162823,0.005553997348974,0.005407709938593 +"CCC(C)CCCC(C)C",0.005745379979516,0.005745379979516,,0.005613385765233,0.005398374012888,0.005120185452244 +"Cc1cc(C)c2ccccc2c1",0.002301083694001,0.002301083694001,,0.002313828853115,0.002337856288365,0.002369894114871 +"C=CCCC(C)=O",0.000908943007738,0.000908943007738,,0.000895325901391,0.000884913417855,0.000874061876574 +"C=CCCC(C)=O",0.000859837077208,0.000859837077208,,0.000887498621312,0.000912852970538,0.000938220738353 +"OC/C=C/c1ccccc1",0.000252673524188,0.000252673524188,,0.000245647412717,0.000236203694218,0.000224208876858 +"OC/C=C/c1ccccc1",0.001044371847333,0.001044371847333,,0.001025652899677,0.000972238129473,0.000895359451555 +"OC/C=C/c1ccccc1",0.000430908276566,0.000430908276566,,0.000423122932184,0.000415358210613,0.000402981709727 +"OC/C=C/c1ccccc1",0.002683496806127,0.002683496806127,,0.002618518763381,0.00249045810214,0.002333891535566 +"C/C=C/C=O",0.001692808975099,0.001692808975099,,0.001670698975495,0.001634382188318,0.001584953843554 +"C/C=C/C=O",0.001414324753441,0.001414324753441,,0.001356654807834,0.001282861991468,0.001194902945053 +"Clc1cc(Cl)cc(Cl)c1",0.001110120979772,0.001110120979772,,0.001117979858102,0.001133072649557,0.001153078165939 +"Clc1cc(Cl)cc(Cl)c1",0.001100341638378,0.001100341638378,,0.001153042176604,0.001212993584033,0.001287639803331 +"Clc1cc(Cl)cc(Cl)c1",0.001095249777361,0.001095249777361,,0.001086137647633,0.001075831201719,0.001063078978437 +"C#CCO",0.00058441750236,0.00058441750236,,0.00057694925639,0.000571384658625,0.000565545499676 +"C#CCO",0.001177642618512,0.001177642618512,,0.001162562139579,0.0011391844742,0.001106895214881 +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1",0.001258785286516,0.001258785286516,,0.001265999002663,0.001286263405937,0.001311340606229 +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1",0.000885758280606,0.000885758280606,,0.000900572377555,0.000943338429253,0.001001806787228 +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1",0.002390863978396,0.002390863978396,,0.002345663608308,0.002260096760039,0.002148284885808 +"BrC(Br)c1cccc(C(Br)Br)c1",0.000590317533026,0.000590317533026,0.000591686331479,,0.000611499237075,0.000620744191319 
+"BrC(Br)c1cccc(C(Br)Br)c1",0.000636209163384,0.000636209163384,0.000636006843913,,0.000636836585259,0.000642513976296 +"BrC(Br)c1cccc(C(Br)Br)c1",0.004042107521661,0.004042107521661,0.004036783885137,,0.003896694215722,0.003805833716808 +"CCSC#N",0.003910657833755,0.003910657833755,0.003905707289248,,0.00370523952941,0.003542392466382 +"CCOC(=O)CC(C(=O)OCC)C(C(=O)OCC)C(=O)OCC",0.002277592362436,0.002277592362436,0.002276414796252,,0.002240773249748,0.002214276421924 +"CCCCCCCCCCCCCCCCCCCCCC",0.004994984462008,0.004994984462008,0.004989466382052,,0.004836282746394,0.004722534573558 +"CCCCCCCCCCCCCCCCCCCCCC",0.003929900476866,0.003929900476866,0.003929739664487,,0.00386756188552,0.003812486739059 +"CC(C)CCC#N",0.000969720221234,0.000969720221234,0.000967864269898,,0.000973083625161,0.000995616079035 +"CC(C)CCC#N",0.003244411988843,0.003244411988843,0.003232110486425,,0.003610427694565,0.00386829697764 +"C=CCOC(=O)CCC(=O)OCC=C",0.001470167487509,0.001470167487509,0.001467587142741,,0.001394808566463,0.001342346141865 +"C=CC1CC=CCC1",0.000562749956281,0.000562749956281,0.000563970290969,,0.000560271748922,0.000542604792027 +"C=CC1CC=CCC1",0.000767878383041,0.000767878383041,0.000765940433071,,0.000752016816537,0.000759027582971 +"CCc1ccccn1",0.004409861887012,0.004409861887012,0.004400725560892,,0.004111545227854,0.003892314902875 +"CCCCCCCC/C=C\CCCCCCCC(=O)OCCCC",0.000776615776034,0.000776615776034,0.000778643756685,,0.00080879429736,0.000819325343349 +"C#CC(O)c1ccccc1",0.000620778333046,0.000620778333046,0.000620428076424,,0.000583924153737,0.000550343998713 +"CC(N)CN",0.001924668669902,0.001924668669902,0.001923010622307,,0.001867862171346,0.001824175756107 +"CCOC(=O)C(=O)C(C)C(=O)OCC",0.000805514213129,0.000805514213129,0.000805136190559,,0.000792436353711,0.0007836011027 +"O=S(=O)(Cl)c1ccccc1",0.000464934288403,0.000464934288403,0.000465605340513,,0.000478509094289,0.000484924890707 +"C=CCc1ccc(O)c(OC)c1",0.001850196251208,0.001850196251208,0.001851533135608,,0.001916496246382,0.001968326004411 +"C=CCc1ccc(O)c(OC)c1",0.001543725936885,0.001543725936885,0.00154178843564,,0.00149085229164,0.00145781239583 +"ClCC(Cl)CCl",0.002701306496333,0.002701306496333,0.002699788526744,,0.002688403842426,0.002689004776899 +"C=CCc1ccc(OC(C)=O)c(OC)c1",0.000623804770522,0.000623804770522,0.000622247524325,,0.000588980919232,0.00056474208899 +"CCc1ccccc1[N+](=O)[O-]",0.001123020103993,0.001123020103993,0.001120753642306,,0.001034173086507,0.000971041005104 +"CCc1ccccc1[N+](=O)[O-]",0.002548625192808,,0.002546804920817,0.002507940984867,0.002439432724395,0.002352405155428 +"Cc1ccccc1",0.003027615937978,,0.003020276589253,0.002926049354785,0.002791305316685,0.002613708788309 +"CC(=O)OC1CCCCC1",0.000508351036366,,0.000509099010623,0.000515766117555,0.000521588169948,0.00052762642869 +"CC1(C)CC(O)CC(C)(C)N1[O]",0.001456558638968,,0.001453106489428,0.001420316444776,0.001383219017534,0.001331604967575 +"CC1(C)CC(O)CC(C)(C)N1[O]",0.002062916952092,,0.002061752091391,0.002037043066483,0.001992084392317,0.001931917938831 +"C/C=C/C=C/C(=O)O",0.000638912210565,,0.000637998034308,0.000639559146041,0.000656612557764,0.000683754033542 +"C/C=C/C=C/C(=O)O",0.001408764499532,,0.001407706097217,0.001394786530306,0.001377306077887,0.001354719573875 +"O=C(O)CCN1C(=O)c2ccccc2C1=O",0.000955020306053,,0.000957064647584,0.000996924937896,0.001066609421572,0.001158227169969 +"O=C(O)CCN1C(=O)c2ccccc2C1=O",0.00066013516606,,0.000662304048604,0.000687102131517,0.000719663662809,0.000763932700238 
+"CCCCCCCCCCCCCCCCC(=O)OC",0.00408949853518,,0.004082324203529,0.003969672203026,0.003792776989075,0.003566511282459 +"CCCCCCCCCCCCCCCCC(=O)OC",0.001457749049853,,0.001455477357252,0.001419446005857,0.001359380382703,0.001276393968015 +"O=C1c2ccccc2C(=O)c2cc(Cl)ccc21",0.00082210696046,,0.000819278493129,0.0008045293702,0.000807011029494,0.00081658486468 +"O=C1c2ccccc2C(=O)c2cc(Cl)ccc21",0.003643428769101,,0.003641126761025,0.003597628739648,0.003524758073951,0.003433401932937 +"O=C1c2ccccc2C(=O)c2ccccc21",0.002393759466363,,0.002387549137707,0.002314532589091,0.002221070981982,0.002106251577641 +"O=C1c2ccccc2C(=O)c2ccccc21",0.001363319981965,,0.001361544242838,0.00133751527733,0.001302973122143,0.001259086181017 +"CCCCCCc1c2ccccc2cc2ccccc12",0.004935719541282,,0.004926118043482,0.004838101990824,0.004749489588371,0.004638510548077 +"CCCCCCc1c2ccccc2cc2ccccc12",0.000447649326779,,0.00045031624782,0.000468960155242,0.000479136882499,0.000491236088445 +"FC1(F)C(F)(F)C(F)(F)C2(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C2(F)C1(F)F",0.001506751653308,,0.001506562979692,0.001498098357858,0.001478181337091,0.001447701012395 +"CSC1=CC(=O)C=CC1=O",0.000680334744945,,0.000682601528949,0.000706206461363,0.000732086948856,0.00076153828672 +"NCc1ccccc1",0.001092425906411,,0.001091860026198,0.001083129051778,0.001069798314772,0.001053195682325 +"NCc1ccccc1",0.000108653789486,,0.000108761935631,0.000107854165763,0.000104037227273,9.84311719646078E-05 +"N=c1nc(O)c2[nH]c(=O)cnc2[nH]1",0.001099641038101,,0.001099641038101,0.001092135969085,0.001071222889446,0.00104393405244 +"N=c1nc(O)c2[nH]c(=O)cnc2[nH]1",0.000624460599789,,0.000623718467253,0.000627242458678,0.000645762124857,0.000672352650184 +"CCCCCCCCCCCc1c2ccccc2cc2ccccc12",,0.002934491741835,0.002929721576638,0.002864769498483,0.002770435423108,0.002649543747599 +"CCCCCCCCCCCc1c2ccccc2cc2ccccc12",,0.002238921272485,0.002240783746441,0.002237826941303,0.002203289309605,0.002148993733298 +"CC(CC(=O)O)c1ccccc1",,0.000652429788979,0.000651998751927,0.000639206908178,0.000614009842144,0.00058083283661 +"CC(CC(=O)O)c1ccccc1",,0.001482328962704,0.001480589679768,0.001450931277174,0.001403469486115,0.00134441324605 +"CC(CC(=O)O)c1ccccc1",,0.002659047324208,0.002655461742804,0.002602062168189,0.002520710964502,0.00241691668491 +"COC(=O)[C@@H]1CCC[C@H]1C(=O)OC",,0.001001450445642,0.001000736314585,0.00098937284165,0.000970544362528,0.000944584651821 +"CC(=O)n1cnc(/C=C2\N=C(c3ccccc3)OC2=O)c1",,0.00135240208531,0.001349499873797,0.001322716564314,0.001294392861261,0.001256649074795 +"CC(=O)n1cnc(/C=C2\N=C(c3ccccc3)OC2=O)c1",,0.004171883538026,0.004165772091148,0.004092425579288,0.00399345740674,0.003864014917082 +"COC(=O)/C=C/c1ccc([N+](=O)[O-])s1",,0.001201276992511,0.001198042710525,0.0011546269286,0.001089022564254,0.000999812579256 +"COC(=O)/C=C/c1ccc([N+](=O)[O-])s1",,0.003582717070019,0.003579002460625,0.003521023742011,0.003431539030957,0.003319251424274 +"COC(=O)/C=C/c1ccc(Br)s1",,0.000935722616009,0.000934810776445,0.000936175414041,0.000952615255936,0.000978553209962 +"COC(=O)/C=C/c1ccc(Br)s1",,0.007763918244485,0.00775576965242,0.007639252563432,0.00747176203788,0.007270127390359 +"O=C(O)/C=C/c1ccc([N+](=O)[O-])s1",,0.000620214389512,0.000618581387968,0.000598902496643,0.000573369091056,0.000542270705958 +"O=C(O)/C=C/c1ccc([N+](=O)[O-])s1",,0.001873009041489,0.001865665095909,0.001798360571644,0.001731497194345,0.001650531358074 +"O=C(O)/C=C/c1ccc(Cl)s1",,0.002575923417334,0.002572500301067,0.002521705204504,0.002439830527684,0.002326258705307 
+"O=C(O)/C=C/c1ccc(Cl)s1",,0.004765302802949,0.004757410728739,0.004683418332195,0.004611057513713,0.004529205487703 +"O=C(O)C=Cc1ccc(Br)s1",,0.002159911677497,0.00215562438673,0.002098904538199,0.002017060157172,0.00191053927471 +"O=C(O)C=Cc1ccc(Br)s1",,0.001711612555351,0.001711622994992,0.001698446770849,0.001665894396997,0.001620968157293 +"C/C=C/C(=O)OC(C)(C)C",,0.000545599990522,0.000544310639453,0.000526270055461,0.000498540940207,0.000461140224687 +"O=C(O)[C@@H]1CC[C@H]1C(=O)O",,0.001258387118705,0.001258497202538,0.001271927290944,0.001303903445908,0.00134906368119 +"O=C(O)[C@@H]1CC[C@H]1C(=O)O",,0.001347510688857,0.001348113796452,0.001350182047046,0.001346387333535,0.001338789705495 +"Cc1ccc(C=NO)s1",,0.001951859605456,0.001949151324343,0.0019265135138,0.001905590642066,0.001877206867095 +"Cc1ccc(C=NO)s1",,0.001206283257257,0.001205416539329,0.001193600173009,0.001177158981928,0.001157526274622 +"Cc1ccc(Br)s1",,0.002958940397577,0.002955725089221,0.002887856137041,0.002767603160027,0.002613051533751 +"O=[N+]([O-])c1cccs1",0.00121976090843,0.00121976090843,,0.001204299650943,0.001172339055533,0.001130890798451 +"CCCCCCCCCCC(C)C",0.003679026398294,0.003679026398294,,0.003623919127817,0.003556715308429,0.003469161729441 +"CCCCCCCC1CCCCC1",0.00523019493678,0.00523019493678,,0.005159683858506,0.005076219349197,0.004974621655032 +"c1ccc(CC2CCNCC2)cc1",0.002003307668452,0.002003307668452,,0.001974037345974,0.001927542088939,0.001867731340188 +"C/C=C\Cl",0.000256313554212,0.000256313554212,,0.000250821597679,0.000243158438802,0.000233527124157 +"N#Cc1ccc2c(Cl)cccc2n1",0.001825744162353,0.001825744162353,,0.001811574061474,0.001792363320881,0.001769694961224 +"N#Cc1ccc2c(Cl)cccc2n1",0.002192530310623,0.002192530310623,,0.002139620093867,0.002110795281417,0.002090768614637 +"Cc1nc(-c2ccccc2)c(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1",0.004719585457034,0.004719585457034,,0.004628797677538,0.004484975890453,0.004288701315043 +"Cc1nc(-c2ccccc2)c(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1",0.000644355887184,0.000644355887184,,0.000647129789641,0.000643187548324,0.000635278356872 +"O=c1c(O[C@@H]2O[C@@H](CO)[C@H](O)[C@H]2O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12",0.000146866708032,0.000146866708032,,0.000161598561242,0.000184279585597,0.000214835127367 +"O=c1c(O[C@@H]2O[C@@H](CO)[C@H](O)[C@H]2O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12",0.000813923805172,0.000813923805172,,0.000818602335527,0.000834283226656,0.000856039816226 +"CC1(C)CC1C(=O)OCC(=O)c1ccc(-c2ccccc2)cc1",0.001600427562802,0.001600427562802,,0.001589812946977,0.001567339982101,0.001538327116728 +"CC1(C)CC1C(=O)OCC(=O)c1ccc(-c2ccccc2)cc1",0.003495395407337,0.003495395407337,,0.003407485665784,0.003300844256072,0.003157242089638 +"CCOC(=O)CCc1ccccc1",0.001801679591141,0.001801679591141,,0.001760929958604,0.001713657476351,0.001656223795507 +"CCCN(CCC)C(=O)c1ccccc1",0.002921779086415,0.002921779086415,,0.00287884440293,0.002857322696033,0.002842301414084 +"O=C(Cl)CCl",0.000442319724511,0.000442319724511,,0.000465648171886,0.000489471109914,0.000517689492641 +"O=C(Cl)CCl",0.001028435476674,0.001028435476674,,0.000979074868873,0.000909751498723,0.000820260607825 +"O=C(Cl)CCl",0.000234633280642,0.000234633280642,,0.000229617919318,0.000226696275458,0.00022597057443 +"O=C(Cl)CCl",0.000514462517725,0.000514462517725,,0.000490162362369,0.000460856892773,0.000421270375364 +"Oc1cccnc1O",0.000523957679607,0.000523957679607,,0.000543381074787,0.000571826906485,0.000608765153379 
+"Oc1cccnc1O",0.001376451598925,0.001376451598925,,0.00137473855941,0.001377235383874,0.001383186710387 +"Oc1cccnc1O",0.001572651018218,0.001572651018218,,0.001496223625128,0.001399299531239,0.00127587134396 +"CCC=C(CC)CC",0.004331797155989,0.004331797155989,,0.004213072228435,0.004088980725559,0.003942602777491 +"CCCC(CC)CCC",0.003998977942541,0.003998977942541,,0.003967274317822,0.003925669539396,0.00387614831022 +"CCC(CC)C(CC)CC",0.004792548774896,0.004792548774896,,0.004785947419391,0.004756865263184,0.004713649987006 +"CCC(C)CCC(C)CC",0.002001793736694,0.002001793736694,,0.001956546171054,0.001931924288507,0.001903482293456 +"CCCCC(C)(C)CCC",0.002778685707643,0.002778685707643,,0.002744298378059,0.002725097737185,0.002694624214078 +"CCCCC(C)(CC)CC",0.00332617481953,0.00332617481953,,0.003365976144788,0.003432510276172,0.003513539894214 +"CCCCCCCC(C)C",0.00781520215437,0.00781520215437,,0.007621110676782,0.007430400486176,0.00721619563018 +"CCCCCCC(C)CC",0.004129425306313,0.004129425306313,,0.004076343290657,0.003995268174461,0.00389260968071 diff --git a/chemprop-updated/tests/data/spectra/features.csv b/chemprop-updated/tests/data/spectra/features.csv new file mode 100644 index 0000000000000000000000000000000000000000..e33bad8a862a5cc55c16d2abb30c7cd5a263f5ca --- /dev/null +++ b/chemprop-updated/tests/data/spectra/features.csv @@ -0,0 +1,201 @@ +"gas","liquid","KBr","nujol mull","CCl4" +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,1,0,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,1,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,0,1,0,0 +0,0,0,1,0 +0,0,1,0,0 +0,0,0,0,1 +0,0,1,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,1,0,0 +0,0,0,0,1 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,1,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,1,0,0,0 +0,0,0,1,0 +0,1,0,0,0 +0,0,1,0,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,1,0,0,0 +0,0,0,1,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 
+0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 diff --git a/chemprop-updated/tests/data/spectra/mask.csv b/chemprop-updated/tests/data/spectra/mask.csv new file mode 100644 index 0000000000000000000000000000000000000000..516a569756824d0d81a398e6feca2118d8f5b634 --- /dev/null +++ b/chemprop-updated/tests/data/spectra/mask.csv @@ -0,0 +1,6 @@ +,400,402,404,406,408,410 +"gas",1,1,1,1,1,1 +"liquid",1,1,1,1,1,1 +"KBr",0,0,1,1,1,1 +"nujol mull",1,1,1,1,0,0 +"CCl4",1,1,0,0,1,1 diff --git a/chemprop-updated/tests/data/spectra/test_smiles.csv b/chemprop-updated/tests/data/spectra/test_smiles.csv new file mode 100644 index 0000000000000000000000000000000000000000..c7b41df8cf1c6ccaa5a4a5b8a28bed4f2b29d4d1 --- /dev/null +++ b/chemprop-updated/tests/data/spectra/test_smiles.csv @@ -0,0 +1,201 @@ +"smiles" +"O=C(O)c1ccco1" +"O=C(O)c1ccco1" +"CCOP(=O)(OCC)C(O)[C@@H]1OC(C)(C)O[C@H]1[C@@H]1COC(C)(C)O1" +"CCOP(=O)(OCC)C(O)[C@@H]1OC(C)(C)O[C@H]1[C@@H]1COC(C)(C)O1" +"c1ccc(C2=NOC(c3ccccc3)C2)cc1" +"c1ccc(C2=NOC(c3ccccc3)C2)cc1" +"Clc1ccc(OCc2ccccc2)cc1" +"Clc1ccc(OCc2ccccc2)cc1" +"Cc1ccc(OCc2ccccc2)cc1" +"Cc1ccc(OCc2ccccc2)cc1" +"c1ccc(P(CCP(c2ccccc2)c2ccccc2)c2ccccc2)cc1" +"c1ccc(P(CCP(c2ccccc2)c2ccccc2)c2ccccc2)cc1" +"CC(=O)Oc1ccc(C=O)cc1Br" +"CC(=O)Oc1ccc(C=O)cc1Br" +"CN=C(O)Oc1cccc(C(=O)O)c1" +"CN=C(O)Oc1cccc(C(=O)O)c1" +"CN=C(O)Oc1cccc(C(=O)OC)c1" +"CN=C(O)Oc1cccc(C(=O)OC)c1" +"COc1ccc(OCc2ccccc2)cc1" +"COc1ccc(OCc2ccccc2)cc1" +"COc1cc(/C=C/C(=O)O)ccc1OC(C)=O" +"COc1cc(/C=C/C(=O)O)ccc1OC(C)=O" +"CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)[C@@](C)(CO)[C@@H]5CC[C@]43C)[C@@H]2C1" +"CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)[C@@](C)(CO)[C@@H]5CC[C@]43C)[C@@H]2C1" +"CS(=O)(=O)OC1C2CC3CC(C2)C(=O)C1C3" +"CS(=O)(=O)OC1C2CC3CC(C2)C(=O)C1C3" +"O=C(O)[C@@H]1C[C@@H]2C=CC[C@H](C1)C2" +"O=C(O)[C@@H]1C[C@@H]2C=CC[C@H](C1)C2" +"OC1=NC2CC3CC(C2)CC1C3" +"OC1=NC2CC3CC(C2)CC1C3" +"ON=C1C2CC3CC(C2)CC1C3" +"ON=C1C2CC3CC(C2)CC1C3" +"OC1C2CC3CC(C2)CC1C3" +"OC1C2CC3CC(C2)CC1C3" +"O=C(O)C1CC2CCCC(C2)C1" +"O=C(O)C1CC2CCCC(C2)C1" +"NC1C2CC3CC(C2)CC1C3" +"NC1C2CC3CC(C2)CC1C3" +"O=C(O)C12CC3CC(CC(C3)C1)C2" +"O=C(O)C12CC3CC(CC(C3)C1)C2" +"O=C(O)C12CC3CC(CC(C3)C1)C2" +"O/N=C1/CC2C[C@H]3CC1C[C@@H](C2)C3" +"O/N=C1/CC2C[C@H]3CC1C[C@@H](C2)C3" +"O=C(O)CC1C[C@H]2CCC[C@@H](C1)C2" +"O=C(O)CC1C[C@H]2CCC[C@@H](C1)C2" +"ClC(Cl)[C@]12C[C@@H]3C[C@H](C1)C[C@@](C(Cl)Cl)(C3)C2" +"ClC(Cl)[C@]12C[C@@H]3C[C@H](C1)C[C@@](C(Cl)Cl)(C3)C2" +"O=C(O)C12CC3CC(CC(CBr)(C3)C1)C2" +"O=C(O)C12CC3CC(CC(CBr)(C3)C1)C2" +"CC(O)=NC12CC3CC(CC(C3)C1)C2" +"CC(O)=NC12CC3CC(CC(C3)C1)C2" +"CC(O)=NC1(C)C2CC3CC(C2)CC1C3" +"CC(O)=NC1(C)C2CC3CC(C2)CC1C3" +"CCCCCCCCCC1CCCCC1" +"CCCCCCCCCCC1CCCCC1" +"c1ccc2c(c1)Cc1cc3ccccc3cc1-2" +"c1ccc2c(c1)Cc1cc3ccccc3cc1-2" +"O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1" +"O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1" +"Cc1ccc(/C=C/C(=O)O)o1" +"Cc1ccc(/C=C/C(=O)O)o1" +"CCCCNc1ccccc1" +"CCCCNc1ccccc1" +"NCCNCCN" +"COC(=O)/C=C/c1ccco1" +"COC(=O)/C=C/c1ccco1" +"COC(=O)c1ccc(C(C)=O)o1" +"COC(=O)c1ccc(C(C)=O)o1" +"COC(=O)/C=C/c1ccc(C)o1" +"COC(=O)/C=C/c1ccc(C)o1" +"OCc1cccs1" +"Cc1ccc(C=O)s1" +"Cc1ccc(C=O)s1" +"Cc1ccc(C=O)s1" +"c1cscn1" +"C#CCCO" +"CC/C=C/CC" +"C/C=C/C(CC)CC" +"CCCCCC(C)C(C)C" +"CCCCCC(C)(C)CC" +"CC1(C)C(=O)[C@]2(C)CC[C@H]1C2" +"CCCCCCC(C)(C)C" +"CCC(C)CCCC(C)C" +"Cc1cc(C)c2ccccc2c1" +"C=CCCC(C)=O" +"C=CCCC(C)=O" +"OC/C=C/c1ccccc1" +"OC/C=C/c1ccccc1" +"OC/C=C/c1ccccc1" +"OC/C=C/c1ccccc1" +"C/C=C/C=O" +"C/C=C/C=O" +"Clc1cc(Cl)cc(Cl)c1" +"Clc1cc(Cl)cc(Cl)c1" 
+"Clc1cc(Cl)cc(Cl)c1" +"C#CCO" +"C#CCO" +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1" +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1" +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1" +"BrC(Br)c1cccc(C(Br)Br)c1" +"BrC(Br)c1cccc(C(Br)Br)c1" +"BrC(Br)c1cccc(C(Br)Br)c1" +"CCSC#N" +"CCOC(=O)CC(C(=O)OCC)C(C(=O)OCC)C(=O)OCC" +"CCCCCCCCCCCCCCCCCCCCCC" +"CCCCCCCCCCCCCCCCCCCCCC" +"CC(C)CCC#N" +"CC(C)CCC#N" +"C=CCOC(=O)CCC(=O)OCC=C" +"C=CC1CC=CCC1" +"C=CC1CC=CCC1" +"CCc1ccccn1" +"CCCCCCCC/C=C\CCCCCCCC(=O)OCCCC" +"C#CC(O)c1ccccc1" +"CC(N)CN" +"CCOC(=O)C(=O)C(C)C(=O)OCC" +"O=S(=O)(Cl)c1ccccc1" +"C=CCc1ccc(O)c(OC)c1" +"C=CCc1ccc(O)c(OC)c1" +"ClCC(Cl)CCl" +"C=CCc1ccc(OC(C)=O)c(OC)c1" +"CCc1ccccc1[N+](=O)[O-]" +"CCc1ccccc1[N+](=O)[O-]" +"Cc1ccccc1" +"CC(=O)OC1CCCCC1" +"CC1(C)CC(O)CC(C)(C)N1[O]" +"CC1(C)CC(O)CC(C)(C)N1[O]" +"C/C=C/C=C/C(=O)O" +"C/C=C/C=C/C(=O)O" +"O=C(O)CCN1C(=O)c2ccccc2C1=O" +"O=C(O)CCN1C(=O)c2ccccc2C1=O" +"CCCCCCCCCCCCCCCCC(=O)OC" +"CCCCCCCCCCCCCCCCC(=O)OC" +"O=C1c2ccccc2C(=O)c2cc(Cl)ccc21" +"O=C1c2ccccc2C(=O)c2cc(Cl)ccc21" +"O=C1c2ccccc2C(=O)c2ccccc21" +"O=C1c2ccccc2C(=O)c2ccccc21" +"CCCCCCc1c2ccccc2cc2ccccc12" +"CCCCCCc1c2ccccc2cc2ccccc12" +"FC1(F)C(F)(F)C(F)(F)C2(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C2(F)C1(F)F" +"CSC1=CC(=O)C=CC1=O" +"NCc1ccccc1" +"NCc1ccccc1" +"N=c1nc(O)c2[nH]c(=O)cnc2[nH]1" +"N=c1nc(O)c2[nH]c(=O)cnc2[nH]1" +"CCCCCCCCCCCc1c2ccccc2cc2ccccc12" +"CCCCCCCCCCCc1c2ccccc2cc2ccccc12" +"CC(CC(=O)O)c1ccccc1" +"CC(CC(=O)O)c1ccccc1" +"CC(CC(=O)O)c1ccccc1" +"COC(=O)[C@@H]1CCC[C@H]1C(=O)OC" +"CC(=O)n1cnc(/C=C2\N=C(c3ccccc3)OC2=O)c1" +"CC(=O)n1cnc(/C=C2\N=C(c3ccccc3)OC2=O)c1" +"COC(=O)/C=C/c1ccc([N+](=O)[O-])s1" +"COC(=O)/C=C/c1ccc([N+](=O)[O-])s1" +"COC(=O)/C=C/c1ccc(Br)s1" +"COC(=O)/C=C/c1ccc(Br)s1" +"O=C(O)/C=C/c1ccc([N+](=O)[O-])s1" +"O=C(O)/C=C/c1ccc([N+](=O)[O-])s1" +"O=C(O)/C=C/c1ccc(Cl)s1" +"O=C(O)/C=C/c1ccc(Cl)s1" +"O=C(O)C=Cc1ccc(Br)s1" +"O=C(O)C=Cc1ccc(Br)s1" +"C/C=C/C(=O)OC(C)(C)C" +"O=C(O)[C@@H]1CC[C@H]1C(=O)O" +"O=C(O)[C@@H]1CC[C@H]1C(=O)O" +"Cc1ccc(C=NO)s1" +"Cc1ccc(C=NO)s1" +"Cc1ccc(Br)s1" +"O=[N+]([O-])c1cccs1" +"CCCCCCCCCCC(C)C" +"CCCCCCCC1CCCCC1" +"c1ccc(CC2CCNCC2)cc1" +"C/C=C\Cl" +"N#Cc1ccc2c(Cl)cccc2n1" +"N#Cc1ccc2c(Cl)cccc2n1" +"Cc1nc(-c2ccccc2)c(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1" +"Cc1nc(-c2ccccc2)c(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1" +"O=c1c(O[C@@H]2O[C@@H](CO)[C@H](O)[C@H]2O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12" +"O=c1c(O[C@@H]2O[C@@H](CO)[C@H](O)[C@H]2O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12" +"CC1(C)CC1C(=O)OCC(=O)c1ccc(-c2ccccc2)cc1" +"CC1(C)CC1C(=O)OCC(=O)c1ccc(-c2ccccc2)cc1" +"CCOC(=O)CCc1ccccc1" +"CCCN(CCC)C(=O)c1ccccc1" +"O=C(Cl)CCl" +"O=C(Cl)CCl" +"O=C(Cl)CCl" +"O=C(Cl)CCl" +"Oc1cccnc1O" +"Oc1cccnc1O" +"Oc1cccnc1O" +"CCC=C(CC)CC" +"CCCC(CC)CCC" +"CCC(CC)C(CC)CC" +"CCC(C)CCC(C)CC" +"CCCCC(C)(C)CCC" +"CCCCC(C)(CC)CC" +"CCCCCCCC(C)C" +"CCCCCCC(C)CC" diff --git a/chemprop-updated/tests/data/test_smiles.csv b/chemprop-updated/tests/data/test_smiles.csv new file mode 100644 index 0000000000000000000000000000000000000000..fae67e410bbe2d62968b7af8174ac4c083536782 --- /dev/null +++ b/chemprop-updated/tests/data/test_smiles.csv @@ -0,0 +1,11 @@ +smiles +CCOP(=S)(OCC)SC(CCl)N2C(=O)c1ccccc1C2=O +Brc1ccc(Br)cc1 +Cn2c(=O)on(c1ccc(Cl)c(Cl)c1)c2=O +Oc1ccc(cc1)c2ccccc2 +CC1=C(CCCO1)C(=O)Nc2ccccc2 +CCOC=C +CCC#C +COc1ncnc2nccnc12 +CCCCC(C)(O)CC +Clc1ccc(Cl)cc1 diff --git a/chemprop-updated/tests/integration/conftest.py b/chemprop-updated/tests/integration/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..707132223318188dc7f46fdc33043bd7085b720a --- /dev/null +++ 
b/chemprop-updated/tests/integration/conftest.py @@ -0,0 +1,74 @@ +import warnings + +import pytest + +from chemprop import models, nn +from chemprop.models import multi + +warnings.filterwarnings("ignore", module=r"lightning.*", append=True) + + +@pytest.fixture(scope="session") +def mpnn(request): + message_passing, agg = request.param + ffn = nn.RegressionFFN() + + return models.MPNN(message_passing, agg, ffn, True) + + +@pytest.fixture(scope="session") +def regression_mpnn_mve(request): + agg = nn.SumAggregation() + ffn = nn.MveFFN() + + return models.MPNN(request.param, agg, ffn, True) + + +@pytest.fixture(scope="session") +def regression_mpnn_evidential(request): + agg = nn.SumAggregation() + ffn = nn.EvidentialFFN() + + return models.MPNN(request.param, agg, ffn, True) + + +@pytest.fixture(scope="session") +def classification_mpnn_dirichlet(request): + agg = nn.SumAggregation() + ffn = nn.BinaryDirichletFFN() + + return models.MPNN(request.param, agg, ffn, True) + + +@pytest.fixture(scope="session") +def classification_mpnn(request): + agg = nn.SumAggregation() + ffn = nn.BinaryClassificationFFN() + + return models.MPNN(request.param, agg, ffn, True) + + +@pytest.fixture(scope="session") +def classification_mpnn_multiclass(request): + agg = nn.SumAggregation() + ffn = nn.MulticlassClassificationFFN(n_classes=3) + + return models.MPNN(request.param, agg, ffn, True) + + +@pytest.fixture(scope="session") +def classification_mpnn_multiclass_dirichlet(request): + agg = nn.SumAggregation() + ffn = nn.MulticlassDirichletFFN(n_classes=3) + + return models.MPNN(request.param, agg, ffn, True) + + +@pytest.fixture(scope="session") +def mcmpnn(request): + blocks, n_components, shared = request.param + mcmp = nn.MulticomponentMessagePassing(blocks, n_components, shared=shared) + agg = nn.SumAggregation() + ffn = nn.RegressionFFN(input_dim=mcmp.output_dim) + + return multi.MulticomponentMPNN(mcmp, agg, ffn, True) diff --git a/chemprop-updated/tests/integration/test_classification_mol.py b/chemprop-updated/tests/integration/test_classification_mol.py new file mode 100644 index 0000000000000000000000000000000000000000..7052602a4f03581f7598a9eee2dd20d6318aac14 --- /dev/null +++ b/chemprop-updated/tests/integration/test_classification_mol.py @@ -0,0 +1,134 @@ +"""This integration test is designed to ensure that the chemprop model can _overfit_ the training +data. 
A small enough dataset should be memorizable by even a moderately sized model, so this test +should generally pass.""" + +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader +from torchmetrics import functional as F + +from chemprop import nn +from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch + + +@pytest.fixture +def data(mol_classification_data): + smis, Y = mol_classification_data + + return [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, Y)] + + +@pytest.fixture +def dataloader(data): + dset = MoleculeDataset(data) + + return DataLoader(dset, 32, collate_fn=collate_batch) + + +@pytest.mark.parametrize( + "classification_mpnn", [nn.BondMessagePassing(), nn.AtomMessagePassing()], indirect=True +) +@pytest.mark.integration +def test_quick(classification_mpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(classification_mpnn, dataloader, None) + + +@pytest.mark.parametrize( + "classification_mpnn_dirichlet", + [nn.BondMessagePassing(), nn.AtomMessagePassing()], + indirect=True, +) +@pytest.mark.integration +def test_dirichlet_quick(classification_mpnn_dirichlet, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(classification_mpnn_dirichlet, dataloader, None) + + +@pytest.mark.parametrize( + "classification_mpnn", [nn.BondMessagePassing(), nn.AtomMessagePassing()], indirect=True +) +@pytest.mark.integration +def test_overfit(classification_mpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(classification_mpnn, dataloader) + + predss = [] + targetss = [] + masks = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = classification_mpnn(bmg) + not_nan = ~targets.isnan() + predss.append(preds) + targetss.append(targets) + masks.append(not_nan) + + preds = torch.cat(predss) + targets = torch.cat(targetss) + mask = torch.cat(masks) + auroc = F.auroc(preds[mask], targets[mask].long(), task="binary") + assert auroc >= 0.99 + + +@pytest.mark.parametrize( + "classification_mpnn_dirichlet", + [nn.BondMessagePassing(), nn.AtomMessagePassing()], + indirect=True, +) +@pytest.mark.integration +def test_dirichlet_overfit(classification_mpnn_dirichlet, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=200, + overfit_batches=1.00, + ) + trainer.fit(classification_mpnn_dirichlet, dataloader) + + predss = [] + targetss = [] + masks = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = classification_mpnn_dirichlet(bmg) + not_nan = ~targets.isnan() + predss.append(preds) + targetss.append(targets) + masks.append(not_nan) + + preds = torch.cat(predss)[..., 0] + targets = torch.cat(targetss) + mask = torch.cat(masks) + auroc = F.auroc(preds[mask], targets[mask].long(), task="binary") + assert auroc >= 0.99 diff --git a/chemprop-updated/tests/integration/test_classification_mol_multiclass.py 
b/chemprop-updated/tests/integration/test_classification_mol_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..0f9724e0873eea669f8d81f8c39f18651203ca34 --- /dev/null +++ b/chemprop-updated/tests/integration/test_classification_mol_multiclass.py @@ -0,0 +1,136 @@ +"""This integration test is designed to ensure that the chemprop model can _overfit_ the training +data. A small enough dataset should be memorizable by even a moderately sized model, so this test +should generally pass.""" + +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader +import torchmetrics + +from chemprop import nn +from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch + + +@pytest.fixture +def data(mol_classification_data_multiclass): + smis, Y = mol_classification_data_multiclass + + return [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, Y)] + + +@pytest.fixture +def dataloader(data): + dset = MoleculeDataset(data) + + return DataLoader(dset, 32, collate_fn=collate_batch) + + +@pytest.mark.parametrize( + "classification_mpnn_multiclass", + [nn.BondMessagePassing(), nn.AtomMessagePassing()], + indirect=True, +) +@pytest.mark.integration +def test_quick(classification_mpnn_multiclass, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(classification_mpnn_multiclass, dataloader, None) + + +@pytest.mark.parametrize( + "classification_mpnn_multiclass_dirichlet", + [nn.BondMessagePassing(), nn.AtomMessagePassing()], + indirect=True, +) +@pytest.mark.integration +def test_dirichlet_quick(classification_mpnn_multiclass_dirichlet, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(classification_mpnn_multiclass_dirichlet, dataloader, None) + + +@pytest.mark.parametrize( + "classification_mpnn_multiclass", + [nn.BondMessagePassing(), nn.AtomMessagePassing()], + indirect=True, +) +@pytest.mark.integration +def test_overfit(classification_mpnn_multiclass, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(classification_mpnn_multiclass, dataloader) + + predss = [] + targetss = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = classification_mpnn_multiclass(bmg) + preds = preds.transpose(1, 2) + predss.append(preds) + targetss.append(targets) + + preds = torch.cat(predss) + targets = torch.cat(targetss) + accuracy = torchmetrics.functional.accuracy( + preds, targets.long(), task="multiclass", num_classes=3 + ) + assert accuracy >= 0.99 + + +@pytest.mark.parametrize( + "classification_mpnn_multiclass_dirichlet", + [nn.BondMessagePassing(), nn.AtomMessagePassing()], + indirect=True, +) +@pytest.mark.integration +def test_dirichlet_overfit(classification_mpnn_multiclass_dirichlet, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=200, + overfit_batches=1.00, + ) + trainer.fit(classification_mpnn_multiclass_dirichlet, dataloader) + + predss = [] + targetss 
= [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = classification_mpnn_multiclass_dirichlet(bmg) + preds = preds.transpose(1, 2) + predss.append(preds) + targetss.append(targets) + + preds = torch.cat(predss) + targets = torch.cat(targetss) + accuracy = torchmetrics.functional.accuracy( + preds, targets.long(), task="multiclass", num_classes=3 + ) + assert accuracy >= 0.99 diff --git a/chemprop-updated/tests/integration/test_output_transform.py b/chemprop-updated/tests/integration/test_output_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..7890e3aa08fcaa3009311dc773bc01bbca150f82 --- /dev/null +++ b/chemprop-updated/tests/integration/test_output_transform.py @@ -0,0 +1,61 @@ +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader + +from chemprop import nn +from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch +from chemprop.models import MPNN + + +@pytest.fixture +def data(mol_regression_data): + smis, Y = mol_regression_data + + return [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, Y)] + + +def test_output_transform(data): + train_dset = MoleculeDataset(data) + output_scaler = train_dset.normalize_targets() + train_loader = DataLoader(train_dset, 32, collate_fn=collate_batch) + + test_dset = MoleculeDataset(data) + test_loader = DataLoader(test_dset, 32, collate_fn=collate_batch, shuffle=False) + + output_transform = nn.UnscaleTransform.from_standard_scaler(output_scaler) + ffn = nn.RegressionFFN(output_transform=output_transform) + mpnn = MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), ffn) + + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(mpnn, train_loader) + + mpnn.train() + predss = [] + for batch in train_loader: + bmg, _, _, targets, *_ = batch + preds = mpnn(bmg) + predss.append(preds) + + preds = torch.cat(predss) + std, mean = torch.std_mean(preds, dim=0) + + assert torch.allclose(std, torch.ones_like(std), atol=0.1) + assert torch.allclose(mean, torch.zeros_like(mean), atol=0.1) + + predss = trainer.predict(mpnn, test_loader) + preds = torch.cat(predss) + std, mean = torch.std_mean(preds, dim=0) + y_std, y_mean = torch.std_mean(torch.from_numpy(test_dset.Y).float(), dim=0) + + assert torch.allclose(std, y_std, atol=0.1) + assert torch.allclose(mean, y_mean, atol=0.1) diff --git a/chemprop-updated/tests/integration/test_regression_mol+mol.py b/chemprop-updated/tests/integration/test_regression_mol+mol.py new file mode 100644 index 0000000000000000000000000000000000000000..22a811ae26ed3677cf7f9c17e5df5d5830571d1f --- /dev/null +++ b/chemprop-updated/tests/integration/test_regression_mol+mol.py @@ -0,0 +1,87 @@ +"""This integration test is designed to ensure that the chemprop model can _overfit_ the training +data. 
A small enough dataset should be memorizable by even a moderately sized model, so this test +should generally pass.""" + +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader + +from chemprop import nn +from chemprop.data import ( + MoleculeDatapoint, + MoleculeDataset, + MulticomponentDataset, + collate_multicomponent, +) + +N_COMPONENTS = 2 +pytestmark = [ + pytest.mark.parametrize( + "mcmpnn", + [ + ([nn.BondMessagePassing() for _ in range(N_COMPONENTS)], N_COMPONENTS, False), + ([nn.AtomMessagePassing() for _ in range(N_COMPONENTS)], N_COMPONENTS, False), + ([nn.BondMessagePassing()], N_COMPONENTS, True), + ], + indirect=True, + ), + pytest.mark.integration, +] + + +@pytest.fixture +def datas(mol_mol_regression_data): + smis1, smis2, Y = mol_mol_regression_data + + return [ + [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis1, Y)], + [MoleculeDatapoint.from_smi(smi) for smi in smis2], + ] + + +@pytest.fixture +def dataloader(datas): + dsets = [MoleculeDataset(data) for data in datas] + mcdset = MulticomponentDataset(dsets) + mcdset.normalize_targets() + + return DataLoader(mcdset, 32, collate_fn=collate_multicomponent) + + +def test_quick(mcmpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(mcmpnn, dataloader) + + +def test_overfit(mcmpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(mcmpnn, dataloader) + + errors = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = mcmpnn(bmg) + errors.append(preds - targets) + + errors = torch.cat(errors) + mse = errors.square().mean().item() + + assert mse <= 0.05 diff --git a/chemprop-updated/tests/integration/test_regression_mol.py b/chemprop-updated/tests/integration/test_regression_mol.py new file mode 100644 index 0000000000000000000000000000000000000000..3d17277de9cad01e01d35298a37f2a99701ae187 --- /dev/null +++ b/chemprop-updated/tests/integration/test_regression_mol.py @@ -0,0 +1,118 @@ +"""This integration test is designed to ensure that the chemprop model can _overfit_ the training +data. 
A small enough dataset should be memorizable by even a moderately sized model, so this test +should generally pass.""" + +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader + +from chemprop import nn +from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch + + +@pytest.fixture +def data(mol_regression_data): + smis, Y = mol_regression_data + + return [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, Y)] + + +@pytest.fixture +def dataloader(data): + dset = MoleculeDataset(data) + dset.normalize_targets() + + return DataLoader(dset, 32, collate_fn=collate_batch) + + +@pytest.mark.parametrize( + "mpnn", + [ + (nn.BondMessagePassing(), nn.MeanAggregation()), + (nn.AtomMessagePassing(), nn.SumAggregation()), + (nn.BondMessagePassing(), nn.NormAggregation()), + ], + indirect=True, +) +@pytest.mark.integration +def test_quick(mpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(mpnn, dataloader, None) + + +@pytest.mark.parametrize( + "mpnn", + [ + (nn.BondMessagePassing(), nn.MeanAggregation()), + (nn.AtomMessagePassing(), nn.SumAggregation()), + (nn.BondMessagePassing(), nn.NormAggregation()), + ], + indirect=True, +) +@pytest.mark.integration +def test_overfit(mpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(mpnn, dataloader) + + errors = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = mpnn(bmg) + errors.append(preds - targets) + + errors = torch.cat(errors) + mse = errors.square().mean().item() + + assert mse <= 0.05 + + +@pytest.mark.parametrize( + "regression_mpnn_mve", [nn.BondMessagePassing(), nn.AtomMessagePassing()], indirect=True +) +@pytest.mark.integration +def test_mve_quick(regression_mpnn_mve, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(regression_mpnn_mve, dataloader, None) + + +@pytest.mark.parametrize( + "regression_mpnn_evidential", [nn.BondMessagePassing(), nn.AtomMessagePassing()], indirect=True +) +@pytest.mark.integration +def test_evidential_quick(regression_mpnn_evidential, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(regression_mpnn_evidential, dataloader, None) diff --git a/chemprop-updated/tests/integration/test_regression_rxn+mol.py b/chemprop-updated/tests/integration/test_regression_rxn+mol.py new file mode 100644 index 0000000000000000000000000000000000000000..c128db0957e8a3a5cbc46517f3e8437515d55703 --- /dev/null +++ b/chemprop-updated/tests/integration/test_regression_rxn+mol.py @@ -0,0 +1,90 @@ +"""This integration test is designed to ensure that the chemprop model can _overfit_ the training +data. 
A small enough dataset should be memorizable by even a moderately sized model, so this test +should generally pass.""" + +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader + +from chemprop import nn +from chemprop.data import ( + MoleculeDatapoint, + MoleculeDataset, + MulticomponentDataset, + ReactionDatapoint, + ReactionDataset, + collate_multicomponent, +) +from chemprop.featurizers.molgraph import CondensedGraphOfReactionFeaturizer + +N_COMPONENTS = 2 +SHAPE = CondensedGraphOfReactionFeaturizer().shape +pytestmark = [ + pytest.mark.parametrize( + "mcmpnn", + [ + ([nn.BondMessagePassing(*SHAPE), nn.BondMessagePassing()], N_COMPONENTS, False), + ([nn.AtomMessagePassing(*SHAPE), nn.AtomMessagePassing()], N_COMPONENTS, False), + ], + indirect=True, + ), + pytest.mark.integration, +] + + +@pytest.fixture +def datas(rxn_mol_regression_data): + rxns, smis, Y = rxn_mol_regression_data + + return [ + [ReactionDatapoint.from_smi(smi, y) for smi, y in zip(rxns, Y)], + [MoleculeDatapoint.from_smi(smi) for smi in smis], + ] + + +@pytest.fixture +def dataloader(datas): + dsets = [ReactionDataset(datas[0]), MoleculeDataset(datas[1])] + dset = MulticomponentDataset(dsets) + dset.normalize_targets() + + return DataLoader(dset, 32, collate_fn=collate_multicomponent) + + +def test_quick(mcmpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(mcmpnn, dataloader) + + +def test_overfit(mcmpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(mcmpnn, dataloader) + + errors = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = mcmpnn(bmg) + errors.append(preds - targets) + + errors = torch.cat(errors) + mse = errors.square().mean().item() + + assert mse <= 0.05 diff --git a/chemprop-updated/tests/integration/test_regression_rxn.py b/chemprop-updated/tests/integration/test_regression_rxn.py new file mode 100644 index 0000000000000000000000000000000000000000..46e2e899442b8d088e093563e8b35fcbfc5a2193 --- /dev/null +++ b/chemprop-updated/tests/integration/test_regression_rxn.py @@ -0,0 +1,77 @@ +"""This integration test is designed to ensure that the chemprop model can _overfit_ the training +data. 
A small enough dataset should be memorizable by even a moderately sized model, so this test +should generally pass.""" + +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader + +from chemprop import nn +from chemprop.data import ReactionDatapoint, ReactionDataset, collate_batch +from chemprop.featurizers.molgraph import CondensedGraphOfReactionFeaturizer + +SHAPE = CondensedGraphOfReactionFeaturizer().shape +pytestmark = pytest.mark.parametrize( + "mpnn", + [ + (nn.BondMessagePassing(*SHAPE), nn.MeanAggregation()), + (nn.AtomMessagePassing(*SHAPE), nn.SumAggregation()), + (nn.BondMessagePassing(*SHAPE), nn.NormAggregation()), + ], + indirect=True, +) + + +@pytest.fixture +def data(rxn_regression_data): + smis, Y = rxn_regression_data + + return [ReactionDatapoint.from_smi(smi, y) for smi, y in zip(smis, Y)] + + +@pytest.fixture +def dataloader(data): + dset = ReactionDataset(data) + dset.normalize_targets() + + return DataLoader(dset, 32, collate_fn=collate_batch) + + +def test_quick(dataloader, mpnn): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(mpnn, dataloader) + + +def test_overfit(dataloader, mpnn): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(mpnn, dataloader) + + with torch.inference_mode(): + errors = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = mpnn(bmg) + errors.append(preds - targets) + + errors = torch.cat(errors) + mse = errors.square().mean().item() + + assert mse <= 0.01 diff --git a/chemprop-updated/tests/regenerate_models.sh b/chemprop-updated/tests/regenerate_models.sh new file mode 100644 index 0000000000000000000000000000000000000000..942e46a123466aa19b86b9068f2c8b5d8d597b99 --- /dev/null +++ b/chemprop-updated/tests/regenerate_models.sh @@ -0,0 +1,105 @@ +#!/bin/bash -l + +CHEMPROP_ENV=$1 +CHEMPROP_PATH=$2 + +if [ -z "${CHEMPROP_ENV}" ] || [ -z "${CHEMPROP_PATH}" ]; then + echo "Usage: regenerate_models.sh " + exit 1 +fi + +conda activate $CHEMPROP_ENV + +# test_cli_classification_mol + +rm -rf test_cli_classification_mol + +chemprop train -i $CHEMPROP_PATH/tests/data/classification/mol.csv --accelerator cpu --epochs 3 --num-workers 0 --task-type classification --save-dir test_cli_classification_mol + +cp -L test_cli_classification_mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_classification_mol.pt + +# test_cli_classification_mol_multiclass + +rm -rf test_cli_classification_mol_multiclass + +chemprop train -i $CHEMPROP_PATH/tests/data/classification/mol_multiclass.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_classification_mol_multiclass --task-type multiclass + +cp -L test_cli_classification_mol_multiclass/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_classification_mol_multiclass.pt + +# test_cli_regression_mol+mol + +rm -rf test_cli_regression_mol+mol + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/mol+mol/mol+mol.csv --accelerator cpu --epochs 3 --num-workers 0 --smiles-columns smiles solvent --save-dir test_cli_regression_mol+mol + +cp -L test_cli_regression_mol+mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_mol+mol.pt + +cp -L 
test_cli_regression_mol+mol/model_0/checkpoints/best*.ckpt $CHEMPROP_PATH/tests/data/example_model_v2_regression_mol+mol.ckpt + +# test_cli_regression_mol + +rm -rf test_cli_regression_mol + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/mol/mol.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_regression_mol + +cp -L test_cli_regression_mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_mol.pt + +cp -L test_cli_regression_mol/model_0/checkpoints/best*.ckpt $CHEMPROP_PATH/tests/data/example_model_v2_regression_mol.ckpt + +# test_cli_regression_mol_multitask + +rm -rf test_cli_regression_mol_multitask + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/mol_multitask.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_regression_mol_multitask + +cp -L test_cli_regression_mol_multitask/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_mol_multitask.pt + +# test_cli_regression_rxn+mol + +rm -rf test_cli_regression_rxn+mol + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/rxn+mol/rxn+mol.csv --accelerator cpu --epochs 3 --num-workers 0 --reaction-columns rxn_smiles --smiles-columns solvent_smiles --save-dir test_cli_regression_rxn+mol + +cp -L test_cli_regression_rxn+mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_rxn+mol.pt + +# test_cli_regression_rxn + +rm -rf test_cli_regression_rxn + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/rxn/rxn.csv --accelerator cpu --epochs 3 --num-workers 0 --reaction-columns smiles --save-dir test_cli_regression_rxn + +cp -L test_cli_regression_rxn/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_rxn.pt + +cp -L test_cli_regression_rxn/model_0/checkpoints/best*.ckpt $CHEMPROP_PATH/tests/data/example_model_v2_regression_rxn.ckpt + +# test_cli_regression_mve_mol + +rm -rf test_cli_regression_mve_mol + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/mol/mol.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_regression_mve_mol --task-type regression-mve + +cp -L test_cli_regression_mve_mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_mve_mol.pt + +# test_cli_regression_evidential_mol + +rm -rf test_cli_regression_evidential_mol + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/mol/mol.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_regression_evidential_mol --task-type regression-evidential + +cp -L test_cli_regression_evidential_mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_evidential_mol.pt + +# test_cli_classification_dirichlet_mol + +rm -rf test_cli_classification_dirichlet_mol + +chemprop train -i $CHEMPROP_PATH/tests/data/classification/mol.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_classification_dirichlet_mol --task-type classification-dirichlet + +cp -L test_cli_classification_dirichlet_mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_classification_dirichlet_mol.pt + +# test_cli_multiclass_dirichlet_mol + +rm -rf test_cli_multiclass_dirichlet_mol + +chemprop train -i $CHEMPROP_PATH/tests/data/classification/mol_multiclass.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_multiclass_dirichlet_mol --task-type multiclass-dirichlet + +cp -L test_cli_multiclass_dirichlet_mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_multiclass_dirichlet_mol.pt \ No newline at end of file diff --git
a/chemprop-updated/tests/unit/data/test_data_utils.py b/chemprop-updated/tests/unit/data/test_data_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b857bf9e49a4c80494e8eaed4d7683e278ef21a3 --- /dev/null +++ b/chemprop-updated/tests/unit/data/test_data_utils.py @@ -0,0 +1,158 @@ +from astartes import train_val_test_split +from astartes.utils.warnings import NormalizationWarning +import numpy as np +import pytest +from rdkit import Chem + +from chemprop.data.splitting import _unpack_astartes_result, make_split_indices + + +@pytest.fixture(params=[["C", "CC", "CCC", "CN", "CCN", "CCCN", "CCCCN", "CO", "CCO", "CCCO"]]) +def mol_data(request): + """A dataset with single molecules""" + return [Chem.MolFromSmiles(smi) for smi in request.param] + + +@pytest.fixture(params=[["C", "CC", "CN", "CN", "CO", "C"]]) +def mol_data_with_repeated_mols(request): + """A dataset with repeated single molecules""" + return [Chem.MolFromSmiles(smi) for smi in request.param] + + +@pytest.fixture(params=[["C", "CC", "CCC", "C1CC1", "C1CCC1"]]) +def molecule_dataset_with_rings(request): + """A dataset with rings (for scaffold splitting)""" + return [Chem.MolFromSmiles(smi) for smi in request.param] + + +def test_splits_sum1_warning(mol_data): + """Testing that the splits are normalized to 1, for overspecified case.""" + with pytest.warns(NormalizationWarning): + make_split_indices(mols=mol_data, sizes=(0.4, 0.6, 0.2)) + + +def test_splits_sum2_warning(mol_data): + """Testing that the splits are normalized to 1, for underspecified case.""" + with pytest.warns(NormalizationWarning): + make_split_indices(mols=mol_data, sizes=(0.1, 0.1, 0.1)) + + +def test_three_splits_provided(mol_data): + """Testing that three splits are provided""" + with pytest.raises(ValueError): + make_split_indices(mols=mol_data, sizes=(0.8, 0.2)) + + +def test_seed0(mol_data): + """ + Testing that make_split_indices can get expected output using astartes as backend for random split with seed 0. + Note: the behaviour of randomness for data splitting is not controlled by chemprop but by the chosen backend. + """ + train, val, test = make_split_indices(mols=mol_data, seed=0) + train_astartes, val_astartes, test_astartes = _unpack_astartes_result( + train_val_test_split(np.arange(len(mol_data)), sampler="random", random_state=0), True + ) + assert set(train[0]) == set(train_astartes) + assert set(val[0]) == set(val_astartes) + assert set(test[0]) == set(test_astartes) + + +def test_seed100(mol_data): + """ + Testing that make_split_indices can get expected output using astartes as backend for random split with seed 100. + Note: the behaviour of randomness for data splitting is not controlled by chemprop but by the chosen backend. 
+ """ + train, val, test = make_split_indices(mols=mol_data, seed=100) + train_astartes, val_astartes, test_astartes = _unpack_astartes_result( + train_val_test_split(np.arange(len(mol_data)), sampler="random", random_state=100), True + ) + assert set(train[0]) == set(train_astartes) + assert set(val[0]) == set(val_astartes) + assert set(test[0]) == set(test_astartes) + + +def test_split_4_4_2(mol_data): + """Testing the random split with changed sizes""" + train, val, test = make_split_indices(mols=mol_data, sizes=(0.4, 0.4, 0.2)) + train_astartes, val_astartes, test_astartes = _unpack_astartes_result( + train_val_test_split( + np.arange(len(mol_data)), + sampler="random", + train_size=0.4, + val_size=0.4, + test_size=0.2, + random_state=0, + ), + True, + ) + assert set(train[0]) == set(train_astartes) + assert set(val[0]) == set(val_astartes) + assert set(test[0]) == set(test_astartes) + + +def test_split_empty_validation_set(mol_data): + """Testing the random split with an empty validation set""" + train, val, test = make_split_indices(mols=mol_data, sizes=(0.4, 0, 0.6)) + assert set(val[0]) == set([]) + + +def test_random_split(mol_data_with_repeated_mols): + """ + Testing if random split yield expected results. + Note: This test mainly serves as a red flag. Test failure strongly indicates unexpected change of data splitting backend that needs attention. + """ + split_type = "random" + train, val, test = make_split_indices( + mols=mol_data_with_repeated_mols, sizes=(0.4, 0.4, 0.2), split=split_type + ) + + assert train[0] == [2, 1] + + +def test_repeated_smiles(mol_data_with_repeated_mols): + """ + Testing if random split with repeated smiles yield expected results. + Note: This test mainly serves as a red flag. Test failure strongly indicates unexpected change of data splitting backend that needs attention. + """ + split_type = "random_with_repeated_smiles" + train, val, test = make_split_indices( + mols=mol_data_with_repeated_mols, sizes=(0.8, 0.0, 0.2), split=split_type + ) + + assert train[0] == [4, 1, 0, 5] + assert test[0] == [2, 3] + + +def test_kennard_stone(mol_data): + """ + Testing if Kennard-Stone split yield expected results. + Note: This test mainly serves as a red flag. Test failure strongly indicates unexpected change of data splitting backend that needs attention. + """ + split_type = "kennard_stone" + train, val, test = make_split_indices(mols=mol_data, sizes=(0.4, 0.4, 0.2), split=split_type) + + assert set(test[0]) == set([9, 5]) + + +def test_kmeans(mol_data): + """ + Testing if Kmeans split yield expected results. + Note: This test mainly serves as a red flag. Test failure strongly indicates unexpected change of data splitting backend that needs attention. + """ + split_type = "kmeans" + train, val, test = make_split_indices(mols=mol_data, sizes=(0.5, 0.0, 0.5), split=split_type) + + assert train[0] == [0, 1, 2, 3, 7, 8, 9] + + +def test_scaffold(molecule_dataset_with_rings): + """ + Testing if Bemis-Murcko Scaffolds split yield expected results. + Note: This test mainly serves as a red flag. Test failure strongly indicates unexpected change of data splitting backend that needs attention. 
+ """ + split_type = "scaffold_balanced" + train, val, test = make_split_indices( + mols=molecule_dataset_with_rings, sizes=(0.3, 0.3, 0.3), split=split_type + ) + + assert train[0] == [0, 1, 2] diff --git a/chemprop-updated/tests/unit/data/test_dataloader.py b/chemprop-updated/tests/unit/data/test_dataloader.py new file mode 100644 index 0000000000000000000000000000000000000000..b1a3a5ad65a92e34067db3938b7fe1755953bd8b --- /dev/null +++ b/chemprop-updated/tests/unit/data/test_dataloader.py @@ -0,0 +1,84 @@ +import numpy as np +import pytest +import torch + +from chemprop.data.collate import BatchMolGraph, collate_batch +from chemprop.data.datasets import Datum +from chemprop.data.molgraph import MolGraph + + +@pytest.fixture +def datum_1(): + mol_graph1 = MolGraph( + V=np.array([[1.0], [2.0], [3.0]]), + E=np.array([[0.5], [1.5]]), + edge_index=np.array([[0, 1, 0, 2], [1, 0, 2, 0]]), + rev_edge_index=np.array([1, 0, 3, 2]), + ) + return Datum( + mol_graph1, + V_d=np.array([[1.0], [2.0], [4.0]]), + x_d=[3, 4], + y=[6, 7], + weight=[8.0], + lt_mask=[True], + gt_mask=[False], + ) + + +@pytest.fixture +def datum_2(): + mol_graph2 = MolGraph( + V=np.array([[4.0], [5.0]]), + E=np.array([[2.5]]), + edge_index=np.array([[0, 1], [1, 0]]), + rev_edge_index=np.array([1, 0]), + ) + return Datum( + mol_graph2, + V_d=np.array([[5.0], [7.0]]), + x_d=[8, 9], + y=[6, 4], + weight=[1.0], + lt_mask=[False], + gt_mask=[True], + ) + + +def test_collate_batch_single_graph(datum_1): + batch = [datum_1] + + result = collate_batch(batch) + mgs, V_ds, x_ds, ys, weights, lt_masks, gt_masks = result + + assert isinstance(result, tuple) + assert isinstance(mgs, BatchMolGraph) + assert ( + mgs.V.shape[0] == V_ds.shape[0] + ) # V is number of atoms x number of atom features, V_ds is number of atoms x number of atom descriptors + torch.testing.assert_close(V_ds, torch.tensor([[1.0], [2.0], [4.0]], dtype=torch.float32)) + torch.testing.assert_close(x_ds, torch.tensor([[3.0, 4.0]], dtype=torch.float32)) + torch.testing.assert_close(ys, torch.tensor([[6.0, 7.0]], dtype=torch.float32)) + torch.testing.assert_close(weights, torch.tensor([[[8.0]]], dtype=torch.float32)) + torch.testing.assert_close(lt_masks, torch.tensor([[1]], dtype=torch.bool)) + torch.testing.assert_close(gt_masks, torch.tensor([[0]], dtype=torch.bool)) + + +def test_collate_batch_multiple_graphs(datum_1, datum_2): + batch = [datum_1, datum_2] + + result = collate_batch(batch) + mgs, V_ds, x_ds, ys, weights, lt_masks, gt_masks = result + + assert isinstance(mgs, BatchMolGraph) + assert ( + mgs.V.shape[0] == V_ds.shape[0] + ) # V is number of atoms x number of atom features, V_ds is number of atoms x number of atom descriptors + torch.testing.assert_close( + V_ds, torch.tensor([[1.0], [2.0], [4.0], [5.0], [7.0]], dtype=torch.float32) + ) + torch.testing.assert_close(x_ds, torch.tensor([[3.0, 4.0], [8.0, 9.0]], dtype=torch.float32)) + torch.testing.assert_close(ys, torch.tensor([[6.0, 7.0], [6.0, 4.0]], dtype=torch.float32)) + torch.testing.assert_close(weights, torch.tensor([[[8.0]], [[1.0]]], dtype=torch.float32)) + torch.testing.assert_close(lt_masks, torch.tensor([[1], [0]], dtype=torch.bool)) + torch.testing.assert_close(gt_masks, torch.tensor([[0], [1]], dtype=torch.bool)) diff --git a/chemprop-updated/tests/unit/data/test_datapoint.py b/chemprop-updated/tests/unit/data/test_datapoint.py new file mode 100644 index 0000000000000000000000000000000000000000..89107575c435f21fc30dcb557d55a1943321edf5 --- /dev/null +++ 
b/chemprop-updated/tests/unit/data/test_datapoint.py @@ -0,0 +1,58 @@ +import numpy as np +import pytest + +from chemprop.data import MoleculeDatapoint + +SMI = "c1ccccc1" + + +@pytest.fixture(params=["@", "@@"]) +def chiral_smi(request): + return f"C[C{request.param}H](O)N" + + +@pytest.fixture(params=range(1, 3)) +def targets(request): + return np.random.rand(request.param) + + +@pytest.fixture(params=[0.5, 0.9]) +def features(request): + return np.where(np.random.rand(1024) > request.param, 1.0, 0.0) + + +@pytest.fixture +def features_with_nans(features): + idxs = np.random.choice(len(features), len(features) // 100, False) + features[idxs] = np.nan + + return features + + +def test_num_tasks(targets): + d = MoleculeDatapoint.from_smi(SMI, y=targets) + + assert d.t == targets.shape[0] + + +def test_addh(smi, targets): + d1 = MoleculeDatapoint.from_smi(smi, y=targets) + d2 = MoleculeDatapoint.from_smi(smi, y=targets, add_h=True) + + assert d1.mol.GetNumAtoms() != d2.mol.GetNumAtoms() + + +def test_ignore_chirality(chiral_smi, targets): + d1 = MoleculeDatapoint.from_smi(chiral_smi, y=targets) + d2 = MoleculeDatapoint.from_smi(chiral_smi, y=targets, ignore_chirality=True) + + assert d1.mol.GetAtomWithIdx(1).GetChiralTag() != d2.mol.GetAtomWithIdx(1).GetChiralTag() + + +def test_replace_token(smi, targets, features_with_nans): + if not np.isnan(features_with_nans).any(): + pytest.skip("no `nan`s") + + d = MoleculeDatapoint.from_smi(smi, y=targets, x_d=features_with_nans) + + assert not np.isnan(d.x_d).any() diff --git a/chemprop-updated/tests/unit/data/test_dataset.py b/chemprop-updated/tests/unit/data/test_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..d34b166937e03a994b3a195bfb4d2cc63acf56de --- /dev/null +++ b/chemprop-updated/tests/unit/data/test_dataset.py @@ -0,0 +1,180 @@ +from unittest.mock import MagicMock, call + +import numpy as np +import pytest +from rdkit import Chem +from sklearn.preprocessing import StandardScaler + +from chemprop.data.datasets import MoleculeDatapoint, MoleculeDataset +from chemprop.data.molgraph import MolGraph +from chemprop.featurizers.molgraph import SimpleMoleculeMolGraphFeaturizer + + +@pytest.fixture(params=[1, 5, 10]) +def smis(smis, request): + return smis.sample(request.param).to_list() + + +@pytest.fixture +def targets(smis): + return np.random.rand(len(smis), 1) + + +@pytest.fixture +def mols(smis): + return [Chem.MolFromSmiles(smi) for smi in smis] + + +@pytest.fixture +def X_d(mols): + return [np.random.rand(1) for _ in mols] + + +@pytest.fixture +def V_fs(mols): + return [np.random.rand(mol.GetNumAtoms(), 1) for mol in mols] + + +@pytest.fixture +def E_fs(mols): + return [np.random.rand(mol.GetNumBonds(), 2) for mol in mols] + + +@pytest.fixture +def V_ds(mols): + return [np.random.rand(mol.GetNumAtoms(), 3) for mol in mols] + + +@pytest.mark.parametrize( + "X_d, V_fs, E_fs, V_ds", + [(None, None, None, None), ("X_d", "V_fs", "E_fs", "V_ds")], + indirect=True, +) +@pytest.fixture +def data(mols, targets, X_d, V_fs, E_fs, V_ds): + return [ + MoleculeDatapoint(mol=mol, y=target, x_d=x_d, V_f=V_f, E_f=E_f, V_d=V_d) + for mol, target, x_d, V_f, E_f, V_d in zip(mols, targets, X_d, V_fs, E_fs, V_ds) + ] + + +@pytest.fixture(params=[False, True]) +def cache(request): + return request.param + + +@pytest.fixture +def dataset(data, cache): + extra_atom_fdim = data[0].V_f.shape[1] if data[0].V_f is not None else 0 + extra_bond_fdim = data[0].E_f.shape[1] if data[0].E_f is not None else 0 + + dset = MoleculeDataset( + 
data, + SimpleMoleculeMolGraphFeaturizer( + extra_atom_fdim=extra_atom_fdim, extra_bond_fdim=extra_bond_fdim + ), + ) + dset.cache = cache + + return dset + + +def test_none(): + with pytest.raises(ValueError): + MoleculeDataset(None, SimpleMoleculeMolGraphFeaturizer()) + + +def test_empty(): + """TODO""" + + +def test_len(data, dataset): + assert len(data) == len(dataset) + + +def test_smis(dataset, smis): + assert smis == dataset.smiles + + +def test_targets(dataset, targets): + np.testing.assert_array_equal(dataset.Y, targets) + + +def test_set_targets_too_short(dataset): + with pytest.raises(ValueError): + dataset.Y = np.random.rand(len(dataset) // 2, 1) + + +def test_num_tasks(dataset, targets): + assert dataset.t == targets.shape[1] + + +@pytest.mark.skipif( + not all([x is None for x in ["X_d", "V_fs", "E_fs", "V_ds"]]), reason="Not all inputs are None" +) +def test_aux_nones(dataset: MoleculeDataset): + np.testing.assert_array_equal(dataset.X_d, None) + np.testing.assert_array_equal(dataset.V_fs, None) + np.testing.assert_array_equal(dataset.E_fs, None) + np.testing.assert_array_equal(dataset.V_ds, None) + np.testing.assert_array_equal(dataset.gt_mask, None) + np.testing.assert_array_equal(dataset.lt_mask, None) + assert dataset.d_xd == 0 + assert dataset.d_vf == 0 + assert dataset.d_ef == 0 + assert dataset.d_vd == 0 + + +def test_normalize_targets(dataset): + dset_scaler = dataset.normalize_targets() + scaler = StandardScaler() + scaler.fit(dataset._Y) + Y = scaler.transform(dataset._Y) + + np.testing.assert_array_equal(dataset.Y, Y) + np.testing.assert_array_equal(dset_scaler.mean_, scaler.mean_) + np.testing.assert_array_equal(dset_scaler.scale_, scaler.scale_) + + +def test_normalize_inputs(dataset): + dset_scaler = dataset.normalize_inputs("X_d") + scaler = StandardScaler() + scaler.fit(dataset._X_d) + X = scaler.transform(dataset._X_d) + + np.testing.assert_array_equal(dataset.X_d, X) + np.testing.assert_array_equal(dset_scaler.mean_, scaler.mean_) + np.testing.assert_array_equal(dset_scaler.scale_, scaler.scale_) + + inputs = ["V_f", "E_f", "V_d"] + for input_ in inputs: + dset_scaler = dataset.normalize_inputs(input_) + scaler = StandardScaler() + Xs = getattr(dataset, f"_{input_}s") + X = np.concatenate(Xs, axis=0) + scaler.fit(X) + Xs = [scaler.transform(x) for x in Xs] + + for X, dset_X in zip(Xs, getattr(dataset, f"{input_}s")): + np.testing.assert_array_equal(X, dset_X) + np.testing.assert_array_equal(getattr(dset_scaler, "mean_"), scaler.mean_) + np.testing.assert_array_equal(getattr(dset_scaler, "scale_"), scaler.scale_) + + +@pytest.mark.parametrize("cache", [False, True]) +def test_cache(dataset: MoleculeDataset, cache): + """Test that cache attribute is being set appropriately and that the underlying cache is being + used correctly to load the molgraphs.""" + mg = MolGraph(None, None, None, None) + + dataset.cache = cache + assert dataset.cache == cache + dataset.mg_cache = MagicMock() + dataset.mg_cache.__getitem__.side_effect = lambda i: mg + + calls = [] + for i in range(len(dataset)): + assert dataset[i].mg is mg + calls.append(call(i)) + + dataset.mg_cache.__getitem__.assert_has_calls(calls) diff --git a/chemprop-updated/tests/unit/data/test_samplers.py b/chemprop-updated/tests/unit/data/test_samplers.py new file mode 100644 index 0000000000000000000000000000000000000000..354158bce205eb0a4d2c3216501ac569eb81421a --- /dev/null +++ b/chemprop-updated/tests/unit/data/test_samplers.py @@ -0,0 +1,108 @@ +import numpy as np +import pytest + +from chemprop.data 
import ClassBalanceSampler, MoleculeDatapoint, MoleculeDataset, SeededSampler +from chemprop.featurizers.molgraph import SimpleMoleculeMolGraphFeaturizer + + +@pytest.fixture(params=[0.0, 0.1, 0.5, 1.0]) +def threshold(request): + return request.param + + +@pytest.fixture +def bin_targets(targets, threshold): + return targets <= threshold + + +@pytest.fixture +def featurizer(): + return SimpleMoleculeMolGraphFeaturizer() + + +@pytest.fixture +def dataset(mols, targets, featurizer): + data = [MoleculeDatapoint(mol, y) for mol, y in zip(mols, targets)] + + return MoleculeDataset(data, featurizer) + + +@pytest.fixture(params=[0, 24, 100]) +def seed(request): + return request.param + + +@pytest.fixture +def class_sampler(mols, bin_targets, featurizer): + data = [MoleculeDatapoint(mol, y) for mol, y in zip(mols, bin_targets)] + dset = MoleculeDataset(data, featurizer) + + return ClassBalanceSampler(dset.Y, shuffle=True) + + +def test_seeded_no_seed(dataset): + with pytest.raises(ValueError): + SeededSampler(len(dataset), None) + + +def test_seeded_shuffle(dataset, seed): + sampler = SeededSampler(len(dataset), seed) + + assert list(sampler) != list(sampler) + + +def test_seeded_fixed_shuffle(dataset, seed): + sampler1 = SeededSampler(len(dataset), seed) + sampler2 = SeededSampler(len(dataset), seed) + + idxs1 = list(sampler1) + idxs2 = list(sampler2) + + assert idxs1 == idxs2 + + +def test_class_balance_length(class_sampler, bin_targets: np.ndarray): + n_actives = bin_targets.any(1).sum(0) + n_inactives = len(bin_targets) - n_actives + expected_length = 2 * min(n_actives, n_inactives) + + assert len(class_sampler) == expected_length + + +def test_class_balance_sample(class_sampler, bin_targets: np.ndarray): + idxs = list(class_sampler) + + # sampled indices should be 50/50 actives/inacitves + assert sum(bin_targets[idxs]) == len(idxs) // 2 + + +def test_class_balance_shuffle(class_sampler): + idxs1 = list(class_sampler) + idxs2 = list(class_sampler) + + if len(class_sampler) == 0: + pytest.skip("no indices to sample!") + + assert idxs1 != idxs2 + + +def test_seed_class_balance_shuffle(smis, bin_targets, featurizer, seed): + data = [MoleculeDatapoint.from_smi(smi, target) for smi, target in zip(smis, bin_targets)] + dset = MoleculeDataset(data, featurizer) + + sampler = ClassBalanceSampler(dset.Y, seed, True) + + if len(sampler) == 0: + pytest.skip("no indices to sample!") + + assert list(sampler) != list(sampler) + + +def test_seed_class_balance_reproducibility(smis, bin_targets, featurizer, seed): + data = [MoleculeDatapoint.from_smi(smi, target) for smi, target in zip(smis, bin_targets)] + dset = MoleculeDataset(data, featurizer) + + sampler1 = ClassBalanceSampler(dset.Y, seed, True) + sampler2 = ClassBalanceSampler(dset.Y, seed, True) + + assert list(sampler1) == list(sampler2) diff --git a/chemprop-updated/tests/unit/featurizers/test_atom.py b/chemprop-updated/tests/unit/featurizers/test_atom.py new file mode 100644 index 0000000000000000000000000000000000000000..a26fc93cc7bec20fa82f7d828e9faf82fc760da4 --- /dev/null +++ b/chemprop-updated/tests/unit/featurizers/test_atom.py @@ -0,0 +1,141 @@ +"""NOTE: these tests make a lot of assumptions about the internal mechanics of the AtomFeaturizer, +so they'll need to be reworked if something ever changes about that.""" + +import numpy as np +import pytest +from rdkit import Chem +from rdkit.Chem.rdchem import HybridizationType + +from chemprop.featurizers import MultiHotAtomFeaturizer + +SMI = "Cn1nc(CC(=O)Nc2ccc3oc4ccccc4c3c2)c2ccccc2c1=O" + + 
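+# A rough sketch of the feature-vector layout that the fixtures and assertions below assume
+# (this mirrors what the tests encode, not any documented guarantee of MultiHotAtomFeaturizer):
+# each value set contributes a one-hot segment with one extra "unknown" slot at its end,
+# followed by an aromaticity bit and a 0.01-scaled atomic-mass entry as the last element, e.g.
+#
+#     featurizer = MultiHotAtomFeaturizer(
+#         atomic_num, degree, formal_charge, chiral_tag, num_Hs, hybridization
+#     )
+#     assert len(featurizer) == sum(
+#         len(xs) + 1
+#         for xs in (atomic_num, degree, formal_charge, chiral_tag, num_Hs, hybridization)
+#     ) + 2
+#
+# The fixtures below parametrize over the first five atoms of SMI and over these value sets.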
+@pytest.fixture(params=list(Chem.MolFromSmiles(SMI).GetAtoms())[:5]) +def atom(request): + return request.param + + +@pytest.fixture +def aromatic(atom): + return atom.GetIsAromatic() + + +@pytest.fixture +def mass_bit(atom): + return 0.01 * atom.GetMass() + + +@pytest.fixture +def atomic_num(): + return list(range(1, 37)) + [53] + + +@pytest.fixture +def degree(): + return list(range(6)) + + +@pytest.fixture +def formal_charge(): + return [-1, -2, 1, 2, 0] + + +@pytest.fixture +def chiral_tag(): + return list(range(4)) + + +@pytest.fixture +def num_Hs(): + return list(range(5)) + + +@pytest.fixture +def hybridization(): + return [ + HybridizationType.S, + HybridizationType.SP, + HybridizationType.SP2, + HybridizationType.SP2D, + HybridizationType.SP3, + HybridizationType.SP3D, + HybridizationType.SP3D2, + HybridizationType.OTHER, + ] + + +@pytest.fixture +def featurizer(atomic_num, degree, formal_charge, chiral_tag, num_Hs, hybridization): + return MultiHotAtomFeaturizer( + atomic_num, degree, formal_charge, chiral_tag, num_Hs, hybridization + ) + + +@pytest.fixture +def expected_len(atomic_num, degree, formal_charge, chiral_tag, num_Hs, hybridization): + return ( + +sum( + len(xs) + 1 + for xs in (atomic_num, degree, formal_charge, chiral_tag, num_Hs, hybridization) + ) + + 2 + ) + + +@pytest.fixture +def x(featurizer, atom): + return featurizer(atom) + + +def test_len(featurizer, expected_len): + assert len(featurizer) == expected_len + + +def test_none(featurizer): + np.testing.assert_array_equal(featurizer(None), np.zeros(len(featurizer))) + + +def test_atomic_num_bit(atom, x, atomic_num): + n = atom.GetAtomicNum() + + if n == 53: # special check for Iodine + assert x[len(atomic_num) - 1] == 1 + else: + if n in atomic_num: + assert x[n - 1] == 1 + else: + assert x[len(atomic_num)] == 1 + + +def test_aromatic_bit(x, aromatic): + i = -2 + if aromatic: + assert x[i] == 1 + else: + assert x[i] == 0 + + +def test_mass_bit(x, mass_bit): + assert x[-1] == pytest.approx(mass_bit) + + +@pytest.mark.parametrize( + "a,x_v_orig", + zip( + list(Chem.MolFromSmiles("Fc1cccc(C2(c3nnc(Cc4cccc5ccccc45)o3)CCOCC2)c1").GetAtoms()), + # fmt: off + [ + [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0.18998], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0.12011], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0.12011], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0.12011], + ] + # fmt: on + ), +) +def test_x_orig(a, x_v_orig): + f = MultiHotAtomFeaturizer.v2() + x_v_calc = f(a) + + np.testing.assert_array_almost_equal(x_v_calc, x_v_orig) diff --git a/chemprop-updated/tests/unit/featurizers/test_bond.py b/chemprop-updated/tests/unit/featurizers/test_bond.py new file mode 100644 index 0000000000000000000000000000000000000000..3277d1dc60a53f9f33a210dce0daa6d9af84af67 --- /dev/null +++ b/chemprop-updated/tests/unit/featurizers/test_bond.py @@ 
-0,0 +1,93 @@ +import numpy as np +import pytest +from rdkit import Chem + +from chemprop.featurizers import MultiHotBondFeaturizer + +SMI = "Cn1nc(CC(=O)Nc2ccc3oc4ccccc4c3c2)c2ccccc2c1=O" + + +@pytest.fixture(params=list(Chem.MolFromSmiles(SMI).GetBonds())) +def bond(request): + return request.param + + +@pytest.fixture +def bond_types(): + return [1, 2, 3, 12] + + +@pytest.fixture +def stereo(): + return list(range(6)) + + +@pytest.fixture +def featurizer(bond_types, stereo): + return MultiHotBondFeaturizer(bond_types, stereo) + + +@pytest.fixture +def exp_len(bond_types, stereo): + return sum([1, len(bond_types), 1, 1, (len(stereo) + 1)]) + + +@pytest.fixture +def bt_bit(bond, bond_types, featurizer): + bt = bond.GetBondType() + i_bt = int(bt) + + i = bond_types.index(i_bt) if i_bt in bond_types else -1 + + if i == -1: + return i + + return featurizer.one_hot_index(bt, featurizer.bond_types)[0] + 1 + + +@pytest.fixture +def x(featurizer, bond): + return featurizer(bond) + + +def test_len(featurizer, exp_len): + assert len(featurizer) == exp_len + + +def test_none(featurizer): + x_e = np.zeros(len(featurizer)) + x_e[0] = 1 + + np.testing.assert_array_equal(x_e, featurizer(None)) + + +def test_bt_bit(x, bt_bit): + assert x[bt_bit] == 1 + + +def test_conj_bit(featurizer, bond, x): + conj_bit = 1 + len(featurizer.bond_types) + assert x[conj_bit] == int(bond.GetIsConjugated()) + + +@pytest.mark.parametrize( + "mol,X_e_orig", + [ + ( + Chem.MolFromSmiles("O=C(NCc1ccc(Cn2ccccc2=O)cc1)c1ccccc1CCc1ccccc1"), + np.array( + [ + [0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], + ] + ), + ) + ], +) +def test_x_hand_calc(mol, X_e_orig): + f = MultiHotBondFeaturizer() + + bonds = list(mol.GetBonds()) + X_e_calc = np.array([f(b) for b in bonds[: len(X_e_orig)]]) + np.testing.assert_array_almost_equal(X_e_calc, X_e_orig) diff --git a/chemprop-updated/tests/unit/featurizers/test_cgr.py b/chemprop-updated/tests/unit/featurizers/test_cgr.py new file mode 100644 index 0000000000000000000000000000000000000000..31abef478924a767d9e83245edcb1c1ba67d6de9 --- /dev/null +++ b/chemprop-updated/tests/unit/featurizers/test_cgr.py @@ -0,0 +1,409 @@ +import random +from typing import NamedTuple +import uuid + +import numpy as np +import pytest + +from chemprop.featurizers.molgraph import CGRFeaturizer, RxnMode +from chemprop.utils import make_mol + +AVAILABLE_RXN_MODE_NAMES = [ + "REAC_PROD", + "REAC_PROD_BALANCE", + "REAC_DIFF", + "REAC_DIFF_BALANCE", + "PROD_DIFF", + "PROD_DIFF_BALANCE", +] + + +@pytest.fixture +def expected_aliases(): + return AVAILABLE_RXN_MODE_NAMES + + +@pytest.fixture(params=AVAILABLE_RXN_MODE_NAMES) +def mode_name(request): + return request.param + + +@pytest.fixture(params=AVAILABLE_RXN_MODE_NAMES[::2]) +def mode_imbalanced(request): + return request.param + + +@pytest.fixture(params=AVAILABLE_RXN_MODE_NAMES[1::2]) +def mode_balanced(request): + return request.param + + +@pytest.fixture +def rxn_mode(mode_name): + return getattr(RxnMode, mode_name) + + +@pytest.fixture(params=[str(uuid.uuid4()) for _ in range(3)]) +def invalid_alias(request): + return request.param + + +rxn_smis = [ + # reactant and product with the same number of atoms + "[CH3:1][H:2]>>[CH3:1].[H:2]", # reactant and product are balanced and mapped + "[CH3:2][H:1]>>[H:1].[CH3:2]", # reactant and product are balanced, mapped but with different atom index order + "[CH3:1][H]>>[CH3:1].[H:2]", # reactant and product are balanced and but reactant 
has less atom-mapped atoms + "[CH3:1][H:2]>>[H].[CH3:1]", # reactant and product are balanced and but product has less atom-mapped atoms + # reactant and product has different numbers of atoms + "[CH4:1]>>[CH2:1].[H:2][H:3]", # product has more atoms and more atom-mapped atoms + "[H:1].[CH2:2][H:3]>>[CH3:2][H:3]", # reactant with more atoms and atom-mapped atoms + "[CH4:1]>>[CH3:1].[H:2]", # product with more atoms and atom-mapped atoms with 0 edge +] + +# Expected output for CGRFeaturizer.map_reac_to_prod +reac_prod_maps = { + "[CH3:1][H:2]>>[CH3:1].[H:2]": ({0: 0, 1: 1}, [], []), + "[CH3:2][H:1]>>[H:1].[CH3:2]": ({0: 1, 1: 0}, [], []), + "[CH3:1][H]>>[CH3:1].[H:2]": ({0: 0}, [1], [1]), + "[CH3:1][H:2]>>[H].[CH3:1]": ({0: 1}, [0], [1]), + "[CH4:1]>>[CH2:1].[H:2][H:3]": ({0: 0}, [1, 2], []), + "[H:1].[CH2:2][H:3]>>[CH3:2][H:3]": ({1: 0, 2: 1}, [], [0]), + "[CH4:1]>>[CH3:1].[H:2]": ({0: 0}, [1], []), +} + + +@pytest.fixture(params=rxn_smis) +def rxn_smi(request): + return request.param + + +class BondExpectation(NamedTuple): + """ + whether elements in the returns for _get_bonds are Nones under + imbalanced and balanced modes for provided bond + """ + + bond: tuple + bond_reac_none: bool + bond_prod_none: bool + + +bond_expect_imbalanced = { + "[CH3:1][H:2]>>[CH3:1].[H:2]": [ + BondExpectation((0, 1), bond_reac_none=False, bond_prod_none=True) + ], + "[CH3:2][H:1]>>[H:1].[CH3:2]": [ + BondExpectation((0, 1), bond_reac_none=False, bond_prod_none=True) + ], + "[CH3:1][H]>>[CH3:1].[H:2]": [ + BondExpectation((0, 1), bond_reac_none=False, bond_prod_none=True), + BondExpectation((0, 2), bond_reac_none=True, bond_prod_none=True), + BondExpectation((1, 2), bond_reac_none=True, bond_prod_none=True), + ], + "[CH3:1][H:2]>>[H].[CH3:1]": [ + BondExpectation((0, 1), bond_reac_none=False, bond_prod_none=True), + BondExpectation((0, 2), bond_reac_none=True, bond_prod_none=True), + BondExpectation((1, 2), bond_reac_none=True, bond_prod_none=True), + ], + "[CH4:1]>>[CH2:1].[H:2][H:3]": [ + BondExpectation((0, 1), bond_reac_none=True, bond_prod_none=True), + BondExpectation((0, 2), bond_reac_none=True, bond_prod_none=True), + BondExpectation((1, 2), bond_reac_none=True, bond_prod_none=False), + ], + "[H:1].[CH2:2][H:3]>>[CH3:2][H:3]": [ + BondExpectation((0, 1), bond_reac_none=True, bond_prod_none=True), + BondExpectation((0, 2), bond_reac_none=True, bond_prod_none=True), + BondExpectation((1, 2), bond_reac_none=False, bond_prod_none=False), + ], + "[CH4:1]>>[CH3:1].[H:2]": [ + BondExpectation((0, 0), bond_reac_none=True, bond_prod_none=True) + ], # this last entry doesn't test for anything meaningful, only to enable other tests for graph with zero edges +} +bond_expect_balanced = bond_expect_imbalanced.copy() +bond_expect_balanced.update( + { + "[CH4:1]>>[CH2:1].[H:2][H:3]": [ + BondExpectation((0, 1), bond_reac_none=True, bond_prod_none=True), + BondExpectation((0, 2), bond_reac_none=True, bond_prod_none=True), + BondExpectation((1, 2), bond_reac_none=False, bond_prod_none=False), + ] # this is the only difference compared to the imbalanced case + } +) + + +# A fake `bond` is used in test_calc_edge_features. 
This is a workaround, +# as RDKit cannot construct a bond directly in Python +bond = make_mol("[CH3:1][H:2]", keep_h=True, add_h=False, ignore_chirality=False).GetBondWithIdx(0) + + +def get_reac_prod(rxn_smi: str) -> list: + return [ + make_mol(smi, keep_h=True, add_h=False, ignore_chirality=False) + for smi in rxn_smi.split(">>") + ] + + +def randomize_case(s: str) -> str: + choices = (str.upper, str.lower) + + return "".join(random.choice(choices)(x) for x in s) + + +@pytest.mark.parametrize("s", [str(uuid.uuid4()) for _ in range(3)]) +def test_randomize_case(s): + """test our helper function to ensure that it's not mangling our strings""" + assert randomize_case(s).upper() == s.upper() + + +def test_len(expected_aliases): + """ + Test that the RxnMode class has the correct length. + """ + assert len(RxnMode) == len(expected_aliases) + + +def test_keys(expected_aliases): + """ + Test that the keys function returns the correct set of modes. + """ + assert set(RxnMode.keys()) == set(alias.upper() for alias in expected_aliases) + + +@pytest.mark.parametrize( + "alias,rxn_mode", + [ + ("REAC_PROD", RxnMode.REAC_PROD), + ("REAC_PROD_BALANCE", RxnMode.REAC_PROD_BALANCE), + ("REAC_DIFF", RxnMode.REAC_DIFF), + ("REAC_DIFF_BALANCE", RxnMode.REAC_DIFF_BALANCE), + ("PROD_DIFF", RxnMode.PROD_DIFF), + ("PROD_DIFF_BALANCE", RxnMode.PROD_DIFF_BALANCE), + ], +) +class TestRxnModeGet: + def test_name_and_value(self, alias, rxn_mode): + assert alias.upper() == rxn_mode.name + assert alias.lower() == rxn_mode.value + + def test_getitem(self, alias, rxn_mode): + """ + Test that the RxnMode class can be indexed with uppercase mode. + """ + assert RxnMode[alias.upper()] == rxn_mode + + def test_get(self, alias, rxn_mode): + """ + Test that the get function returns the correct RxnMode. + """ + assert RxnMode.get(alias.upper()) == rxn_mode + + def test_get_random_case(self, alias, rxn_mode): + """ + Test that the get function returns the correct RxnMode when given an alias with random case. + """ + assert RxnMode.get(randomize_case(alias)) == rxn_mode + + def test_get_enum_identity(self, alias, rxn_mode): + """ + Test that the get function returns the correct RxnMode when given a RxnMode. + """ + assert RxnMode.get(rxn_mode) == rxn_mode + + +def test_getitem_invalid_mode(invalid_alias): + """ + Test that the RxnMode class raises a ValueError when indexed with an invalid mode. + """ + with pytest.raises(KeyError): + RxnMode[invalid_alias] + + +def test_get_invalid_mode(invalid_alias): + """ + Test that the get function raises a ValueError when given an invalid mode. + """ + with pytest.raises(KeyError): + RxnMode.get(invalid_alias) + + +class TestCondensedGraphOfReactionFeaturizer: + def test_init_without_mode_(self): + """ + Test that the CondensedGraphOfReactionFeaturizer can be initialized without a mode. + """ + featurizer = CGRFeaturizer() + assert featurizer.mode == RxnMode.REAC_DIFF + + def test_init_with_mode_str(self, mode_name, rxn_mode): + """ + Test that the CondensedGraphOfReactionFeaturizer can be initialized with a string of the mode. + """ + featurizer = CGRFeaturizer(mode_=mode_name) + assert featurizer.mode == rxn_mode + + def test_init_with_mode_enum(self, rxn_mode): + """ + Test that the CondensedGraphOfReactionFeaturizer can be initialized with a RxnMode. + """ + featurizer = CGRFeaturizer(mode_=rxn_mode) + assert featurizer.mode == rxn_mode + + def test_map_reac_to_prod(self, rxn_smi): + """ + Test that the map_reac_to_prod method returns the correct mapping. 
+ """ + reac, prod = get_reac_prod(rxn_smi) + assert CGRFeaturizer.map_reac_to_prod(reac, prod) == reac_prod_maps[rxn_smi] + + def test_calc_node_feature_matrix_shape(self, rxn_smi, mode_name): + """ + Test that the calc_node_feature_matrix method returns the correct node feature matrix. + """ + featurizer = CGRFeaturizer(mode_=mode_name) + + reac, prod = get_reac_prod(rxn_smi) + ri2pj, pids, rids = featurizer.map_reac_to_prod(reac, prod) + + num_nodes, atom_fdim = featurizer._calc_node_feature_matrix( + reac, prod, ri2pj, pids, rids + ).shape + assert num_nodes == len(ri2pj) + len(pids) + len(rids) + assert atom_fdim == featurizer.atom_fdim + + def test_calc_node_feature_matrix_atomic_number_features(self, rxn_smi, rxn_mode): + """ + Test that the calc_node_feature_matrix method returns the correct feature matrix for the atomic number features. + """ + featurizer = CGRFeaturizer(mode_=rxn_mode) + reac, prod = get_reac_prod(rxn_smi) + ri2pj, pids, rids = featurizer.map_reac_to_prod(reac, prod) + atom_featurizer = featurizer.atom_featurizer + + atomic_num_features_expected = np.array( + [atom_featurizer.num_only(a) for a in reac.GetAtoms()] + + [atom_featurizer.num_only(prod.GetAtomWithIdx(pid)) for pid in pids] + )[ + :, : len(atom_featurizer.atomic_nums) + 1 + ] # only create and keep the atomic number features + + atomic_num_features = featurizer._calc_node_feature_matrix(reac, prod, ri2pj, pids, rids)[ + :, : len(atom_featurizer.atomic_nums) + 1 + ] + + np.testing.assert_equal(atomic_num_features, atomic_num_features_expected) + + def test_get_bonds_imbalanced(self, rxn_smi, mode_imbalanced): + """ + Test that the get_bonds method returns the correct bonds when modes are imbalanced. + """ + featurizer = CGRFeaturizer(mode_=mode_imbalanced) + reac, prod = get_reac_prod(rxn_smi) + ri2pj, pids, _ = featurizer.map_reac_to_prod(reac, prod) + + for bond_expect in bond_expect_imbalanced[rxn_smi]: + bond_reac, bond_prod = featurizer._get_bonds( + reac, prod, ri2pj, pids, reac.GetNumAtoms(), *bond_expect.bond + ) + assert (bond_reac is None) == bond_expect.bond_reac_none + assert (bond_prod is None) == bond_expect.bond_prod_none + + def test_get_bonds_balanced(self, rxn_smi, mode_balanced): + """ + Test that the get_bonds method returns the correct bonds when modes are balanced. + """ + featurizer = CGRFeaturizer(mode_=mode_balanced) + reac, prod = get_reac_prod(rxn_smi) + ri2pj, pids, _ = featurizer.map_reac_to_prod(reac, prod) + + for bond_expect in bond_expect_balanced[rxn_smi]: + bond_reac, bond_prod = featurizer._get_bonds( + reac, prod, ri2pj, pids, reac.GetNumAtoms(), *bond_expect.bond + ) + assert (bond_reac is None) == bond_expect.bond_reac_none + assert (bond_prod is None) == bond_expect.bond_prod_none + + @pytest.mark.parametrize( + "reac_prod_bonds", [(bond, bond), (bond, None), (None, bond), (None, None)] + ) + def test_calc_edge_feature_shape(self, reac_prod_bonds, rxn_mode): + """ + Test that the calc_edge_feature method returns the correct edge feature. + """ + featurizer = CGRFeaturizer(mode_=rxn_mode) + reac_bond, prod_bond = reac_prod_bonds + + assert featurizer._calc_edge_feature(reac_bond, prod_bond).shape == ( + len(featurizer.bond_featurizer) * 2, + ) + + def test_featurize_balanced(self, rxn_smi, mode_balanced): + """ + Test CGR featurizer returns the correct features with balanced modes. 
+ """ + featurizer = CGRFeaturizer(mode_=mode_balanced) + reac, prod = get_reac_prod(rxn_smi) + ri2pj, pids, rids = featurizer.map_reac_to_prod(reac, prod) + + molgraph = featurizer((reac, prod)) + + n_atoms = len(ri2pj) + len(pids) + len(rids) + atom_fdim = featurizer.atom_fdim + + assert molgraph.V.shape == (n_atoms, atom_fdim) + + bonds = [ + b.bond + for b in bond_expect_balanced[rxn_smi] + if not (b.bond_reac_none and b.bond_prod_none) + ] + bond_fdim = featurizer.bond_fdim + + assert molgraph.E.shape == (len(bonds) * 2, bond_fdim) + + expect_edge_index = [[], []] + expect_rev_edge_index = [] + + for i, bond in enumerate(bonds): + bond = list(bond) + expect_edge_index[0].extend(bond) + expect_edge_index[1].extend(bond[::-1]) + expect_rev_edge_index.extend([i * 2 + 1, i * 2]) + + assert np.array_equal(molgraph.edge_index, expect_edge_index) + assert np.array_equal(molgraph.rev_edge_index, expect_rev_edge_index) + + def test_featurize_imbalanced(self, rxn_smi, mode_imbalanced): + """ + Test CGR featurizer returns the correct features with balanced modes. + """ + featurizer = CGRFeaturizer(mode_=mode_imbalanced) + reac, prod = get_reac_prod(rxn_smi) + ri2pj, pids, rids = featurizer.map_reac_to_prod(reac, prod) + + molgraph = featurizer((reac, prod)) + + n_atoms = len(ri2pj) + len(pids) + len(rids) + atom_fdim = featurizer.atom_fdim + + assert molgraph.V.shape == (n_atoms, atom_fdim) + + bonds = [ + b.bond + for b in bond_expect_imbalanced[rxn_smi] + if not (b.bond_reac_none and b.bond_prod_none) + ] + bond_fdim = featurizer.bond_fdim + + assert molgraph.E.shape == (len(bonds) * 2, bond_fdim) + + expect_edge_index = [[], []] + expect_rev_edge_index = [] + + for i, bond in enumerate(bonds): + bond = list(bond) + expect_edge_index[0].extend(bond) + expect_edge_index[1].extend(bond[::-1]) + expect_rev_edge_index.extend([i * 2 + 1, i * 2]) + + assert np.array_equal(molgraph.edge_index, expect_edge_index) + assert np.array_equal(molgraph.rev_edge_index, expect_rev_edge_index) diff --git a/chemprop-updated/tests/unit/featurizers/test_molecule.py b/chemprop-updated/tests/unit/featurizers/test_molecule.py new file mode 100644 index 0000000000000000000000000000000000000000..2a14ad8866d81605f4907808a1d86223ee4d046d --- /dev/null +++ b/chemprop-updated/tests/unit/featurizers/test_molecule.py @@ -0,0 +1,238 @@ +# flake8: noqa +import sys + +import numpy as np +import pytest +from rdkit import Chem + +from chemprop.featurizers import ( + MorganBinaryFeaturizer, + MorganCountFeaturizer, + RDKit2DFeaturizer, + V1RDKit2DFeaturizer, + V1RDKit2DNormalizedFeaturizer, +) + + +@pytest.fixture +def mol(): + return Chem.MolFromSmiles("Fc1cccc(C2(c3nnc(Cc4cccc5ccccc45)o3)CCOCC2)c1") + + +# fmt: off +@pytest.fixture +def morgan_binary_bits(): + return np.array([[ 80, 230, 332, 378, 429, 450, 502, 503, 523, 544, 556, + 645, 649, 656, 663, 699, 772, 875, 917, 926, 950, 1039, + 1060, 1087, 1088, 1104, 1136, 1162, 1164, 1199, 1349, 1357, 1380, + 1405, 1430, 1487, 1510, 1561, 1573, 1597, 1604, 1670, 1742, 1747, + 1750, 1824, 1855, 1873, 1928]]) + + +@pytest.fixture +def morgan_count_bits(): + return np.array([ 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, + 1, 1, 4, 2, 2, 1, 2, 4, 1, 1, 2, 2, 2, 1, 1, 7, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 2, 1, 11, 1]) + + +@pytest.fixture +def morgan_binary_custom(): + return np.array([[ 15, 36, 49, 63, 64, 80, 112, 138, 140, 175, 230, 275, 301, + 325, 332, 333, 339, 356, 378, 381, 406, 429, 450, 463, 465, 478, + 486, 502, 503, 517, 523, 524, 537, 544, 549, 554, 556, 573, 
579, + 580, 645, 646, 647, 649, 652, 656, 663, 699, 718, 721, 723, 726, + 731, 772, 773, 800, 818, 821, 828, 831, 836, 849, 865, 875, 887, + 894, 904, 917, 926, 950, 951, 989]]) + + +@pytest.fixture +def rdkit_2d_values(): + return np.array([ 13.9511, 13.9511, 0.2603, -0.5096, + 0.4909, 16.1724, 388.442 , 367.274 , + 388.1587, 146. , 0. , 0.2267, + -0.4239, 0.4239, 0.2267, 0.8966, + 1.6897, 2.5517, 19.1421, 9.7377, + 2.4117, -2.34 , 2.4051, -2.3511, + 5.8532, 0.054 , 3.2361, 1.5168, + 1143.0568, 19.6836, 15.9753, 15.9753, + 14.244 , 9.8787, 9.8787, 7.5208, + 7.5208, 5.8214, 5.8214, 4.26 , + 4.26 , -3.05 , 9626644.372 , 18.0088, + 7.4091, 3.3162, 167.8922, 9.154 , + 5.8172, 0. , 11.7814, 0. , + 0. , 0. , 4.3904, 0. , + 10.1974, 54.5973, 46.8737, 13.2138, + 11.8358, 13.5444, 10.7724, 0. , + 10.1974, 0. , 24.6775, 13.2138, + 95.4556, 0. , 0. , 0. , + 4.3904, 0. , 0. , 23.4111, + 16.5727, 5.8172, 35.75 , 71.1472, + 0. , 10.7724, 0. , 48.15 , + 5.415 , 4.3904, 0. , 5.8172, + 44.2577, 11.1269, 16.8388, 12.1327, + 24.2655, 34.4628, 9.154 , 25.6895, + 0. , 0. , 11.1016, 1.4962, + 0.851 , 21.1832, 1.9333, 1.1618, + 0. , 0.25 , 29. , 0. , + 4. , 0. , 1. , 1. , + 0. , 3. , 1. , 4. , + 0. , 0. , 4. , 0. , + 5. , 2. , 4. , 0. , + 1. , 1. , 0. , 0. , + 4.601 , 5. , 5.0492, 108.285 , + 0. , 0. , 0. , 0. , + 0. , 2. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 2. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 3. , 0. , + 1. , 0. , 0. , 0. , + 0. , 1. , 0. , 0. , + 1. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. ]) + +@pytest.fixture +def v1_rdkit_2d_values(): + return np.array([ 1.5168, 1143.0568, 19.6836, 15.9753, + 15.9753, 14.244 , 9.8787, 9.8787, + 7.5208, 7.5208, 5.8214, 5.8214, + 4.26 , 4.26 , 5.415 , 4.3904, + 0. , 5.8172, 44.2577, 11.1269, + 16.8388, 12.1327, 24.2655, 34.4628, + 9.154 , 388.1587, 0.8966, 1.6897, + 2.5517, 0.25 , -3.05 , 29. , + 367.274 , 9626644.372 , 18.0088, 7.4091, + 3.3162, 167.8922, 13.9511, 0.4239, + 13.9511, 0.2267, 0.2603, 0.2267, + -0.5096, -0.4239, 5.0492, 108.285 , + 388.442 , 0. , 4. , 0. , + 1. , 1. , 3. , 1. , + 4. , 4. , 0. , 5. , + 0. , 4. , 0. , 1. , + 1. , 146. , 9.154 , 5.8172, + 0. , 11.7814, 0. , 0. , + 0. , 4.3904, 0. , 10.1974, + 54.5973, 46.8737, 13.2138, 11.8358, + 5. , 13.5444, 10.7724, 0. , + 10.1974, 0. , 24.6775, 13.2138, + 95.4556, 0. , 0. , 0. , + 4.3904, 0. , 0. , 23.4111, + 16.5727, 5.8172, 35.75 , 71.1472, + 0. , 10.7724, 0. , 48.15 , + 25.6895, 0. , 0. , 11.1016, + 1.4962, 0.851 , 21.1832, 1.9333, + 1.1618, 0. , 0. , 0. , + 0. , 0. , 0. , 2. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 2. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 3. , 0. , 1. , 0. , + 0. , 0. , 0. , 1. , + 0. , 0. , 1. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0.4909]) + +@pytest.fixture +def v1_rdkit_2d_normalized_values(): + return np.array([0.2662, 0.6887, 0.5077, 0.5362, 0.4843, 0.6014, 0.6126, 0.534 , + 0.6197, 0.513 , 0.7176, 0.6135, 0.7476, 0.6436, 0.5736, 0.2421, + 0. 
, 0.2162, 0.9261, 0.2905, 0.8332, 0.5472, 0.6221, 0.8157, + 0.5639, 0.4934, 0.1407, 0.2732, 0.553 , 0.3169, 0.3848, 0.5742, + 0.4977, 1. , 0.4275, 0.3974, 0.4283, 0.5421, 0.8529, 0.349 , + 0.8529, 0.2728, 0.8296, 0.2614, 0.4263, 0.6376, 0.8529, 0.5321, + 0.4905, 0.0613, 0.1937, 0. , 0.9187, 0.5 , 0.964 , 0.865 , + 0.9176, 0.3071, 0.0553, 0.2075, 0. , 0.2143, 0. , 0.98 , + 0.8807, 0.5194, 0.3119, 0.4701, 0. , 0.9161, 0. , 0. , + 0.06 , 0.6132, 0. , 1. , 0.8269, 0.6454, 0.2879, 0.4656, + 0.8852, 0.5202, 0.218 , 0.1671, 0.4275, 0. , 0.5073, 0.4523, + 0.9257, 0.0001, 0. , 0.0373, 0.9759, 0. , 0. , 0.2569, + 0.6995, 0.9386, 0.6704, 0.8781, 0. , 0.9855, 0.0001, 0.1612, + 0.0001, 0.5 , 0.3847, 0.0001, 0.0001, 0.9999, 0.0001, 0.9987, + 0.646 , 0.0203, 0. , 0. , 0. , 0. , 0. , 0.9012, + 0.1651, 0.167 , 0.1665, 0.1665, 0.2029, 0.0694, 0. , 0.1683, + 0.168 , 0.5223, 0.0012, 0.1643, 0.0008, 0.1663, 0.163 , 0.1651, + 0. , 0. , 0.1682, 0.1658, 0.1673, 0. , 0. , 0.0999, + 0. , 0.3777, 0.0045, 0.1333, 0.964 , 0. , 0.914 , 0. , + 0. , 0.4993, 0.1649, 0.7608, 0. , 0. , 0.9095, 0. , + 0.1681, 0.1655, 0. , 0. , 0.1647, 0.1669, 0. , 0. , + 0. , 0.1547, 0. , 0. , 0.1676, 0. , 0.1682, 0.0091, + 0.1684, 0. , 0.1563, 0. , 0. , 0.0211, 0.0211, 0. , + 0. , 0. , 0.0001, 0.157 , 0. , 0. , 0. , 0. , + 0. , 0.1684, 0.1674, 0. , 0. , 0. , 0.1666, 0.3442]) +# fmt: on + + +def test_morgan_binary(mol, morgan_binary_bits): + featurizer = MorganBinaryFeaturizer() + features = featurizer(mol) + + np.testing.assert_array_almost_equal(np.nonzero(features), morgan_binary_bits) + + +def test_morgan_count(mol, morgan_count_bits, morgan_binary_bits): + featurizer = MorganCountFeaturizer() + features = featurizer(mol) + + np.testing.assert_array_almost_equal(features[np.nonzero(features)], morgan_count_bits) + + +def test_morgan_binary_custom(mol, morgan_binary_custom): + featurizer = MorganBinaryFeaturizer(radius=3, length=1024) + features = featurizer(mol) + + np.testing.assert_array_almost_equal(np.nonzero(features), morgan_binary_custom) + + +@pytest.mark.skipif( + sys.platform.startswith("win"), reason="rdkit's BertzCT gives different values on Windows" +) +def test_rdkit_2d(mol, rdkit_2d_values): + featurizer = RDKit2DFeaturizer() + features = featurizer(mol) + + np.testing.assert_array_almost_equal(features, rdkit_2d_values, decimal=2) + + +@pytest.mark.skipif( + sys.platform.startswith("win"), reason="rdkit's BertzCT gives different values on Windows" +) +def test_v1_rdkit_2d(mol, v1_rdkit_2d_values): + featurizer = V1RDKit2DFeaturizer() + features = featurizer(mol) + + np.testing.assert_array_almost_equal(features, v1_rdkit_2d_values, decimal=2) + + +@pytest.mark.skipif( + sys.platform.startswith("win"), reason="rdkit's BertzCT gives different values on Windows" +) +def test_v1_rdkit_2d_normalized(mol, v1_rdkit_2d_normalized_values): + featurizer = V1RDKit2DNormalizedFeaturizer() + features = featurizer(mol) + + np.testing.assert_array_almost_equal(features, v1_rdkit_2d_normalized_values, decimal=2) diff --git a/chemprop-updated/tests/unit/featurizers/test_molgraph.py b/chemprop-updated/tests/unit/featurizers/test_molgraph.py new file mode 100644 index 0000000000000000000000000000000000000000..a9cb948b828f6fa349ddf4c143f47bed9cb3045a --- /dev/null +++ b/chemprop-updated/tests/unit/featurizers/test_molgraph.py @@ -0,0 +1,115 @@ +import numpy as np +import pytest +from rdkit import Chem + +from chemprop.data.molgraph import MolGraph +from chemprop.featurizers.atom import MultiHotAtomFeaturizer +from 
chemprop.featurizers.molgraph import SimpleMoleculeMolGraphFeaturizer + + +@pytest.fixture(params=[0, 10, 100]) +def extra(request): + return request.param + + +@pytest.fixture +def atom_features_extra(mol, extra): + n_a = mol.GetNumAtoms() + + return np.random.rand(n_a, extra) + + +@pytest.fixture +def bond_features_extra(mol, extra): + n_b = mol.GetNumBonds() + + return np.random.rand(n_b, extra) + + +@pytest.fixture +def mol_featurizer(): + return SimpleMoleculeMolGraphFeaturizer() + + +@pytest.fixture +def mol_featurizer_extra(extra): + return SimpleMoleculeMolGraphFeaturizer(None, None, extra, extra) + + +@pytest.fixture +def mg(mol, mol_featurizer): + return mol_featurizer(mol) + + +def test_atom_fdim(extra): + mf = SimpleMoleculeMolGraphFeaturizer(extra_atom_fdim=extra) + + assert mf.atom_fdim == len(mf.atom_featurizer) + extra + + +def test_V_shape(mol, mol_featurizer: SimpleMoleculeMolGraphFeaturizer, mg: MolGraph): + n_a = mol.GetNumAtoms() + d_a = mol_featurizer.atom_fdim + + assert mg.V.shape == (n_a, d_a) + + +def test_E_shape(mol, mol_featurizer: SimpleMoleculeMolGraphFeaturizer, mg: MolGraph): + n_b = mol.GetNumBonds() + d_b = mol_featurizer.bond_fdim + + assert mg.E.shape == (2 * n_b, d_b) + + +def test_x2y_len(mol: Chem.Mol, mg: MolGraph): + num_bonds = mol.GetNumBonds() + + assert mg.edge_index.shape == (2, 2 * num_bonds) + assert mg.rev_edge_index.shape == (2 * num_bonds,) + + +def test_composability(mol): + mf1 = SimpleMoleculeMolGraphFeaturizer(MultiHotAtomFeaturizer.v1(50)) + mf2 = SimpleMoleculeMolGraphFeaturizer(MultiHotAtomFeaturizer.v1(100)) + + assert mf1(mol).V.shape != mf2(mol).V.shape + + +def test_invalid_atom_extra_shape(mol_featurizer, mol): + n_a = mol.GetNumAtoms() + with pytest.raises(ValueError): + mol_featurizer(mol, atom_features_extra=np.random.rand(n_a + 1, 10)) + + +def test_invalid_bond_extra_shape(mol_featurizer, mol): + n_b = mol.GetNumBonds() + with pytest.raises(ValueError): + mol_featurizer(mol, bond_features_extra=np.random.rand(n_b + 1, 10)) + + +def test_atom_extra_shape(mol, extra, atom_features_extra): + mf = SimpleMoleculeMolGraphFeaturizer(extra_atom_fdim=extra) + mg = mf(mol, atom_features_extra=atom_features_extra) + + assert mg.V.shape == (mol.GetNumAtoms(), mf.atom_fdim) + + +def test_atom_extra_values(mol, extra, atom_features_extra): + mf = SimpleMoleculeMolGraphFeaturizer(extra_atom_fdim=extra) + mg = mf(mol, atom_features_extra=atom_features_extra) + + np.testing.assert_array_equal(mg.V[:, len(mf.atom_featurizer) :], atom_features_extra) + + +def test_bond_extra(mol, extra, bond_features_extra): + mf = SimpleMoleculeMolGraphFeaturizer(extra_bond_fdim=extra) + mg = mf(mol, bond_features_extra=bond_features_extra) + + assert mg.E.shape == (2 * mol.GetNumBonds(), mf.bond_fdim) + + +def test_atom_bond_extra(mol, extra, atom_features_extra, bond_features_extra): + mf = SimpleMoleculeMolGraphFeaturizer(extra_atom_fdim=extra, extra_bond_fdim=extra) + mg = mf(mol, atom_features_extra, bond_features_extra) + + assert mg.E.shape == (2 * mol.GetNumBonds(), len(mf.bond_featurizer) + extra) diff --git a/chemprop-updated/tests/unit/nn/test_loss_functions.py b/chemprop-updated/tests/unit/nn/test_loss_functions.py new file mode 100644 index 0000000000000000000000000000000000000000..3d748f5e2095569f6f69f9e49869561d4ad0f787 --- /dev/null +++ b/chemprop-updated/tests/unit/nn/test_loss_functions.py @@ -0,0 +1,504 @@ +"""Chemprop unit tests for chemprop/models/loss.py""" + +import numpy as np +import pytest +import torch + +from chemprop.nn.metrics 
import ( + SID, + BCELoss, + BinaryMCCLoss, + BoundedMSE, + CrossEntropyLoss, + DirichletLoss, + EvidentialLoss, + MulticlassMCCLoss, + MVELoss, + Wasserstein, +) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,mse", + [ + ( + torch.tensor([[-3, 2], [1, -1]], dtype=torch.float), + torch.zeros([2, 2], dtype=torch.float), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.zeros([2, 2], dtype=torch.bool), + torch.zeros([2, 2], dtype=torch.bool), + torch.tensor(3.75000, dtype=torch.float), + ), + ( + torch.tensor([[-3, 2], [1, -1]], dtype=torch.float), + torch.zeros([2, 2], dtype=torch.float), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.zeros([2, 2], dtype=torch.bool), + torch.ones([2, 2], dtype=torch.bool), + torch.tensor(2.5000, dtype=torch.float), + ), + ( + torch.tensor([[-3, 2], [1, -1]], dtype=torch.float), + torch.zeros([2, 2], dtype=torch.float), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.ones([2, 2], dtype=torch.bool), + torch.zeros([2, 2], dtype=torch.bool), + torch.tensor(1.25000, dtype=torch.float), + ), + ], +) +def test_BoundedMSE(preds, targets, mask, weights, task_weights, lt_mask, gt_mask, mse): + """ + Testing the bounded_mse loss function + """ + bmse_loss = BoundedMSE(task_weights) + loss = bmse_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, mse) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,likelihood", + [ + ( + torch.tensor([[0, 1]], dtype=torch.float), + torch.zeros([1, 1]), + torch.ones([1, 2], dtype=torch.bool), + torch.ones([1]), + torch.ones([2]), + torch.zeros([2], dtype=torch.bool), + torch.zeros([2], dtype=torch.bool), + torch.tensor(0.39894228, dtype=torch.float), + ) + ], +) +def test_MVE(preds, targets, mask, weights, task_weights, lt_mask, gt_mask, likelihood): + """ + Tests the normal_mve loss function + """ + mve_loss = MVELoss(task_weights) + nll_calc = mve_loss(preds, targets, mask, weights, lt_mask, gt_mask) + likelihood_calc = np.exp(-1 * nll_calc) + torch.testing.assert_close(likelihood_calc, likelihood) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,v_kl,expected_loss", + [ + ( + torch.tensor([[[2, 2]]]), + torch.ones([1, 1]), + torch.ones([1, 2], dtype=torch.bool), + torch.ones([1]), + torch.ones([1]), + torch.zeros([1], dtype=torch.bool), + torch.zeros([1], dtype=torch.bool), + 0, + torch.tensor(0.6, dtype=torch.float), + ), + ( + torch.tensor([[[2, 2]]]), + torch.ones([1, 1]), + torch.ones([1, 2], dtype=torch.bool), + torch.ones([1]), + torch.ones([1]), + torch.zeros([1], dtype=torch.bool), + torch.zeros([1], dtype=torch.bool), + 0.2, + torch.tensor(0.63862943, dtype=torch.float), + ), + ], +) +def test_BinaryDirichlet( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask, v_kl, expected_loss +): + """ + Test on the dirichlet loss function for classification. + Note these values were not hand derived, just testing for + dimensional consistency. 
+ """ + binary_dirichlet_loss = DirichletLoss(task_weights=task_weights, v_kl=v_kl) + loss = binary_dirichlet_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,", + [ + ( + torch.ones([1, 1]), + torch.ones([1, 1]), + torch.ones([1, 2], dtype=torch.bool), + torch.ones([1]), + torch.ones([1]), + torch.zeros([1], dtype=torch.bool), + torch.zeros([1], dtype=torch.bool), + ) + ], +) +def test_BinaryDirichlet_wrong_dimensions( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask +): + """ + Test on the dirichlet loss function for classification + for dimension errors. + """ + with pytest.raises(IndexError): + binary_dirichlet_loss = DirichletLoss(task_weights) + binary_dirichlet_loss(preds, targets, mask, weights, lt_mask, gt_mask) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,v_kl,expected_loss", + [ + ( + torch.tensor([[[0.2, 0.1, 0.3], [0.1, 0.3, 0.1]], [[1.2, 0.5, 1.7], [1.1, 1.4, 0.8]]]), + torch.tensor([[0, 0], [1, 1]]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.zeros([2], dtype=torch.bool), + torch.zeros([2], dtype=torch.bool), + 0.2, + torch.tensor(1.868991, dtype=torch.float), + ), + ( + torch.tensor([[[0.2, 0.1, 0.3], [0.1, 0.3, 0.1]], [[1.2, 0.5, 1.7], [1.1, 1.4, 0.8]]]), + torch.tensor([[0, 0], [1, 1]]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.zeros([2], dtype=torch.bool), + torch.zeros([2], dtype=torch.bool), + 0.0, + torch.tensor(1.102344, dtype=torch.float), + ), + ], +) +def test_MulticlassDirichlet( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask, v_kl, expected_loss +): + """ + Test on the dirichlet loss function for classification. + Note these values were not hand derived, just testing for + dimensional consistency. + """ + multiclass_dirichlet_loss = DirichletLoss(task_weights=task_weights, v_kl=v_kl) + loss = multiclass_dirichlet_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,v_kl,expected_loss", + [ + ( + torch.tensor([[2, 2, 2, 2]]), + torch.ones([1, 1]), + torch.ones([1, 1], dtype=torch.bool), + torch.ones([1]), + torch.ones([1]), + torch.zeros([1], dtype=torch.bool), + torch.zeros([1], dtype=torch.bool), + 0, + torch.tensor(1.56893861, dtype=torch.float), + ), + ( + torch.tensor([[2, 2, 2, 2]]), + torch.ones([1, 1]), + torch.ones([1, 1], dtype=torch.bool), + torch.ones([1]), + torch.ones([1]), + torch.zeros([1], dtype=torch.bool), + torch.zeros([1], dtype=torch.bool), + 0.2, + torch.tensor(2.768938541, dtype=torch.float), + ), + ], +) +def test_Evidential( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask, v_kl, expected_loss +): + """ + Test on the evidential loss function for classification. + Note these values were not hand derived, just testing for + dimensional consistency. 
+ """ + evidential_loss = EvidentialLoss(task_weights=task_weights, v_kl=v_kl) + loss = evidential_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask", + [ + ( + torch.ones([2, 2]), + torch.ones([2, 2]), + torch.ones([1, 1], dtype=torch.bool), + torch.ones([1]), + torch.ones([1]), + torch.zeros([1], dtype=torch.bool), + torch.zeros([1], dtype=torch.bool), + ) + ], +) +def test_Evidential_wrong_dimensions(preds, targets, mask, weights, task_weights, lt_mask, gt_mask): + """ + Test on the Evidential loss function for classification + for dimension errors. + """ + evidential_loss = EvidentialLoss(task_weights) + with pytest.raises(ValueError): + evidential_loss(preds, targets, mask, weights, lt_mask, gt_mask) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,expected_loss", + [ + ( + torch.tensor([2, 2], dtype=torch.float), + torch.ones([2], dtype=torch.float), + torch.ones([2], dtype=torch.bool), + torch.ones([1]), + torch.ones([2]), + torch.zeros([2], dtype=torch.bool), + torch.zeros([2], dtype=torch.bool), + torch.tensor(0.126928, dtype=torch.float), + ), + ( + torch.tensor([0.5, 0.5], dtype=torch.float), + torch.ones([2], dtype=torch.float), + torch.ones([2], dtype=torch.bool), + torch.ones([1]), + torch.ones([2]), + torch.zeros([2], dtype=torch.bool), + torch.zeros([2], dtype=torch.bool), + torch.tensor(0.474077, dtype=torch.float), + ), + ], +) +def test_BCE(preds, targets, mask, weights, task_weights, lt_mask, gt_mask, expected_loss): + """ + Test on the BCE loss function for classification. + """ + bce_loss = BCELoss(task_weights) + loss = bce_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,expected_loss", + [ + ( + torch.tensor([[[1.2, 0.5, 0.7], [-0.1, 0.3, 0.1]], [[1.2, 0.5, 0.7], [1.1, 1.3, 1.1]]]), + torch.tensor([[1, 0], [1, 2]]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2, 2], dtype=torch.bool), + torch.tensor(1.34214, dtype=torch.float), + ), + ( + torch.tensor([[[1.2, 1.5, 0.7], [-0.1, 2.3, 1.1]], [[1.2, 1.5, 1.7], [2.1, 1.3, 1.1]]]), + torch.tensor([[1, 1], [2, 2]], dtype=torch.float64), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2, 2], dtype=torch.bool), + torch.tensor(0.899472, dtype=torch.float), + ), + ], +) +def test_CrossEntropy(preds, targets, mask, weights, task_weights, lt_mask, gt_mask, expected_loss): + """ + Test on the CE loss function for classification. + Note these values were not hand derived, just testing for + dimensional consistency. 
+ """ + cross_entropy_loss = CrossEntropyLoss(task_weights) + loss = cross_entropy_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,expected_loss", + [ + ( + torch.tensor([0, 1, 1, 0]), + torch.tensor([0, 1, 1, 0]), + torch.ones([4], dtype=torch.bool), + torch.ones(1), + torch.ones(4), + torch.zeros([1, 4], dtype=torch.bool), + torch.zeros([1, 4], dtype=torch.bool), + torch.tensor(0, dtype=torch.float), + ), + ( + torch.tensor([0, 1, 0, 1, 1, 1, 0, 1, 1]), + torch.tensor([0, 1, 1, 0, 1, 1, 0, 0, 1]), + torch.ones([9], dtype=torch.bool), + torch.ones(1), + torch.ones(9), + torch.zeros([1, 9], dtype=torch.bool), + torch.zeros([1, 9], dtype=torch.bool), + torch.tensor(0.683772, dtype=torch.float), + ), + ], +) +def test_BinaryMCC(preds, targets, mask, weights, task_weights, lt_mask, gt_mask, expected_loss): + """ + Test on the BinaryMCC loss function for classification. Values have been checked using TorchMetrics. + """ + binary_mcc_loss = BinaryMCCLoss(task_weights) + loss = binary_mcc_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,expected_loss", + [ + ( + torch.tensor( + [[[0.16, 0.26, 0.58], [0.22, 0.61, 0.17]], [[0.71, 0.09, 0.20], [0.05, 0.82, 0.13]]] + ), + torch.tensor([[2, 1], [0, 0]]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.zeros([2, 2], dtype=torch.bool), + torch.zeros([2, 2], dtype=torch.bool), + torch.tensor(0.5, dtype=torch.float), + ), + ( + torch.tensor( + [[[0.16, 0.26, 0.58], [0.22, 0.61, 0.17]], [[0.71, 0.09, 0.20], [0.05, 0.82, 0.13]]] + ), + torch.tensor([[2, 1], [0, 0]]), + torch.tensor([[1, 1], [0, 1]], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.zeros([2, 2], dtype=bool), + torch.zeros([2, 2], dtype=bool), + torch.tensor(1.0, dtype=torch.float), + ), + ], +) +def test_MulticlassMCC( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask, expected_loss +): + """ + Test on the MulticlassMCC loss function for classification. 
+ """ + multiclass_mcc_loss = MulticlassMCCLoss(task_weights) + loss = multiclass_mcc_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,threshold,expected_loss", + [ + ( + torch.tensor([[0.8, 0.2], [0.3, 0.7]]), + torch.tensor([[0.9, 0.1], [0.4, 0.6]]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([1]), + torch.ones([2]), + torch.ones([2], dtype=torch.bool), + torch.ones([2], dtype=torch.bool), + None, + torch.tensor(0.031319, dtype=torch.float), + ), + ( + torch.tensor([[0.6, 0.4], [0.2, 0.8]]), + torch.tensor([[0.7, 0.3], [0.3, 0.7]]), + torch.tensor([[1, 1], [1, 0]], dtype=torch.bool), + torch.ones([1]), + torch.ones([2]), + torch.ones([2], dtype=torch.bool), + torch.ones([2], dtype=torch.bool), + None, + torch.tensor(0.295655, dtype=torch.float), + ), + ( + torch.tensor([[0.6, 0.4], [0.2, 0.8]]), + torch.tensor([[0.7, 0.3], [0.3, 0.7]]), + torch.tensor([[1, 1], [1, 1]], dtype=torch.bool), + torch.ones([1]), + torch.ones([2]), + torch.ones([2], dtype=torch.bool), + torch.ones([2], dtype=torch.bool), + 0.5, + torch.tensor(0.033673, dtype=torch.float), + ), + ], +) +def test_SID( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask, threshold, expected_loss +): + """ + Test on the SID loss function. These values were not handchecked, + just checking function returns values with/without mask and threshold. + """ + sid_loss = SID(task_weights=task_weights, threshold=threshold) + loss = sid_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,threshold,expected_loss", + [ + ( + torch.tensor([[0.1, 0.3, 0.5, 0.7], [0.2, 0.4, 0.6, 0.8]]), + torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]), + torch.tensor([[1, 1, 1, 1], [1, 0, 1, 0]], dtype=torch.bool), + torch.ones([2, 1]), + torch.ones([1, 4]), + torch.zeros([2, 4], dtype=torch.bool), + torch.zeros([2, 4], dtype=torch.bool), + None, + torch.tensor(0.1125, dtype=torch.float), + ), + ( + torch.tensor([[0.1, 0.3, 0.5, 0.7], [0.2, 0.4, 0.6, 0.8]]), + torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]), + torch.ones([2, 4], dtype=torch.bool), + torch.ones([2, 1]), + torch.ones([1, 4]), + torch.zeros([2, 4], dtype=torch.bool), + torch.zeros([2, 4], dtype=torch.bool), + None, + torch.tensor(0.515625, dtype=torch.float), + ), + ( + torch.tensor([[0.1, 0.3, 0.5, 0.7], [0.2, 0.4, 0.6, 0.8]]), + torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]), + torch.ones([2, 4], dtype=torch.bool), + torch.ones([2, 1]), + torch.ones([1, 4]), + torch.zeros([2, 4], dtype=torch.bool), + torch.zeros([2, 4], dtype=torch.bool), + 0.3, + torch.tensor(0.501984, dtype=torch.float), + ), + ], +) +def test_Wasserstein( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask, threshold, expected_loss +): + """ + Test on the Wasserstein loss function. These values were not handchecked, + just checking function returns values with/without mask and threshold. 
+ """ + wasserstein_loss = Wasserstein(task_weights=task_weights, threshold=threshold) + loss = wasserstein_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +# TODO: Add quantile loss tests diff --git a/chemprop-updated/tests/unit/nn/test_metrics.py b/chemprop-updated/tests/unit/nn/test_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..41ced0997258265b2312860ad7aa0dd2a1bae9b4 --- /dev/null +++ b/chemprop-updated/tests/unit/nn/test_metrics.py @@ -0,0 +1,278 @@ +from io import StringIO +import re + +from lightning import pytorch as pl +from lightning.pytorch.callbacks.progress.tqdm_progress import Tqdm, TQDMProgressBar +import pytest +import torch +from torch.nn import functional as F +from torch.utils.data import DataLoader, Dataset + +from chemprop.nn.metrics import ( + MAE, + MSE, + RMSE, + SID, + BCELoss, + BinaryAccuracy, + BinaryAUPRC, + BinaryAUROC, + BinaryF1Score, + BinaryMCCLoss, + BinaryMCCMetric, + BoundedMAE, + BoundedMSE, + BoundedRMSE, + CrossEntropyLoss, + DirichletLoss, + EvidentialLoss, + MulticlassMCCLoss, + MulticlassMCCMetric, + MVELoss, + R2Score, + Wasserstein, +) + +reg_targets = torch.arange(-20, 20, dtype=torch.float32).view(-1, 2) +# fmt: off +b_class_targets = torch.tensor( + [0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, + 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0], dtype=torch.float32 +).view(-1, 2) +m_class_targets = torch.tensor( + [0, 2, 1, 0, 2, 0, 2, 2, 1, 0, 1, 1, 0, 1, 2, 1, 0, 0, 1, 0, + 0, 0, 0, 2, 1, 2, 2, 1, 2, 2, 2, 0, 1, 1, 0, 0, 1, 1, 2, 0], dtype=torch.float32 +).view(-1, 2) +raw_spectra = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 4, 3, 2, 1, + 4, 3, 2, 1, 0, 1, 2, 3, 4, 5, + 9, 1, 8, 0, 5, 4, 3, 6, 8, 3, + 2, 1, 6, 4, 7, 2, 6, 2, 5, 1, + 5, 3, 4, 4, 4, 4, 5, 1, 2, 8, + 9, 7, 6, 5, 4, 3, 2, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, + 9, 0, 1, 5, 2, 6, 2, 7, 4, 7, + 3, 8, 2, 9, 1, 0, 1, 0, 1, 0, + 4, 4, 2, 3, 1, 3, 2, 3, 1, 3, + 9, 1, 8, 0, 5, 4, 3, 6, 8, 3], dtype=torch.float32).view(-1, 10) +spectral_targets = raw_spectra / raw_spectra.sum(1, keepdim=True) + +mockffn = torch.tensor([ + -3.7, -14.2, 3.4, 7.5, 11.7, 13.8, 10.2, -0.7, 9.2, -8.0, + -5.3, -2.7, -5.3, -14.4, 1.3, 9.0, -0.4, -10.9, 14.8, 16.4, + 10.9, 5.8, -18.9, 3.6, 18.3, -2.7, -16.8, -8.4, 9.7, -7.2, + 17.1, -9.6, -3.3, -1.0, -11.9, -19.6, -12.3, -13.9, -1.1, -6.0, + 1.1, 12.0, -7.8, 0.2, -12.9, 13.8, 1.1, -9.4, 4.3, -14.9, + 10.0, 9.2, -1.3, -4.4, -7.0, 18.5, -17.5, -0.3, -13.2, -0.1, + 16.2, -14.6, -19.6, 5.5, 4.7, -4.5, -4.9, 13.8, 12.3, -6.9, +-12.1, -18.6, -9.5, 9.8, -9.6, -9.9, 8.7, 0.5, 11.2, 13.0, + -1.2, 4.2, -15.9, 11.4, 14.6, -19.9, 14.7, -3.0, -10.0, 9.5, + 9.0, -6.8, -13.0, -18.0, -12.6, 8.5, 16.9, -17.8, -11.2, 14.5, +-11.8, -5.1, 5.1, 8.5, -4.2, 11.6, 14.5, 19.7, -17.1, 19.0, + 19.2, 17.7, -4.9, 0.7, -16.5, 2.9, 11.3, -5.5, 17.8, 14.6, + -4.2, -1.4, -7.3, 8.4, -8.0, 2.5, 17.5, 13.3, -6.0, -7.9, + 3.5, -2.8, 2.8, 15.3, 15.2, -9.3, -1.0, -20.0, -19.6, -16.7, +-15.5, -10.3, -16.6, 17.9, 18.3, 4.2, -15.8, 5.8, 13.0, 7.9, + 19.7, 7.7, 16.5, 1.8, -16.6, -4.3, 2.9, 18.4, 4.2, 13.1, + ], dtype=torch.float32, +) +# fmt: on + +reg_train_step = mockffn.clone()[:40].view(-1, 2) +reg_forward = reg_train_step.clone() +mve_train_step = torch.stack( + (mockffn.clone()[:40].view(-1, 2), F.softplus(mockffn.clone()[40:80].view(-1, 2))), 2 +) +mve_forward 
= mve_train_step.clone() +evi_train_step = torch.stack( + ( + mockffn.clone()[:40].view(-1, 2), + F.softplus(mockffn.clone()[40:80].view(-1, 2)), + F.softplus(mockffn.clone()[80:120].view(-1, 2)) + 1, + F.softplus(mockffn.clone()[120:160].view(-1, 2)), + ), + 2, +) +evi_forward = evi_train_step.clone() + +b_class_train_step = mockffn.clone()[:40].view(-1, 2) +b_class_forward = b_class_train_step.clone().sigmoid() +b_diri_train_step = F.softplus(mockffn.clone()[0:80].view(-1, 2, 2)) + 1 +b_diri_forward = b_diri_train_step[..., 1] / b_diri_train_step.sum(-1) + +m_class_train_step = mockffn.clone()[:120].view(20, 2, 3) +m_class_forward = m_class_train_step.clone().softmax(-1) +m_diri_train_step = F.softplus(mockffn.clone()[:120].view(20, 2, 3)) + 1 +m_diri_forward = m_diri_train_step / m_diri_train_step.sum(-1, keepdim=True) +spectral_train_step = mockffn.clone()[:150].view(-1, 10).exp() / mockffn.clone()[:150].view( + -1, 10 +).exp().sum(1, keepdim=True) +spectral_forward = spectral_train_step.clone() + +# fmt: off +mask = torch.tensor( + [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, + 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=torch.bool +).view(-1, 2) +spectral_mask = torch.tensor( + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, + 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, + 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, + 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 1, 1, 1, 1, 1], dtype=torch.bool +).view(-1, 10) +# fmt: on + + +class _MockDataset(Dataset): + def __init__(self, train_step, forward, targets, mask): + self.train_step = train_step + self.forward = forward + self.targets = targets + # fmt: off + self.mask = mask + self.w = torch.linspace(0.1, 1, len(self.targets), dtype=torch.float32).view(-1, 1) + self.lt_mask = torch.tensor( + [0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1], dtype=torch.bool + ).view(-1, 2) + self.gt_mask = torch.tensor( + [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, + 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=torch.bool + ).view(-1, 2) + # fmt: on + + def __len__(self): + return len(self.targets) + + def __getitem__(self, idx): + return ( + self.train_step[idx], + self.forward[idx], + self.targets[idx], + self.mask[idx], + self.w[idx], + self.lt_mask[idx], + self.gt_mask[idx], + ) + + +class _MockMPNN(pl.LightningModule): + def __init__(self, criterion, metric): + super().__init__() + self.automatic_optimization = False + self.ignore = torch.nn.Parameter(torch.tensor(0.0)) + self.criterion = criterion + self.metrics = torch.nn.ModuleList([metric, self.criterion.clone()]) + + def training_step(self, batch, batch_idx): + train_step, _, targets, mask, w, lt_mask, gt_mask = batch + loss = self.criterion(train_step, targets, mask, w, lt_mask, gt_mask) + self.log("train_loss", self.criterion, prog_bar=True, on_epoch=True) + return loss + + def validation_step(self, batch, batch_idx): + self._evalute_batch(batch, "val") + + train_step, _, targets, mask, w, lt_mask, gt_mask = batch + self.metrics[-1].update(train_step, targets, mask, w, lt_mask, gt_mask) + self.log("val_loss", self.metrics[-1], prog_bar=True) + + def test_step(self, batch, 
batch_idx): + self._evalute_batch(batch, "test") + + def _evalute_batch(self, batch, val_test): + _, forward, targets, mask, w, lt_mask, gt_mask = batch + if isinstance(self.metrics[-1], (MVELoss, EvidentialLoss)): + forward = forward[..., 0] + self.metrics[0].update(forward, targets, mask, w, lt_mask, gt_mask) + self.log(f"{val_test}_metric", self.metrics[0], prog_bar=True) + + def configure_optimizers(self): + return torch.optim.Adam(self.parameters(), lr=1e-3) + + +class _TestBar(TQDMProgressBar): + def __init__(self, bar_as_text, *args, **kwargs): + super().__init__(*args, **kwargs) + self.bar_as_text = bar_as_text + + def init_train_tqdm(self) -> Tqdm: + return Tqdm( + desc=self.train_description, + position=(2 * self.process_position), + disable=self.is_disabled, + leave=True, + dynamic_ncols=True, + file=self.bar_as_text, + smoothing=0, + bar_format=self.BAR_FORMAT, + ) + + +# fmt: off +groups = [ + (MSE(), R2Score(), reg_train_step, reg_forward, reg_targets, mask), + (MAE(), MSE(), reg_train_step, reg_forward, reg_targets, mask), + (RMSE(), MAE(), reg_train_step, reg_forward, reg_targets, mask), + (BoundedMSE(), RMSE(), reg_train_step, reg_forward, reg_targets, mask), + (BoundedMAE(), BoundedMSE(), reg_train_step, reg_forward, reg_targets, mask), + (BoundedRMSE(), BoundedMAE(), reg_train_step, reg_forward, reg_targets, mask), + (MSE(), BoundedRMSE(), reg_train_step, reg_forward, reg_targets, mask), + (MVELoss(), MSE(), mve_train_step, mve_forward, reg_targets, mask), + (EvidentialLoss(), MSE(), evi_train_step, evi_forward, reg_targets, mask), + (BCELoss(), BinaryMCCMetric(), b_class_train_step, b_class_forward, b_class_targets, mask), + (BinaryMCCLoss(), BinaryAUROC(), b_class_train_step, b_class_forward, b_class_targets, mask), + (BCELoss(), BinaryAUPRC(), b_class_train_step, b_class_forward, b_class_targets, mask), + (BCELoss(), BinaryAccuracy(), b_class_train_step, b_class_forward, b_class_targets, mask), + (DirichletLoss(), BinaryF1Score(), b_diri_train_step, b_diri_forward, b_class_targets, mask), + (CrossEntropyLoss(), MulticlassMCCMetric(), m_class_train_step, m_class_forward, m_class_targets, mask), + (MulticlassMCCLoss(), MulticlassMCCMetric(), m_class_train_step, m_class_forward, m_class_targets, mask), + (DirichletLoss(), MulticlassMCCMetric(), m_diri_train_step, m_diri_forward, m_class_targets, mask), + (SID(), Wasserstein(), spectral_train_step, spectral_forward, spectral_targets, spectral_mask), + (Wasserstein(), SID(), spectral_train_step, spectral_forward, spectral_targets, spectral_mask), +] +# fmt: on + + +@pytest.mark.parametrize("loss_fn, metric_fn, train_step, forward, targets, mask", groups) +def test_metric_integeration(loss_fn, metric_fn, train_step, forward, targets, mask): + model = _MockMPNN(loss_fn, metric_fn) + + dataset = _MockDataset(train_step, forward, targets, mask) + train_loader = DataLoader(dataset, batch_size=5, shuffle=True) + val_loader = DataLoader(dataset, batch_size=5, shuffle=False) + test_loader = DataLoader(dataset, batch_size=20, shuffle=False) + + bar_as_text = StringIO() + trainer = pl.Trainer(max_epochs=2, log_every_n_steps=1, callbacks=[_TestBar(bar_as_text)]) + trainer.fit(model, train_loader, val_loader) + + x = bar_as_text.getvalue() + train_losses = re.findall(r"train_loss_epoch=(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)", x) + val_losses = re.findall(r"val_loss=(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)", x) + val_metrics = re.findall(r"val_metric=(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)", x) + + test_results = trainer.test(model, test_loader) + 
test_metric = test_results[0]["test_metric"] + + for train_loss in train_losses: + for val_loss in val_losses: + train_loss, val_loss = float(train_loss), float(val_loss) + assert abs(train_loss - val_loss) <= 0.01 * max(abs(train_loss), abs(val_loss)) + + for value in val_metrics: + assert abs(float(value) - test_metric) <= 0.01 * max(abs(float(value)), abs(test_metric)) diff --git a/chemprop-updated/tests/unit/nn/test_transforms.py b/chemprop-updated/tests/unit/nn/test_transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..54bd4ed7a763f28fda9fe0e9c4b4e0795e36b340 --- /dev/null +++ b/chemprop-updated/tests/unit/nn/test_transforms.py @@ -0,0 +1,156 @@ +import numpy as np +import pytest +from sklearn.preprocessing import StandardScaler +import torch + +from chemprop.nn.transforms import GraphTransform, ScaleTransform, UnscaleTransform + + +class MockBatchMolGraph: + def __init__(self, V, E): + self.V = V + self.E = E + + +@pytest.fixture +def mean(): + return np.array([0.0, 1.0], dtype=np.float32) + + +@pytest.fixture +def scale(): + return np.array([2.0, 3.0], dtype=np.float32) + + +@pytest.fixture +def pad(): + return 2 + + +@pytest.fixture +def tensor_mean(mean, pad): + return torch.cat([torch.zeros(pad), torch.tensor(mean, dtype=torch.float)]) + + +@pytest.fixture +def tensor_scale(scale, pad): + return torch.cat([torch.ones(pad), torch.tensor(scale, dtype=torch.float)]) + + +@pytest.fixture +def scaler(mean, scale): + scaler = StandardScaler() + scaler.mean_ = mean + scaler.scale_ = scale + return scaler + + +@pytest.fixture +def scale_transform(scaler, pad): + return ScaleTransform.from_standard_scaler(scaler, pad) + + +@pytest.fixture +def unscale_transform(scaler, pad): + return UnscaleTransform.from_standard_scaler(scaler, pad=0) + + +@pytest.fixture +def graph_transform(scale_transform): + return GraphTransform(V_transform=scale_transform, E_transform=scale_transform) + + +@pytest.fixture +def X(): + return torch.tensor([[99.0, 99.0, 1.0, 2.0], [99.0, 99.0, 3.0, 4.0]]) + + +@pytest.fixture +def prediction(): + return torch.tensor([[1.0, 2.0]]) + + +@pytest.fixture +def variance(): + return torch.tensor([[0.1, 0.2]]) + + +@pytest.fixture +def bmg(): + V = torch.tensor([[99.0, 99.0, 1.0, 2.0], [99.0, 99.0, 3.0, 4.0]]) + E = torch.tensor([[99.0, 99.0, 1.0, 2.0], [99.0, 99.0, 3.0, 4.0]]) + return MockBatchMolGraph(V=V, E=E) + + +def test_uneven_shapes(): + with pytest.raises(ValueError): + ScaleTransform(mean=[0.0], scale=[1.0, 2.0]) + + +def test_padding(mean, scale, pad): + scale_transform = ScaleTransform(mean, scale, pad) + assert torch.all(scale_transform.mean[0, :pad] == 0.0).item() + assert torch.all(scale_transform.scale[0, :pad] == 1.0).item() + + +def test_from_standard_scaler(mean, scale, scaler): + scale_transform = ScaleTransform.from_standard_scaler(scaler) + + assert torch.all(scale_transform.mean == torch.tensor([0.0, 1.0])).item() + assert torch.all(scale_transform.scale == torch.tensor([2.0, 3.0])).item() + + +def test_scale_transform_forward_train(scale_transform, X): + scale_transform.train() + output_X = scale_transform(X) + assert output_X is X + + +def test_scale_transform_forward_eval(tensor_mean, tensor_scale, scale_transform, X): + scale_transform.eval() + output_X = scale_transform(X) + expected_X = (X - tensor_mean) / tensor_scale + assert torch.equal(output_X, expected_X) + + +def test_unscale_transform_forward_train(unscale_transform, X): + unscale_transform.train() + output_X = unscale_transform(X) + assert output_X is X + + 
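+# The eval-mode tests below exercise the inverse of target standardization: predictions
+# are mapped back to the original scale via y * scale + mean and variances via
+# var * scale**2, whereas in train mode the transforms act as identity passthroughs.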
+def test_unscale_transform_forward_eval(mean, scale, unscale_transform, prediction): + unscale_transform.eval() + output = unscale_transform(prediction) + expected = prediction * scale + mean + assert torch.equal(output, expected) + + +def test_unscale_transform_variance_train(unscale_transform, variance): + unscale_transform.train() + output_variance = unscale_transform.transform_variance(variance) + assert output_variance is variance + + +def test_unscale_transform_variance_eval(scale, unscale_transform, variance): + unscale_transform.eval() + output_variance = unscale_transform.transform_variance(variance) + expected_variance = variance * scale**2 + assert torch.equal(output_variance, expected_variance) + + +def test_graph_transform_forward_train(graph_transform, bmg): + graph_transform.train() + output_bmg = graph_transform(bmg) + assert output_bmg is bmg + + +def test_graph_transform_forward_eval(graph_transform, bmg): + graph_transform.eval() + expected_V = graph_transform.V_transform(bmg.V) + expected_E = graph_transform.E_transform(bmg.E) + + transformed_bmg = graph_transform(bmg) + + assert torch.equal(transformed_bmg.V, expected_V) + assert torch.equal(transformed_bmg.E, expected_E) diff --git a/chemprop-updated/tests/unit/uncertainty/test_calibrators.py b/chemprop-updated/tests/unit/uncertainty/test_calibrators.py new file mode 100644 index 0000000000000000000000000000000000000000..4d126f198569350b00f2479fd13ce8d39ab16905 --- /dev/null +++ b/chemprop-updated/tests/unit/uncertainty/test_calibrators.py @@ -0,0 +1,376 @@ +import pytest +import torch + +from chemprop.uncertainty.calibrator import ( + AdaptiveMulticlassConformalCalibrator, + IsotonicCalibrator, + IsotonicMulticlassCalibrator, + MulticlassConformalCalibrator, + MultilabelConformalCalibrator, + MVEWeightingCalibrator, + PlattCalibrator, + RegressionConformalCalibrator, + ZelikmanCalibrator, + ZScalingCalibrator, +) + + +@pytest.mark.parametrize( + "cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.tensor( + [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + ] + ), + torch.tensor([[0, 1, 0], [0, 0, 1], [0, 1, 1], [1, 1, 0], [1, 0, 0], [1, 1, 0]]), + torch.tensor( + [[1, 1, 1], [1, 0, 1], [1, 1, 1], [1, 1, 1], [0, 1, 1], [1, 1, 1]], dtype=torch.bool + ), + torch.tensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]), + torch.tensor([[1 / 3, 2 / 3, 0.0], [1 / 3, 2 / 3, 0.5]]), + ) + ], +) +def test_IsotonicCalibrator(cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs): + """ + Testing the IsotonicCalibrator + """ + calibrator = IsotonicCalibrator() + calibrator.fit(cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs,training_targets,cal_test_uncs_with_training_targets", + [ + ( + torch.tensor( + [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + ] + ), + torch.tensor([[0, 1, 0], [0, 0, 1], [0, 1, 1], [1, 1, 0], [1, 0, 0], [1, 1, 0]]), + torch.tensor( + [[1, 1, 1], [1, 0, 1], [1, 1, 1], [1, 1, 1], [0, 1, 1], [1, 1, 1]], dtype=torch.bool + ), + torch.tensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]), + torch.tensor( + [[0.4182101, 0.8000248, 0.1312900], [0.3973791, 0.7999378, 0.2770228]], + dtype=torch.float64, + ), + torch.tensor([[0, 0, 0], [1, 1, 1], [1, 1, 0], [1, 0, 1]]), + torch.tensor( + [[0.5285367, 0.6499191, 
0.3089508], [0.5188822, 0.6499544, 0.3998689]], + dtype=torch.float64, + ), + ) + ], +) +def test_PlattCalibrator( + cal_uncs, + cal_targets, + cal_mask, + test_uncs, + cal_test_uncs, + training_targets, + cal_test_uncs_with_training_targets, +): + """ + Testing the PlattCalibrator + """ + calibrator1 = PlattCalibrator() + calibrator1.fit(cal_uncs, cal_targets, cal_mask) + uncs1 = calibrator1.apply(test_uncs) + + calibrator2 = PlattCalibrator() + calibrator2.fit(cal_uncs, cal_targets, cal_mask, training_targets) + uncs2 = calibrator2.apply(test_uncs) + + torch.testing.assert_close(uncs1, cal_test_uncs, rtol=1e-4, atol=1e-4) + torch.testing.assert_close(uncs2, cal_test_uncs_with_training_targets, rtol=1e-4, atol=1e-4) + + +@pytest.mark.parametrize( + "cal_preds,cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.zeros(100, 1, dtype=float), + torch.arange(1, 101, dtype=float).unsqueeze(1).pow(2), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.arange(1, 101, dtype=float).unsqueeze(1), + ), + ( + torch.zeros(100, 1, dtype=float), + torch.arange(2, 201, step=2, dtype=float).unsqueeze(1).pow(2), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.arange(1, 101, dtype=float).unsqueeze(1) / 4, + ), + ], +) +def test_ZScalingCalibrator(cal_preds, cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs): + """ + Testing the ZScalingCalibrator + """ + calibrator = ZScalingCalibrator() + calibrator.fit(cal_preds, cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_preds,cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.zeros(100, 1, dtype=float), + torch.arange(1, 101, dtype=float).unsqueeze(1).pow(2), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.arange(1, 101, dtype=float).unsqueeze(1), + ), + ( + torch.zeros(100, 1, dtype=float), + torch.ones(100, 1, dtype=bool), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.arange(1, 101, dtype=float).unsqueeze(1) * 8100, + ), + ], +) +def test_ZelikmanCalibrator(cal_preds, cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs): + """ + Testing the ZelikmanCalibrator + """ + calibrator = ZelikmanCalibrator(p=0.9) + calibrator.fit(cal_preds, cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_preds,cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.zeros(100, 1, dtype=float), + torch.arange(1, 101, dtype=float).unsqueeze(1).repeat(5, 1, 1), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.arange(1, 101, dtype=float).unsqueeze(1).repeat(5, 1, 1), + torch.arange(1, 101, dtype=float).unsqueeze(1), + ) + ], +) +def test_MVEWeightingCalibrator( + cal_preds, cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs +): + """ + Testing the MVEWeightingCalibrator + """ + calibrator = MVEWeightingCalibrator() + calibrator.fit(cal_preds, cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + 
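+# The conformal calibrators tested below return binary prediction-set indicators (a 0/1
+# flag per class) rather than recalibrated probabilities; alpha is presumably the usual
+# conformal miscoverage target, so smaller alpha should give larger, more conservative sets.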
+@pytest.mark.parametrize( + "cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.tensor( + [ + [[0.2, 0.3, 0.5], [0.1, 0.6, 0.3]], + [[0.1, 0.6, 0.3], [0.4, 0.4, 0.2]], + [[0.4, 0.4, 0.2], [0.2, 0.3, 0.5]], + ] + ), + torch.tensor([[2, 1], [1, 0], [0, 2]]).long(), + torch.ones([3, 2], dtype=torch.bool), + torch.tensor( + [ + [[0.3, 0.4, 0.3], [0.5, 0.2, 0.3]], + [[0.5, 0.2, 0.3], [0.6, 0.3, 0.1]], + [[0.6, 0.3, 0.1], [0.3, 0.4, 0.3]], + ] + ), + torch.tensor( + [[[0, 1, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 0]], [[1, 0, 0], [0, 1, 0]]] + ).int(), + ) + ], +) +def test_AdaptiveMulticlassConformalCalibrator( + cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs +): + """ + Testing the AdaptiveMulticlassConformalCalibrator + """ + calibrator = AdaptiveMulticlassConformalCalibrator(alpha=0.5) + calibrator.fit(cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.tensor( + [ + [[0.2, 0.3, 0.5], [0.1, 0.6, 0.3]], + [[0.1, 0.6, 0.3], [0.4, 0.4, 0.2]], + [[0.4, 0.4, 0.2], [0.2, 0.3, 0.5]], + ] + ), + torch.tensor([[2, 2], [1, 0], [0, 2]]).long(), + torch.ones([3, 2], dtype=torch.bool), + torch.tensor( + [ + [[0.3, 0.4, 0.3], [0.5, 0.2, 0.3]], + [[0.5, 0.2, 0.3], [0.6, 0.3, 0.1]], + [[0.6, 0.3, 0.1], [0.3, 0.4, 0.3]], + ] + ), + torch.tensor( + [[[0, 1, 0], [1, 0, 1]], [[1, 0, 0], [1, 1, 0]], [[1, 0, 0], [1, 1, 1]]] + ).int(), + ) + ], +) +def test_MulticlassConformalCalibrator(cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs): + """ + Testing the MulticlassConformalCalibrator + """ + calibrator = MulticlassConformalCalibrator(alpha=0.5) + calibrator.fit(cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.tensor([[0, 1, 0], [1, 0, 0], [0, 0, 1]]), + torch.tensor([[0, 1, 0], [1, 0, 0], [0, 0, 1]]), + torch.ones([3, 3], dtype=torch.bool), + torch.tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]), + torch.tensor( + [[[1, 1], [1, 0], [1, 0]], [[1, 0], [1, 1], [1, 0]], [[1, 0], [1, 0], [1, 1]]], + dtype=torch.int, + ), + ) + ], +) +def test_MultilabelConformalCalibrator(cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs): + """ + Testing the MultilabelConformalCalibrator + """ + calibrator = MultilabelConformalCalibrator(alpha=0.1) + calibrator.fit(cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_preds,cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.arange(100).unsqueeze(1), + torch.arange(100).unsqueeze(1) / 10, + torch.arange(10, 110).unsqueeze(1), + torch.ones([100, 1], dtype=torch.bool), + torch.arange(100, 200).unsqueeze(1) / 10, + torch.arange(29.2, 39.1, 0.1).unsqueeze(1), + ), + ( + torch.arange(100).unsqueeze(1), + torch.zeros(100, 1), + torch.arange(10, 110).unsqueeze(1), + torch.ones([100, 1], dtype=torch.bool), + torch.zeros(100, 1), + torch.ones(100, 1) * 20, + ), + ], +) +def test_RegressionConformalCalibrator( + cal_preds, cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs +): + """ + Testing the RegressionConformalCalibrator + """ + calibrator = RegressionConformalCalibrator(alpha=0.1) + calibrator.fit(cal_preds, cal_uncs, cal_targets, cal_mask) + uncs = 
calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.tensor( + [ + [[0.2, 0.3, 0.5], [0.1, 0.6, 0.3]], + [[0.1, 0.6, 0.3], [0.4, 0.4, 0.2]], + [[0.4, 0.4, 0.2], [0.2, 0.3, 0.5]], + [[0.0, 0.6, 0.4], [0.8, 0.1, 0.1]], + [[0.5, 0.2, 0.3], [0.4, 0.4, 0.2]], + [[0.4, 0.3, 0.3], [0.7, 0.3, 0.0]], + ] + ), + torch.tensor([[2, 1], [1, 2], [0, 2], [1, 1], [0, 0], [2, 0]]).long(), + torch.ones([6, 2], dtype=torch.bool), + torch.tensor( + [ + [[0.0, 0.1, 0.9], [0.5, 0.2, 0.3]], + [[0.3, 0.4, 0.3], [0.6, 0.3, 0.1]], + [[0.9, 0.1, 0.0], [0.3, 0.4, 0.3]], + ] + ), + torch.tensor( + [ + [[0.000000, 0.000000, 1.000000], [0.483871, 0.193548, 0.322581]], + [[0.500000, 0.000000, 0.500000], [0.714286, 0.285714, 0.000000]], + [[1.000000, 0.000000, 0.000000], [0.319149, 0.255319, 0.425532]], + ] + ), + ) + ], +) +def test_IsotonicMulticlassCalibratorCalibrator( + cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs +): + """ + Testing the IsotonicMulticlassCalibratorCalibrator + """ + calibrator = IsotonicMulticlassCalibrator() + calibrator.fit(cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) diff --git a/chemprop-updated/tests/unit/uncertainty/test_estimators.py b/chemprop-updated/tests/unit/uncertainty/test_estimators.py new file mode 100644 index 0000000000000000000000000000000000000000..fc88e07c80632100070dcc02786e071ec5b70809 --- /dev/null +++ b/chemprop-updated/tests/unit/uncertainty/test_estimators.py @@ -0,0 +1,150 @@ +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader + +from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch +from chemprop.models import MPNN +from chemprop.uncertainty.estimator import ( + ClassificationDirichletEstimator, + DropoutEstimator, + EnsembleEstimator, + EvidentialAleatoricEstimator, + EvidentialEpistemicEstimator, + EvidentialTotalEstimator, + MulticlassDirichletEstimator, + MVEEstimator, + NoUncertaintyEstimator, +) + + +@pytest.fixture +def dataloader(mol_regression_data): + smis, Y = mol_regression_data + data = [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis[:2], Y[:2])] + dset = MoleculeDataset(data) + + return DataLoader(dset, 32, collate_fn=collate_batch) + + +@pytest.fixture +def trainer(): + return pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + ) + + +def test_NoUncertaintyEstimator(data_dir, dataloader, trainer): + model = MPNN.load_from_file(data_dir / "example_model_v2_regression_mol.pt") + estimator = NoUncertaintyEstimator() + preds, uncs = estimator(dataloader, [model], trainer) + + torch.testing.assert_close(preds, torch.tensor([[[2.25354], [2.23501]]])) + assert uncs is None + + +def test_DropoutEstimator(data_dir, dataloader, trainer): + model = MPNN.load_from_file(data_dir / "example_model_v2_regression_mol.pt") + estimator = DropoutEstimator(ensemble_size=2, dropout=0.1) + preds, uncs = estimator(dataloader, [model], trainer) + + assert torch.all(uncs != 0) + assert getattr(model.message_passing.dropout, "p", None) == 0.0 + + +def test_EnsembleEstimator(data_dir, dataloader, trainer): + model1 = MPNN.load_from_file(data_dir / "example_model_v2_regression_mol.pt") + model2 = MPNN.load_from_file(data_dir / "example_model_v2_regression_mol.pt") + + # Make the 
second model predict different values than the first + model2.predictor.output_transform = torch.nn.Identity() + + estimator = EnsembleEstimator() + preds, uncs = estimator(dataloader, [model1, model2], trainer) + + torch.testing.assert_close( + preds, torch.tensor([[[2.25354], [2.23501]], [[0.09652], [0.08291]]]) + ) + torch.testing.assert_close(uncs, torch.tensor([[[1.16318], [1.15788]]])) + + +def test_EnsembleEstimator_wrong_n_models(): + estimator = EnsembleEstimator() + with pytest.raises(ValueError): + estimator("mock_dataloader", ["mock_model"], "mock_trainer") + + +def test_MVEEstimator(data_dir, dataloader, trainer): + model = MPNN.load_from_file(data_dir / "example_model_v2_regression_mve_mol.pt") + estimator = MVEEstimator() + preds, uncs = estimator(dataloader, [model], trainer) + + torch.testing.assert_close(preds, torch.tensor([[[2.10946], [2.10234]]])) + torch.testing.assert_close(uncs, torch.tensor([[[1.27602], [1.28058]]])) + + +@pytest.mark.parametrize( + "estimator_class, expected_preds, expected_uncs", + [ + ( + EvidentialTotalEstimator, + torch.tensor([[[2.09985], [2.09525]]]), + torch.tensor([[[4.63703], [4.67548]]]), + ), + ( + EvidentialEpistemicEstimator, + torch.tensor([[[2.09985], [2.09525]]]), + torch.tensor([[[2.77602], [2.80313]]]), + ), + ( + EvidentialAleatoricEstimator, + torch.tensor([[[2.09985], [2.09525]]]), + torch.tensor([[[1.86101], [1.87234]]]), + ), + ], +) +def test_EvidentialEstimators( + estimator_class, expected_preds, expected_uncs, data_dir, dataloader, trainer +): + model = MPNN.load_from_file(data_dir / "example_model_v2_regression_evidential_mol.pt") + + estimator = estimator_class() + preds, uncs = estimator(dataloader, [model], trainer) + + torch.testing.assert_close(preds, expected_preds) + torch.testing.assert_close(uncs, expected_uncs) + + +def test_ClassificationDirichletEstimator(data_dir, dataloader, trainer): + model = MPNN.load_from_file(data_dir / "example_model_v2_classification_dirichlet_mol.pt") + estimator = ClassificationDirichletEstimator() + preds, uncs = estimator(dataloader, [model], trainer) + + torch.testing.assert_close( + preds, + torch.tensor( + [[[0.085077, 0.085050, 0.086104, 0.138729], [0.069522, 0.069501, 0.070306, 0.116051]]] + ), + ) + torch.testing.assert_close( + uncs, + torch.tensor( + [[[0.170140, 0.170079, 0.172037, 0.277232], [0.139044, 0.138999, 0.140591, 0.232073]]] + ), + ) + + +def test_MulticlassDirichletEstimator(data_dir, dataloader, trainer): + model = MPNN.load_from_file(data_dir / "example_model_v2_multiclass_dirichlet_mol.pt") + estimator = MulticlassDirichletEstimator() + preds, uncs = estimator(dataloader, [model], trainer) + + torch.testing.assert_close( + preds, torch.tensor([[[[0.906426, 0.046787, 0.046787]], [[0.925395, 0.037303, 0.037303]]]]) + ) + torch.testing.assert_close(uncs, torch.tensor([[[0.140361], [0.111908]]])) diff --git a/chemprop-updated/tests/unit/uncertainty/test_evaluators.py b/chemprop-updated/tests/unit/uncertainty/test_evaluators.py new file mode 100644 index 0000000000000000000000000000000000000000..56d63574469c361fcd9cda0aa792b4c3fa9fe5b5 --- /dev/null +++ b/chemprop-updated/tests/unit/uncertainty/test_evaluators.py @@ -0,0 +1,277 @@ +import pytest +import torch + +from chemprop.uncertainty.evaluator import ( + CalibrationAreaEvaluator, + ExpectedNormalizedErrorEvaluator, + MulticlassConformalEvaluator, + MultilabelConformalEvaluator, + NLLClassEvaluator, + NLLMulticlassEvaluator, + NLLRegressionEvaluator, + RegressionConformalEvaluator, + SpearmanEvaluator, +) + 
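+# The NLL evaluators below return average negative log-likelihoods, so the tests invert
+# them with exp(-NLL) and compare the recovered likelihood against the expected
+# probability (e.g. a predicted p = 0.8 for a positive label recovers 0.8).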
+ +@pytest.mark.parametrize( + "uncs,targets,mask,likelihood", + [ + ( + torch.tensor([[0.8]]), + torch.ones([1, 1]), + torch.ones([1, 1], dtype=bool), + torch.tensor([0.8]), + ), + ( + torch.tensor([[0.8]]), + torch.zeros([1, 1]), + torch.ones([1, 1], dtype=bool), + torch.tensor([0.2]), + ), + ], +) +def test_NLLClassEvaluator(uncs, targets, mask, likelihood): + """ + Testing the NLLClassEvaluator + """ + evaluator = NLLClassEvaluator() + nll_calc = evaluator.evaluate(uncs, targets, mask) + likelihood_calc = torch.exp(-1 * nll_calc) + torch.testing.assert_close(likelihood_calc, likelihood) + + +@pytest.mark.parametrize( + "uncs,targets,mask,likelihood", + [ + ( + torch.tensor( + [ + [[0.29, 0.22, 0.49]], + [[0.35, 0.19, 0.46]], + [[0.55, 0.38, 0.07]], + [[0.15, 0.29, 0.56]], + [[0.08, 0.68, 0.24]], + ] + ), + torch.tensor([[0], [2], [2], [0], [1]]), + torch.ones([5, 1], dtype=bool), + torch.tensor([0.24875443]), + ), + ( + torch.tensor( + [ + [[8.7385e-01, 8.3770e-04, 3.3212e-02, 9.2103e-02]], + [[7.2274e-03, 1.0541e-01, 8.8703e-01, 3.2886e-04]], + [[1.7376e-03, 9.9478e-01, 1.4227e-03, 2.0596e-03]], + [[2.6487e-04, 1.3251e-03, 2.4325e-02, 9.7409e-01]], + ] + ), + torch.tensor([[0], [2], [1], [3]]), + torch.ones([4, 1], dtype=bool), + torch.tensor([0.93094635]), + ), + ], +) +def test_NLLMulticlassEvaluator(uncs, targets, mask, likelihood): + """ + Testing the NLLMulticlassEvaluator + """ + evaluator = NLLMulticlassEvaluator() + nll_calc = evaluator.evaluate(uncs, targets, mask) + likelihood_calc = torch.exp(-1 * nll_calc) + torch.testing.assert_close(likelihood_calc, likelihood) + + +@pytest.mark.parametrize( + "preds,uncs,targets,mask,likelihood", + [ + ( + torch.zeros([1, 1]), + torch.ones([1, 1]), + torch.zeros([1, 1]), + torch.ones([1, 1], dtype=bool), + torch.tensor([0.39894228]), + ), + ( + torch.zeros([2, 2]), + torch.ones([2, 2]), + torch.zeros([2, 2]), + torch.ones([2, 2], dtype=bool), + torch.tensor([0.39894228, 0.39894228]), + ), + ], +) +def test_NLLRegressionEvaluator(preds, uncs, targets, mask, likelihood): + """ + Testing the NLLRegressionEvaluator + """ + evaluator = NLLRegressionEvaluator() + nll_calc = evaluator.evaluate(preds, uncs, targets, mask) + likelihood_calc = torch.exp(-1 * nll_calc) + torch.testing.assert_close(likelihood_calc, likelihood) + + +@pytest.mark.parametrize( + "preds,uncs,targets,mask,spearman_exp", + [ + ( + torch.zeros(100, 1, dtype=float), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.tensor([1.0]), + ), + ( + torch.zeros(100, 1, dtype=float), + -torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.tensor([-1.0]), + ), + ], +) +def test_SpearmanEvaluator(preds, uncs, targets, mask, spearman_exp): + """ + Testing the SpearmanEvaluator + """ + evaluator = SpearmanEvaluator() + area = evaluator.evaluate(preds, uncs, targets, mask) + torch.testing.assert_close(area, spearman_exp) + + +@pytest.mark.parametrize( + "uncs,targets,mask,coverage", + [ + ( + torch.tensor([[[1, 0], [0, 1]], [[0, 1], [1, 0]], [[1, 0], [1, 0]]]), + torch.tensor([[0, 0], [1, 0], [1, 1]]), + torch.ones([3, 2], dtype=torch.bool), + torch.tensor([0.66666, 0.33333]), + ) + ], +) +def test_MulticlassConformalEvaluator(uncs, targets, mask, coverage): + """ + Testing the MulticlassConformalEvaluator + """ + evaluator = MulticlassConformalEvaluator() + coverage_cal = evaluator.evaluate(uncs, targets, mask) 
+ + torch.testing.assert_close(coverage_cal, coverage) + + +@pytest.mark.parametrize( + "uncs,targets,mask,coverage", + [ + ( + torch.tensor([[0, 0, 0, 0], [0, 1, 1, 1], [0, 0, 0, 0]]), + torch.tensor([[0, 0], [1, 0], [1, 1]]), + torch.ones([3, 2], dtype=torch.bool), + torch.tensor([0.66666, 0.33333]), + ) + ], +) +def test_MultilabelConformalEvaluator(uncs, targets, mask, coverage): + """ + Testing the MultilabelConformalEvaluator + """ + evaluator = MultilabelConformalEvaluator() + coverage_cal = evaluator.evaluate(uncs, targets, mask) + + torch.testing.assert_close(coverage_cal, coverage) + + +@pytest.mark.parametrize( + "preds,uncs,targets,mask,coverage", + [ + ( + torch.arange(100).unsqueeze(1), + torch.arange(100).unsqueeze(1), + torch.arange(10, 110).unsqueeze(1), + torch.ones([100, 1], dtype=torch.bool), + torch.tensor([0.8]), + ), + ( + torch.tensor([[0, 0.3, 1]]), + torch.tensor([[0.4, 0.6, 0.8]]), + torch.tensor([[0.5, 0.5, 0.5]]), + torch.ones([1, 3], dtype=torch.bool), + torch.tensor([0.0, 1.0, 0.0]), + ), + ( + torch.arange(100, 0, -1).unsqueeze(1), + torch.full((100, 1), 140), + torch.arange(1, 101, 1).unsqueeze(1), + torch.ones([100, 1], dtype=torch.bool), + torch.tensor([0.7]), + ), + ], +) +def test_RegressionConformalEvaluator(preds, uncs, targets, mask, coverage): + """ + Testing the RegressionConformalEvaluator + """ + evaluator = RegressionConformalEvaluator() + coverage_cal = evaluator.evaluate(preds, uncs, targets, mask) + + torch.testing.assert_close(coverage_cal, coverage) + + +@pytest.mark.parametrize( + "preds,uncs,targets,mask,miscal_area", + [ + ( + torch.zeros(100).unsqueeze(1), + torch.ones(100).unsqueeze(1), + torch.zeros(100).unsqueeze(1), + torch.ones([100, 1], dtype=torch.bool), + torch.tensor([0.495]), + ), + ( + torch.ones(100).unsqueeze(1), + torch.ones(100).unsqueeze(1), + torch.ones(100, 1) * 100, + torch.ones([100, 1], dtype=torch.bool), + torch.tensor([0.495]), + ), + ], +) +def test_CalibrationAreaEvaluator(preds, uncs, targets, mask, miscal_area): + """ + Testing the CalibrationAreaEvaluator + """ + evaluator = CalibrationAreaEvaluator() + miscal_area_cal = evaluator.evaluate(preds, uncs, targets, mask) + + torch.testing.assert_close(miscal_area_cal, miscal_area) + + +@pytest.mark.parametrize( + "preds,uncs,targets,mask,ence", + [ + ( + torch.zeros(100, 1), + torch.ones(100, 1), + torch.zeros(100, 1), + torch.ones([100, 1], dtype=torch.bool), + torch.tensor([1.0]), + ), + ( + torch.linspace(1, 100, steps=100).unsqueeze(1), + torch.linspace(1, 10, steps=100).unsqueeze(1), + torch.linspace(1, 100, steps=100).unsqueeze(1) + + torch.tensor([-2, -1, 1, 2]).repeat(25).unsqueeze(1), + torch.ones([100, 1], dtype=torch.bool), + torch.tensor([0.392]), + ), + ], +) +def test_ExpectedNormalizedErrorEvaluator(preds, uncs, targets, mask, ence): + """ + Testing the ExpectedNormalizedErrorEvaluator + """ + evaluator = ExpectedNormalizedErrorEvaluator() + ence_cal = evaluator.evaluate(preds, uncs, targets, mask) + + torch.testing.assert_close(ence_cal, ence) diff --git a/chemprop-updated/tests/unit/utils/test_converter.py b/chemprop-updated/tests/unit/utils/test_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..e81efc89c7c0deac34d3edb5b185528a6bbf996f --- /dev/null +++ b/chemprop-updated/tests/unit/utils/test_converter.py @@ -0,0 +1,69 @@ +import csv + +from lightning import pytorch as pl +import numpy as np +import pytest + +from chemprop.data.dataloader import build_dataloader +from chemprop.data.datapoints import 
MoleculeDatapoint +from chemprop.data.datasets import MoleculeDataset +from chemprop.featurizers.atom import MultiHotAtomFeaturizer +from chemprop.featurizers.molgraph.molecule import SimpleMoleculeMolGraphFeaturizer +from chemprop.models.model import MPNN +from chemprop.utils.v1_to_v2 import convert_model_file_v1_to_v2 + + +@pytest.fixture +def example_model_v1_path(data_dir): + return data_dir / "example_model_v1_regression_mol.pt" + + +@pytest.fixture +def example_model_v1_4_path(data_dir): + return data_dir / "example_model_v1_4.pt" + + +@pytest.fixture +def example_model_v1_prediction(data_dir): + path = data_dir / "example_model_v1_regression_mol_prediction.csv" + + with open(path) as fid: + reader = csv.reader(fid) + next(reader) + smis, ys = zip(*[(smi, float(score)) for smi, score in reader]) + + featurizer = SimpleMoleculeMolGraphFeaturizer(atom_featurizer=MultiHotAtomFeaturizer.v1()) + + ys = np.array(ys).reshape(-1, 1) + test_data = [MoleculeDatapoint.from_smi(smi, None) for smi in smis] + test_dset = MoleculeDataset(test_data, featurizer) + + test_loader = build_dataloader(test_dset, shuffle=False) + return ys, test_loader + + +def test_converter(tmp_path, example_model_v1_path, example_model_v1_prediction): + directory = tmp_path / "test_converter" + directory.mkdir() + model_v2_save_path = directory / "example_model_v2_regression_mol.pt" + + convert_model_file_v1_to_v2(example_model_v1_path, model_v2_save_path) + assert model_v2_save_path.exists() + + mpnn = MPNN.load_from_checkpoint(model_v2_save_path) + + ys_v1, test_loader = example_model_v1_prediction + + trainer = pl.Trainer(accelerator="cpu", logger=None, enable_progress_bar=False) + predss = trainer.predict(mpnn, test_loader) + ys_v2 = np.vstack(predss) + assert np.allclose(ys_v2, ys_v1, atol=1e-6) + + +def test_converter_v1_4(tmp_path, example_model_v1_4_path): + directory = tmp_path / "test_converter" + directory.mkdir() + model_v2_save_path = directory / "converted_v1_4.pt" + + convert_model_file_v1_to_v2(example_model_v1_4_path, model_v2_save_path) + assert model_v2_save_path.exists() diff --git a/chemprop-updated/tests/unit/utils/test_save_load_mol+mol.py b/chemprop-updated/tests/unit/utils/test_save_load_mol+mol.py new file mode 100644 index 0000000000000000000000000000000000000000..bd50d02e716515b2c1fa53ba25d9730d5f809989 --- /dev/null +++ b/chemprop-updated/tests/unit/utils/test_save_load_mol+mol.py @@ -0,0 +1,187 @@ +from pathlib import Path + +from lightning import pytorch as pl +import numpy as np +import pytest +import torch +from torch.nn import Identity +from torch.utils.data import DataLoader + +from chemprop.data import ( + MoleculeDatapoint, + MoleculeDataset, + MulticomponentDataset, + collate_multicomponent, +) +from chemprop.models import MulticomponentMPNN +from chemprop.models.utils import load_model, save_model +from chemprop.nn import ( + MSE, + BondMessagePassing, + GraphTransform, + MulticomponentMessagePassing, + NormAggregation, + RegressionFFN, + ScaleTransform, + UnscaleTransform, +) + + +@pytest.fixture +def checkpoint_path(data_dir): + return data_dir / "example_model_v2_regression_mol+mol.ckpt" + + +@pytest.fixture +def file_path(data_dir): + return data_dir / "example_model_v2_regression_mol+mol.pt" + + +@pytest.fixture +def model(checkpoint_path): + model = MulticomponentMPNN.load_from_checkpoint(checkpoint_path) + return model + + +@pytest.fixture +def test_loader(mol_mol_regression_data): + smis1, smis2, _ = mol_mol_regression_data + data = [ + [MoleculeDatapoint.from_smi(smi) for 
smi in smis1], + [MoleculeDatapoint.from_smi(smi) for smi in smis2], + ] + dsets = [MoleculeDataset(d) for d in data] + dset = MulticomponentDataset(dsets) + + return DataLoader(dset, 32, collate_fn=collate_multicomponent) + + +@pytest.fixture +def trainer(): + return pl.Trainer( + logger=None, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + ) + + +@pytest.fixture +def ys(model, test_loader, trainer): + predss = trainer.predict(model, test_loader) + return np.vstack(predss) + + +def test_roundtrip(tmp_path, model, test_loader, trainer, ys): + save_path = Path(tmp_path) / "test.pt" + save_model(save_path, model) + + model_from_file = MulticomponentMPNN.load_from_file(save_path) + + predss_from_file = trainer.predict(model_from_file, test_loader) + ys_from_file = np.vstack(predss_from_file) + + assert np.allclose(ys_from_file, ys, atol=1e-6) + + +def test_checkpoint_is_valid(checkpoint_path, test_loader, trainer, ys): + model_from_checkpoint = MulticomponentMPNN.load_from_file(checkpoint_path) + + predss_from_checkpoint = trainer.predict(model_from_checkpoint, test_loader) + ys_from_checkpoint = np.vstack(predss_from_checkpoint) + + assert np.allclose(ys_from_checkpoint, ys, atol=1e-6) + + +def test_checkpoint_roundtrip(checkpoint_path, file_path, trainer, test_loader): + model_from_checkpoint = MulticomponentMPNN.load_from_checkpoint( + checkpoint_path, map_location="cpu" + ) + model_from_file = MulticomponentMPNN.load_from_file(file_path, map_location="cpu") + + predss_from_checkpoint = trainer.predict(model_from_checkpoint, test_loader) + ys_from_checkpoint = np.vstack(predss_from_checkpoint) + + predss_from_file = trainer.predict(model_from_file, test_loader) + ys_from_file = np.vstack(predss_from_file) + + assert np.allclose(ys_from_file, ys_from_checkpoint, atol=1e-6) + + +def test_scalers_roundtrip_one_block(tmp_path): + E_f_transform = ScaleTransform(mean=[0.0, 1.0], scale=[2.0, 3.0]) + graph_transform = GraphTransform(V_transform=Identity(), E_transform=E_f_transform) + V_d_transform = ScaleTransform(mean=[4.0, 5.0], scale=[6.0, 7.0]) + mcmp = MulticomponentMessagePassing( + blocks=[BondMessagePassing(graph_transform=graph_transform, V_d_transform=V_d_transform)], + n_components=2, + shared=True, + ) + + output_transform = UnscaleTransform(mean=[8.0, 9.0], scale=[10.0, 11.0]) + criterion = MSE(task_weights=[12.0]) + ffn = RegressionFFN(output_transform=output_transform, criterion=criterion) + + X_d_transform = ScaleTransform(mean=[13.0, 14.0], scale=[15.0, 16.0]) + original = MulticomponentMPNN(mcmp, NormAggregation(), ffn, X_d_transform=X_d_transform) + + save_model(tmp_path / "model.pt", original) + loaded = load_model(tmp_path / "model.pt", multicomponent=True) + + assert torch.equal( + original.message_passing.blocks[0].V_d_transform.mean, + loaded.message_passing.blocks[0].V_d_transform.mean, + ) + assert torch.equal( + original.message_passing.blocks[1].graph_transform.E_transform.mean, + loaded.message_passing.blocks[1].graph_transform.E_transform.mean, + ) + assert torch.equal( + original.predictor.criterion.task_weights, loaded.predictor.criterion.task_weights + ) + assert torch.equal( + original.predictor.output_transform.mean, loaded.predictor.output_transform.mean + ) + assert torch.equal(original.X_d_transform.mean, loaded.X_d_transform.mean) + + +def test_scalers_roundtrip_two_blocks(tmp_path): + E_f_transform = ScaleTransform(mean=[0.0, 1.0], scale=[2.0, 3.0]) + graph_transform = 
GraphTransform(V_transform=Identity(), E_transform=E_f_transform) + V_d_transform = ScaleTransform(mean=[4.0, 5.0], scale=[6.0, 7.0]) + mcmp = MulticomponentMessagePassing( + blocks=[ + BondMessagePassing(graph_transform=graph_transform, V_d_transform=V_d_transform), + BondMessagePassing(graph_transform=graph_transform), + ], + n_components=2, + shared=True, + ) + + output_transform = UnscaleTransform(mean=[8.0, 9.0], scale=[10.0, 11.0]) + criterion = MSE(task_weights=[12.0]) + ffn = RegressionFFN(output_transform=output_transform, criterion=criterion) + + X_d_transform = ScaleTransform(mean=[13.0, 14.0], scale=[15.0, 16.0]) + original = MulticomponentMPNN(mcmp, NormAggregation(), ffn, X_d_transform=X_d_transform) + + save_model(tmp_path / "model.pt", original) + loaded = load_model(tmp_path / "model.pt", multicomponent=True) + + assert torch.equal( + original.message_passing.blocks[0].V_d_transform.mean, + loaded.message_passing.blocks[0].V_d_transform.mean, + ) + assert torch.equal( + original.message_passing.blocks[1].graph_transform.E_transform.mean, + loaded.message_passing.blocks[1].graph_transform.E_transform.mean, + ) + assert torch.equal( + original.predictor.criterion.task_weights, loaded.predictor.criterion.task_weights + ) + assert torch.equal( + original.predictor.output_transform.mean, loaded.predictor.output_transform.mean + ) + assert torch.equal(original.X_d_transform.mean, loaded.X_d_transform.mean) diff --git a/chemprop-updated/tests/unit/utils/test_save_load_mol.py b/chemprop-updated/tests/unit/utils/test_save_load_mol.py new file mode 100644 index 0000000000000000000000000000000000000000..46ed8cfc159f486e0486636bcc6050338353ae72 --- /dev/null +++ b/chemprop-updated/tests/unit/utils/test_save_load_mol.py @@ -0,0 +1,138 @@ +from pathlib import Path + +from lightning import pytorch as pl +import numpy as np +import pytest +import torch +from torch.nn import Identity +from torch.utils.data import DataLoader + +from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch +from chemprop.models import MPNN +from chemprop.models.utils import load_model, save_model +from chemprop.nn import ( + MSE, + BondMessagePassing, + GraphTransform, + NormAggregation, + RegressionFFN, + ScaleTransform, + UnscaleTransform, +) + + +@pytest.fixture +def checkpoint_path(data_dir): + return data_dir / "example_model_v2_regression_mol.ckpt" + + +@pytest.fixture +def model_path(data_dir): + return data_dir / "example_model_v2_regression_mol.pt" + + +@pytest.fixture +def model(checkpoint_path): + model = MPNN.load_from_checkpoint(checkpoint_path) + return model + + +@pytest.fixture +def test_loader(smis): + data = [MoleculeDatapoint.from_smi(smi) for smi in smis] + dset = MoleculeDataset(data) + + return DataLoader(dset, 32, collate_fn=collate_batch) + + +@pytest.fixture +def trainer(): + return pl.Trainer( + logger=None, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + ) + + +@pytest.fixture +def ys(model, test_loader, trainer): + predss = trainer.predict(model, test_loader) + return np.vstack(predss) + + +def test_roundtrip(tmp_path, model, test_loader, trainer, ys): + save_path = Path(tmp_path) / "test.pt" + save_model(save_path, model) + + model_from_file = MPNN.load_from_file(save_path) + + predss_from_file = trainer.predict(model_from_file, test_loader) + ys_from_file = np.vstack(predss_from_file) + + assert np.allclose(ys_from_file, ys, atol=1e-6) + + +def test_checkpoint_is_valid(checkpoint_path, 
test_loader, trainer, ys): + model_from_checkpoint = MPNN.load_from_file(checkpoint_path) + + predss_from_checkpoint = trainer.predict(model_from_checkpoint, test_loader) + ys_from_checkpoint = np.vstack(predss_from_checkpoint) + + assert np.allclose(ys_from_checkpoint, ys, atol=1e-6) + + +def test_checkpoint_roundtrip(checkpoint_path, model_path, trainer, test_loader): + model_from_checkpoint = MPNN.load_from_checkpoint(checkpoint_path, map_location="cpu") + model_from_file = MPNN.load_from_file(model_path, map_location="cpu") + + predss_from_checkpoint = trainer.predict(model_from_checkpoint, test_loader) + ys_from_checkpoint = np.vstack(predss_from_checkpoint) + + predss_from_file = trainer.predict(model_from_file, test_loader) + ys_from_file = np.vstack(predss_from_file) + + assert np.allclose(ys_from_file, ys_from_checkpoint, atol=1e-6) + + +def test_scalers_roundtrip(tmp_path): + E_f_transform = ScaleTransform(mean=[0.0, 1.0], scale=[2.0, 3.0]) + graph_transform = GraphTransform(V_transform=Identity(), E_transform=E_f_transform) + V_d_transform = ScaleTransform(mean=[4.0, 5.0], scale=[6.0, 7.0]) + mp = BondMessagePassing(graph_transform=graph_transform, V_d_transform=V_d_transform) + + output_transform = UnscaleTransform(mean=[8.0, 9.0], scale=[10.0, 11.0]) + criterion = MSE(task_weights=[12.0]) + ffn = RegressionFFN(output_transform=output_transform, criterion=criterion) + + X_d_transform = ScaleTransform(mean=[13.0, 14.0], scale=[15.0, 16.0]) + original = MPNN(mp, NormAggregation(), ffn, X_d_transform=X_d_transform) + + save_model(tmp_path / "model.pt", original) + loaded = load_model(tmp_path / "model.pt", multicomponent=False) + + assert torch.equal( + original.message_passing.V_d_transform.mean, loaded.message_passing.V_d_transform.mean + ) + assert torch.equal( + original.message_passing.graph_transform.E_transform.mean, + loaded.message_passing.graph_transform.E_transform.mean, + ) + assert torch.equal( + original.predictor.criterion.task_weights, loaded.predictor.criterion.task_weights + ) + assert torch.equal( + original.predictor.output_transform.mean, loaded.predictor.output_transform.mean + ) + assert torch.equal(original.X_d_transform.mean, loaded.X_d_transform.mean) + + +def test_load_checkpoint_with_metrics(data_dir): + MPNN.load_from_checkpoint(data_dir / "example_model_v2_regression_mol_with_metrics.ckpt") + MPNN.load_from_checkpoint(data_dir / "example_model_v2_classification_mol_with_metrics.ckpt") + + +def test_load_trained_on_cuda(data_dir): + MPNN.load_from_file(data_dir / "example_model_v2_trained_on_cuda.pt", map_location="cpu") diff --git a/chemprop/.bumpversion.cfg b/chemprop/.bumpversion.cfg new file mode 100644 index 0000000000000000000000000000000000000000..6cd39fbd0a2f9ec573a3a412e5c42fdb6c077e24 --- /dev/null +++ b/chemprop/.bumpversion.cfg @@ -0,0 +1,10 @@ +[bumpversion] +current_version = 2.1.2 +commit = True +tag = True + +[bumpversion:file:pyproject.toml] + +[bumpversion:file:chemprop/__init__.py] + +[bumpversion:file:docs/source/conf.py] diff --git a/chemprop/.dockerignore b/chemprop/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..c5e0ea7ad77234ddf99146c9320aea416495adbb --- /dev/null +++ b/chemprop/.dockerignore @@ -0,0 +1,3 @@ +**.git* +.dockerignore +Dockerfile diff --git a/chemprop/.flake8 b/chemprop/.flake8 new file mode 100644 index 0000000000000000000000000000000000000000..a188a7e321cb817e76c3bf76f08c196a709a2a99 --- /dev/null +++ b/chemprop/.flake8 @@ -0,0 +1,9 @@ +[flake8] +ignore = E203, E266, E501, 
F403, E741, W503, W605 +max-line-length = 100 +max-complexity = 18 +per-file-ignores = + __init__.py: F401 + chemprop/nn/predictors.py: F405 + chemprop/nn/metrics.py: F405 + tests/unit/nn/test_metrics.py: E121, E122, E131, E241, W291 diff --git a/chemprop/.github/ISSUE_TEMPLATE/todo.md b/chemprop/.github/ISSUE_TEMPLATE/todo.md new file mode 100644 index 0000000000000000000000000000000000000000..e88203a8de5130928a009d023fe6cc25f865facb --- /dev/null +++ b/chemprop/.github/ISSUE_TEMPLATE/todo.md @@ -0,0 +1,11 @@ +--- +name: to-do +about: Add an item to the to-do list. More generic than a feature request +title: "[TODO]: " +labels: todo +assignees: '' + +--- + +**Notes** +_these could be implementation or more specific details to keep in mind, if they'll be helpful for issue tracking_ diff --git a/chemprop/.github/ISSUE_TEMPLATE/v1_bug_report.md b/chemprop/.github/ISSUE_TEMPLATE/v1_bug_report.md new file mode 100644 index 0000000000000000000000000000000000000000..43808ed6f1bae881d608b8c1bb0a223a714a7ce4 --- /dev/null +++ b/chemprop/.github/ISSUE_TEMPLATE/v1_bug_report.md @@ -0,0 +1,35 @@ +--- +name: v1 Bug Report +about: Report a bug in v1 (will not be fixed) +title: "[v1 BUG]: " +labels: bug, v1-wontfix +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**Example(s)** +Provide some examples of where the current code fails. Feel free to share your actual code for additional context, but a minimal and isolated example is preferred. + +**Expected behavior** +A clear and concise description of what you expected to happen. If there is correct, expected output, include that here as well. + +**Error Stack Trace** +If the bug is resulting in an error message, provide the _full_ stack trace (not just the last line). This is helpful for debugging, especially in cases where you aren't able to provide a minimum/isolated working example with accompanying files. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Environment** +- python version +- package versions: `conda list` or `pip list` +- OS + +**Checklist** +- [ ] all dependencies are satisifed: `conda list` or `pip list` shows the packages listed in the `pyproject.toml` +- [ ] the unit tests are working: `pytest -v` reports no errors + +**Additional context** +Add any other context about the problem here. diff --git a/chemprop/.github/ISSUE_TEMPLATE/v1_question.md b/chemprop/.github/ISSUE_TEMPLATE/v1_question.md new file mode 100644 index 0000000000000000000000000000000000000000..77227d669535d1c59391df2e829e43f1619bfdf1 --- /dev/null +++ b/chemprop/.github/ISSUE_TEMPLATE/v1_question.md @@ -0,0 +1,17 @@ +--- +name: v1 Question +about: Have a question about how to use Chemprop v1? +title: "[v1 QUESTION]: " +labels: question +assignees: '' + +--- + +**What are you trying to do?** +Please tell us what you're trying to do with Chemprop, providing as much detail as possible + +**Previous attempts** +If possible, provide some examples of what you've already tried and what the output was. + +**Screenshots** +If applicable, add screenshots to help explain your problem. 
diff --git a/chemprop/.github/ISSUE_TEMPLATE/v2_bug_report.md b/chemprop/.github/ISSUE_TEMPLATE/v2_bug_report.md new file mode 100644 index 0000000000000000000000000000000000000000..36894da38e9b2ab8c8291b1e98a5393b107725ee --- /dev/null +++ b/chemprop/.github/ISSUE_TEMPLATE/v2_bug_report.md @@ -0,0 +1,35 @@ +--- +name: v2 Bug Report +about: Create a report to help us improve +title: "[v2 BUG]: " +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**Example(s)** +Provide some examples of where the current code fails. Feel free to share your actual code for additional context, but a minimal and isolated example is preferred. + +**Expected behavior** +A clear and concise description of what you expected to happen. If there is correct, expected output, include that here as well. + +**Error Stack Trace** +If the bug is resulting in an error message, provide the _full_ stack trace (not just the last line). This is helpful for debugging, especially in cases where you aren't able to provide a minimum/isolated working example with accompanying files. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Environment** +- python version +- package versions: `conda list` or `pip list` +- OS + +**Checklist** +- [ ] all dependencies are satisifed: `conda list` or `pip list` shows the packages listed in the `pyproject.toml` +- [ ] the unit tests are working: `pytest -v` reports no errors + +**Additional context** +Add any other context about the problem here. diff --git a/chemprop/.github/ISSUE_TEMPLATE/v2_feature_request.md b/chemprop/.github/ISSUE_TEMPLATE/v2_feature_request.md new file mode 100644 index 0000000000000000000000000000000000000000..2df14257c2da81ebf200dc0aa55630bf77fc32ec --- /dev/null +++ b/chemprop/.github/ISSUE_TEMPLATE/v2_feature_request.md @@ -0,0 +1,23 @@ +--- +name: v2 Feature Request +about: Suggest an idea for this project +title: "[v2 FEATURE]: " +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. + +**Use-cases/examples of this new feature** +What are some example workflows that would employ this new feature? Are there any relevant issues? + +**Desired solution/workflow** +A clear and concise description of what you want to happen. Include some (pseudo)code, if possible + +**Discussion** +What are some considerations around this new feature? Are there alternative approaches to consider? What should the scope of the feature be? + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/chemprop/.github/ISSUE_TEMPLATE/v2_question.md b/chemprop/.github/ISSUE_TEMPLATE/v2_question.md new file mode 100644 index 0000000000000000000000000000000000000000..8a79ad871770c5122033baee35226c6b99cfb6d8 --- /dev/null +++ b/chemprop/.github/ISSUE_TEMPLATE/v2_question.md @@ -0,0 +1,17 @@ +--- +name: v2 Question +about: Have a question about how to use Chemprop v2? +title: "[v2 QUESTION]: " +labels: question +assignees: '' + +--- + +**What are you trying to do?** +Please tell us what you're trying to do with Chemprop, providing as much detail as possible + +**Previous attempts** +If possible, provide some examples of what you've already tried and what the output was. + +**Screenshots** +If applicable, add screenshots to help explain your problem. 
diff --git a/chemprop/.github/PULL_REQUEST_TEMPLATE.md b/chemprop/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000000000000000000000000000000000000..7f0331e7fa838665894689fbc6ff4f1f9d440cdc --- /dev/null +++ b/chemprop/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,18 @@ +## Description +Include a brief summary of the bug/feature/etc. that this PR seeks to address + +## Example / Current workflow +Include a sample workflow to either **(a)** reproduce the bug with current codebase or **(b)** showcase the deficiency that this PR seeks to address + +## Bugfix / Desired workflow +Include either **(a)** the same workflow from above with the correct output produced via this PR or **(b)** some (pseudo)code containing the new workflow that this PR will (seek to) implement + +## Questions +If there are open questions about implementation strategy or scope of the PR, include them here + +## Relevant issues +If appropriate, please tag them here and include a quick summary + +## Checklist +- [ ] linted with flake8? +- [ ] (if appropriate) unit tests added? diff --git a/chemprop/.github/PULL_REQUEST_TEMPLATE/bugfix.md b/chemprop/.github/PULL_REQUEST_TEMPLATE/bugfix.md new file mode 100644 index 0000000000000000000000000000000000000000..3e79367a22854d83ac45896ceb9afb1b7be43462 --- /dev/null +++ b/chemprop/.github/PULL_REQUEST_TEMPLATE/bugfix.md @@ -0,0 +1,12 @@ +## Bug report +Include a brief summary of the bug that this PR seeks to address. If possible, include relevant issue tags + +## Example +Include a sample execution to reproduce the bug with current codebase, and some sample output showcasing that the PR fixes this bug + +## Questions +If there are open questions about implementation strategy or scope of the PR, include them here + +## Checklist +- [ ] linted with flake8? +- [ ] (if necessary) appropriate unit tests added? diff --git a/chemprop/.github/PULL_REQUEST_TEMPLATE/new_feature.md b/chemprop/.github/PULL_REQUEST_TEMPLATE/new_feature.md new file mode 100644 index 0000000000000000000000000000000000000000..a38b8ab66c3bfe610f1b1b6315252f93e83eb8f8 --- /dev/null +++ b/chemprop/.github/PULL_REQUEST_TEMPLATE/new_feature.md @@ -0,0 +1,15 @@ +## Statement of need +What deficiency does this PR seek to address? If there are relevant issues, please tag them here + +## Current workflow +How is this need achieved with the current codebase? + +## Desired workflow +Include some (pseudo)code containing the new workflow that this PR will (seek to) implement + +## Questions +If there are open questions about implementation strategy or scope of the PR, include them here + +## Checklist +- [ ] linted with flake8? +- [ ] appropriate unit tests added? diff --git a/chemprop/.github/workflows/ci.yml b/chemprop/.github/workflows/ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..1f2b92c5a3d688c9fc3b371ef3163d3dd1659719 --- /dev/null +++ b/chemprop/.github/workflows/ci.yml @@ -0,0 +1,158 @@ +# ci.yml +# +# Continuous Integration for Chemprop - checks build, code formatting, and runs tests for all +# proposed changes and on a regular schedule +# +# Note: this file contains extensive inline documentation to aid with knowledge transfer. 
+ +name: Continuous Integration + +on: + # run on pushes/pull requests to/against main + push: + branches: [main] + pull_request: + branches: [main] + # run this in the morning on weekdays to catch dependency issues + schedule: + - cron: "0 8 * * 1-5" + # allow manual runs + workflow_dispatch: + +# cancel previously running tests if new commits are made +# https://docs.github.com/en/actions/examples/using-concurrency-expressions-and-a-test-matrix +concurrency: + group: actions-id-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + USE_LIBUV: 0 # libuv doesn't work on GitHub actions Windows runner + +jobs: + build: + name: Check Build + runs-on: ubuntu-latest + steps: + # clone the repo, attempt to build + - uses: actions/checkout@v4 + - run: python -m pip install build + - run: python -m build . + + lint: + name: Check Formatting + needs: build + runs-on: ubuntu-latest + steps: + # clone the repo, run black and flake8 on it + - uses: actions/checkout@v4 + - run: python -m pip install black==23.* flake8 isort + - run: black --check . + - run: flake8 . + - run: isort --check . + + test: + name: Execute Tests + needs: lint + runs-on: ${{ matrix.os }} + defaults: + run: + # run with a login shell (so that the conda environment is activated) + # and echo the commands we run as we do them (for debugging purposes) + shell: bash -el {0} + strategy: + # if one platform/python version fails, continue testing the others + fail-fast: false + matrix: + # test on all platforms with both supported versions of Python + os: [ubuntu-latest, macos-13, windows-latest] + python-version: [3.11, 3.12] + steps: + - uses: actions/checkout@v4 + # use a version of the conda virtual environment manager to set up an + # isolated environment with the Python version we want + - uses: conda-incubator/setup-miniconda@v3 + with: + python-version: ${{ matrix.python-version }} + auto-update-conda: true + show-channel-urls: true + conda-remove-defaults: "true" + environment-file: environment.yml + activate-environment: chemprop + - name: Install dependencies + shell: bash -l {0} + run: | + python -m pip install nbmake + python -m pip install ".[dev,docs,test,hpopt]" + - name: Test with pytest + shell: bash -l {0} + run: | + pytest -v tests + - name: Test notebooks + shell: bash -l {0} + run: | + python -m pip install matplotlib + pytest --no-cov -v --nbmake $(find examples -name '*.ipynb' ! -name 'use_featurizer_with_other_libraries.ipynb' ! -name 'shapley_value_with_customized_featurizers.ipynb') + pytest --no-cov -v --nbmake $(find docs/source/tutorial/python -name "*.ipynb") + pypi: + name: Build and publish Python 🐍 distributions 📦 to PyPI + runs-on: ubuntu-latest + # only run if the tests pass + needs: [test] + # run only on pushes to main on chemprop + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' && github.repository == 'chemprop/chemprop'}} + steps: + - uses: actions/checkout@master + - name: Set up Python 3.12 + uses: actions/setup-python@v3 + with: + python-version: "3.11" + - name: Install pypa/build + run: >- + python -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: >- + python -m + build + --sdist + --wheel + --outdir dist/ + . 
+ - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} + skip-existing: true + verbose: true + + build-and-push-docker: + # shamelessly copied from: + # https://github.com/ReactionMechanismGenerator/RMG-Py/blob/bfaee1cad9909a17103a8e6ef9a22569c475964c/.github/workflows/CI.yml#L359C1-L386C54 + # which is also shamelessly copied from somewhere + runs-on: ubuntu-latest + # only run if the tests pass + needs: [test] + # run only on pushes to main on chemprop + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' && github.repository == 'chemprop/chemprop'}} + steps: + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Login to Docker Hub + uses: docker/login-action@v2 + with: + # repository secrets managed by the maintainers + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and Push + uses: docker/build-push-action@v4 + with: + push: true + tags: chemprop/chemprop:latest + diff --git a/chemprop/.gitignore b/chemprop/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ea9f6e3054d1c4e93965c11b90db8fa3bb880486 --- /dev/null +++ b/chemprop/.gitignore @@ -0,0 +1,178 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
+#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +*.idea +*.DS_Store +*.vscode +*.csv +*.pkl +*.pt +*.json +*.sqlite3 +*.yaml +*.tfevents.* +*.ckpt +chemprop/_version.py +*.ckpt +*.ipynb +config.toml + +!tests/data/* diff --git a/chemprop/.readthedocs.yml b/chemprop/.readthedocs.yml new file mode 100644 index 0000000000000000000000000000000000000000..9110336dbdc6ef24b22efa666e1095593276cf5a --- /dev/null +++ b/chemprop/.readthedocs.yml @@ -0,0 +1,19 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.11" + jobs: + post_install: + - python -m pip install --upgrade --upgrade-strategy only-if-needed --no-cache-dir ".[docs]" + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/source/conf.py diff --git a/chemprop/CITATIONS.bib b/chemprop/CITATIONS.bib new file mode 100644 index 0000000000000000000000000000000000000000..1eb6f4b3554ed25e2a7f2aa75a4d8f2eecb7eba0 --- /dev/null +++ b/chemprop/CITATIONS.bib @@ -0,0 +1,37 @@ +# this was downloaded from ACS: https://pubs.acs.org/doi/10.1021/acs.jcim.9b00237 +@article{chemprop_theory, + author = {Yang, Kevin and Swanson, Kyle and Jin, Wengong and Coley, Connor and Eiden, Philipp and Gao, Hua and Guzman-Perez, Angel and Hopper, Timothy and Kelley, Brian and Mathea, Miriam and Palmer, Andrew and Settels, Volker and Jaakkola, Tommi and Jensen, Klavs and Barzilay, Regina}, + title = {Analyzing Learned Molecular Representations for Property Prediction}, + journal = {Journal of Chemical Information and Modeling}, + volume = {59}, + number = {8}, + pages = {3370-3388}, + year = {2019}, + doi = {10.1021/acs.jcim.9b00237}, + note ={PMID: 31361484}, + URL = { + https://doi.org/10.1021/acs.jcim.9b00237 + }, + eprint = { + https://doi.org/10.1021/acs.jcim.9b00237 + } +} + +# this was downloaded from ACS: https://pubs.acs.org/doi/10.1021/acs.jcim.3c01250 +@article{chemprop_software, + author = {Heid, Esther and Greenman, Kevin P. and Chung, Yunsie and Li, Shih-Cheng and Graff, David E. and Vermeire, Florence H. and Wu, Haoyang and Green, William H. 
and McGill, Charles J.}, + title = {Chemprop: A Machine Learning Package for Chemical Property Prediction}, + journal = {Journal of Chemical Information and Modeling}, + volume = {64}, + number = {1}, + pages = {9-17}, + year = {2024}, + doi = {10.1021/acs.jcim.3c01250}, + note ={PMID: 38147829}, + URL = { + https://doi.org/10.1021/acs.jcim.3c01250 + }, + eprint = { + https://doi.org/10.1021/acs.jcim.3c01250 + } +} diff --git a/chemprop/CONTRIBUTING.md b/chemprop/CONTRIBUTING.md new file mode 100644 index 0000000000000000000000000000000000000000..514fe849a9f0e2214ad314f9d32170d07b9300de --- /dev/null +++ b/chemprop/CONTRIBUTING.md @@ -0,0 +1,40 @@ +# How to contribute + +We welcome contributions from external contributors, and this document +describes how to merge code changes into this repository. + +## Getting Started + +* Make sure you have a [GitHub account](https://github.com/signup/free). +* [Fork](https://help.github.com/articles/fork-a-repo/) this repository on GitHub. +* On your local machine, + [clone](https://help.github.com/articles/cloning-a-repository/) your fork of + the repository. + +## Making Changes + +* Add some really awesome code to your local fork. It's usually a [good + idea](http://blog.jasonmeridth.com/posts/do-not-issue-pull-requests-from-your-master-branch/) + to make changes on a + [branch](https://help.github.com/articles/creating-and-deleting-branches-within-your-repository/) + with the branch name relating to the feature you are going to add. +* When you are ready for others to examine and comment on your new feature, + navigate to your fork of `chemprop` on GitHub and open a [pull + request](https://help.github.com/articles/using-pull-requests/) (PR). Note that + after you launch a PR from one of your fork's branches, all + subsequent commits to that branch will be added to the open pull request + automatically. Each commit added to the PR will be validated for + mergability, compilation and test suite compliance; the results of these tests + will be visible on the PR page. +* If you're providing a new feature, you **must** add test cases and documentation. +* When the code is ready to go, run the test suite: `pytest`. +* When you're ready to be considered for merging, click the "Ready for review" + box on the PR page to let the Chemprop devs know that the changes are complete. + The code will not be merged until the continuous integration returns checkmarks, + and at least one core developer gives "Approved" reviews. + +## Additional Resources + +* [General GitHub documentation](https://help.github.com/) +* [PR best practices](http://codeinthehole.com/writing/pull-requests-and-other-good-practices-for-teams-using-github/) +* [A guide to contributing to software packages](http://www.contribution-guide.org) diff --git a/chemprop/Dockerfile b/chemprop/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..20f773292efa9b59bc348a1b7025c175e90b50d2 --- /dev/null +++ b/chemprop/Dockerfile @@ -0,0 +1,50 @@ +# Dockerfile +# +# Builds a Docker image containing Chemprop and its required dependencies. +# +# Build this image with: +# git clone https://github.com/chemprop/chemprop.git +# cd chemprop +# docker build --tag=chemprop:latest . +# +# Run the built image with: +# docker run --name chemprop_container -it chemprop:latest +# +# Note: +# This image only runs on CPU - we do not provide a Dockerfile +# for GPU use (see installation documentation). 
+ +# Parent Image +FROM continuumio/miniconda3:latest + +# Install libxrender1 (required by RDKit) and then clean up +RUN apt-get update && \ + apt-get install -y \ + libxrender1 && \ + apt-get autoremove -y && \ + apt-get clean -y + +WORKDIR /opt/chemprop + +# build an empty conda environment with appropriate Python version +RUN conda create --name chemprop_env python=3.11* + +# This runs all subsequent commands inside the chemprop_env conda environment +# +# Analogous to just activating the environment, which we can't actually do here +# since that requires running conda init and restarting the shell (not possible +# in a Dockerfile build script) +SHELL ["conda", "run", "--no-capture-output", "-n", "chemprop_env", "/bin/bash", "-c"] + +# Follow the installation instructions then clear the cache +ADD chemprop chemprop +ENV PYTHONPATH /opt/chemprop +ADD LICENSE.txt pyproject.toml README.md ./ +RUN conda install pytorch cpuonly -c pytorch && \ + conda clean --all --yes && \ + python -m pip install . && \ + python -m pip cache purge + +# when running this image, open an interactive bash terminal inside the conda environment +RUN echo "conda activate chemprop_env" > ~/.bashrc +ENTRYPOINT ["/bin/bash", "--login"] diff --git a/chemprop/LICENSE.txt b/chemprop/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..77e0c51d93cc3e4e315ba539a4a801828dc1e366 --- /dev/null +++ b/chemprop/LICENSE.txt @@ -0,0 +1,27 @@ +MIT License + +Copyright (c) 2024 The Chemprop Development Team (Regina Barzilay, +Jackson Burns, Yunsie Chung, Anna Doner, Xiaorui Dong, David Graff, +William Green, Kevin Greenman, Yanfei Guan, Esther Heid, Lior Hirschfeld, +Tommi Jaakkola, Wengong Jin, Olivier Lafontant-Joseph, Shih-Cheng Li, +Mengjie Liu, Joel Manu, Charles McGill, Angiras Menon, Nathan Morgan, +Hao-Wei Pang, Kevin Spiekermann, Kyle Swanson, Allison Tam, +Florence Vermeire, Haoyang Wu, and Kevin Yang, Jonathan Zheng) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/chemprop/README.md b/chemprop/README.md new file mode 100644 index 0000000000000000000000000000000000000000..43bf7e12f7e039643670d31b33bb447f2701f467 --- /dev/null +++ b/chemprop/README.md @@ -0,0 +1,59 @@ +![ChemProp Logo](docs/source/_static/images/logo/chemprop_logo.svg) +# Chemprop + +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/chemprop)](https://badge.fury.io/py/chemprop) +[![PyPI version](https://badge.fury.io/py/chemprop.svg)](https://badge.fury.io/py/chemprop) +[![Anaconda-Server Badge](https://anaconda.org/conda-forge/chemprop/badges/version.svg)](https://anaconda.org/conda-forge/chemprop) +[![Build Status](https://github.com/chemprop/chemprop/workflows/tests/badge.svg)](https://github.com/chemprop/chemprop/actions/workflows/tests.yml) +[![Documentation Status](https://readthedocs.org/projects/chemprop/badge/?version=main)](https://chemprop.readthedocs.io/en/main/?badge=main) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![Downloads](https://static.pepy.tech/badge/chemprop)](https://pepy.tech/project/chemprop) +[![Downloads](https://static.pepy.tech/badge/chemprop/month)](https://pepy.tech/project/chemprop) +[![Downloads](https://static.pepy.tech/badge/chemprop/week)](https://pepy.tech/project/chemprop) + +Chemprop is a repository containing message passing neural networks for molecular property prediction. + +Documentation can be found [here](https://chemprop.readthedocs.io/en/main/). + +There are tutorial notebooks in the [`examples/`](https://github.com/chemprop/chemprop/tree/main/examples) directory. + +Chemprop recently underwent a ground-up rewrite and new major release (v2.0.0). A helpful transition guide from Chemprop v1 to v2 can be found [here](https://docs.google.com/spreadsheets/u/3/d/e/2PACX-1vRshySIknVBBsTs5P18jL4WeqisxDAnDE5VRnzxqYEhYrMe4GLS17w5KeKPw9sged6TmmPZ4eEZSTIy/pubhtml). This includes a side-by-side comparison of CLI argument options, a list of which arguments will be implemented in later versions of v2, and a list of changes to default hyperparameters. + +**License:** Chemprop is free to use under the [MIT License](LICENSE.txt). The Chemprop logo is free to use under [CC0 1.0](docs/source/_static/images/logo/LICENSE.txt). + +**References**: Please cite the appropriate papers if Chemprop is helpful to your research. + +- Chemprop was initially described in the papers [Analyzing Learned Molecular Representations for Property Prediction](https://pubs.acs.org/doi/abs/10.1021/acs.jcim.9b00237) for molecules and [Machine Learning of Reaction Properties via Learned Representations of the Condensed Graph of Reaction](https://doi.org/10.1021/acs.jcim.1c00975) for reactions. +- The interpretation functionality (available in v1, but not yet implemented in v2) is based on the paper [Multi-Objective Molecule Generation using Interpretable Substructures](https://arxiv.org/abs/2002.03244). +- Chemprop now has its own dedicated manuscript that describes and benchmarks it in more detail: [Chemprop: A Machine Learning Package for Chemical Property Prediction](https://doi.org/10.1021/acs.jcim.3c01250). +- A paper describing and benchmarking the changes in v2.0.0 is forthcoming. + +**Selected Applications**: Chemprop has been successfully used in the following works. + +- [A Deep Learning Approach to Antibiotic Discovery](https://www.cell.com/cell/fulltext/S0092-8674(20)30102-1) - _Cell_ (2020): Chemprop was used to predict antibiotic activity against _E. 
coli_, leading to the discovery of [Halicin](https://en.wikipedia.org/wiki/Halicin), a novel antibiotic candidate. Model checkpoints are available on [Zenodo](https://doi.org/10.5281/zenodo.6527882). +- [Discovery of a structural class of antibiotics with explainable deep learning](https://www.nature.com/articles/s41586-023-06887-8) - _Nature_ (2023): Identified a structural class of antibiotics selective against methicillin-resistant _S. aureus_ (MRSA) and vancomycin-resistant enterococci using ensembles of Chemprop models, and explained results using Chemprop's interpret method. +- [ADMET-AI: A machine learning ADMET platform for evaluation of large-scale chemical libraries](https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/btae416/7698030?utm_source=authortollfreelink&utm_campaign=bioinformatics&utm_medium=email&guestAccessKey=f4fca1d2-49ec-4b10-b476-5aea3bf37045): Chemprop was trained on 41 absorption, distribution, metabolism, excretion, and toxicity (ADMET) datasets from the [Therapeutics Data Commons](https://tdcommons.ai). The Chemprop models in ADMET-AI are available both as a web server at [admet.ai.greenstonebio.com](https://admet.ai.greenstonebio.com) and as a Python package at [github.com/swansonk14/admet_ai](https://github.com/swansonk14/admet_ai). +- A more extensive list of successful Chemprop applications is given in our [2023 paper](https://doi.org/10.1021/acs.jcim.3c01250) + +## Version 1.x + +For users who have not yet made the switch to Chemprop v2.0, please reference the following resources. + +### v1 Documentation + +- Documentation of Chemprop v1 is available [here](https://chemprop.readthedocs.io/en/v1.7.1/). Note that the content of this site is several versions behind the final v1 release (v1.7.1) and does not cover the full scope of features available in chemprop v1. +- The v1 [README](https://github.com/chemprop/chemprop/blob/v1.7.1/README.md) is the best source for documentation on more recently-added features. +- Please also see descriptions of all the possible command line arguments in the v1 [`args.py`](https://github.com/chemprop/chemprop/blob/v1.7.1/chemprop/args.py) file. + +### v1 Tutorials and Examples + +- [Benchmark scripts](https://github.com/chemprop/chemprop_benchmark) - scripts from our 2023 paper, providing examples of many features using Chemprop v1.6.1 +- [ACS Fall 2023 Workshop](https://github.com/chemprop/chemprop-workshop-acs-fall2023) - presentation, interactive demo, exercises on Google Colab with solution key +- [Google Colab notebook](https://colab.research.google.com/github/chemprop/chemprop/blob/v1.7.1/colab_demo.ipynb) - several examples, intended to be run in Google Colab rather than as a Jupyter notebook on your local machine +- [nanoHUB tool](https://nanohub.org/resources/chempropdemo/) - a notebook of examples similar to the Colab notebook above, doesn't require any installation + - [YouTube video](https://www.youtube.com/watch?v=TeOl5E8Wo2M) - lecture accompanying nanoHUB tool +- These [slides](https://docs.google.com/presentation/d/14pbd9LTXzfPSJHyXYkfLxnK8Q80LhVnjImg8a3WqCRM/edit?usp=sharing) provide a Chemprop tutorial and highlight additions as of April 28th, 2020 + +### v1 Known Issues + +We have discontinued support for v1 since v2 has been released, but we still appreciate v1 bug reports and will tag them as [`v1-wontfix`](https://github.com/chemprop/chemprop/issues?q=label%3Av1-wontfix+) so the community can find them easily. 
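As a quick orientation to the v2 Python API, here is a minimal sketch assembled from the classes exercised by the unit tests in this patch (`MoleculeDatapoint`, `MoleculeDataset`, `collate_batch`, `BondMessagePassing`, `NormAggregation`, `RegressionFFN`, and `MPNN`). The toy data, hyperparameters, and the way targets are passed to `from_smi` are illustrative assumptions rather than an official recipe; see the tutorial notebooks in `examples/` for the documented workflow.

```python
# Illustrative sketch of a v2 training/prediction loop, mirroring the classes
# used in tests/unit/utils/test_save_load_mol.py. Data, targets, and defaults
# here are assumptions for illustration, not recommendations.
import numpy as np
from lightning import pytorch as pl
from torch.utils.data import DataLoader

from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch
from chemprop.models import MPNN
from chemprop.nn import BondMessagePassing, NormAggregation, RegressionFFN

smis = ["CCO", "c1ccccc1", "CC(=O)O"]          # toy molecules
ys = np.array([[0.1], [0.7], [0.3]])           # one regression target each

datapoints = [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]
dataset = MoleculeDataset(datapoints)
loader = DataLoader(dataset, batch_size=32, collate_fn=collate_batch)

# message passing -> aggregation -> feed-forward readout
model = MPNN(BondMessagePassing(), NormAggregation(), RegressionFFN())

trainer = pl.Trainer(max_epochs=5, accelerator="cpu", logger=False)
trainer.fit(model, loader)
preds = trainer.predict(model, loader)
```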
diff --git a/chemprop/chemprop.egg-info/PKG-INFO b/chemprop/chemprop.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..e0815eaf8bf1a4f97b6fe4fd074d8ee610bc4b6e --- /dev/null +++ b/chemprop/chemprop.egg-info/PKG-INFO @@ -0,0 +1,648 @@ +Metadata-Version: 2.1 +Name: chemprop +Version: 1.7.1 +Summary: Molecular Property Prediction with Message Passing Neural Networks +Home-page: https://github.com/chemprop/chemprop +Author: The Chemprop Development Team (see LICENSE.txt) +Author-email: chemprop@mit.edu +License: MIT +Download-URL: https://github.com/chemprop/chemprop/v_1.7.1.tar.gz +Project-URL: Documentation, https://chemprop.readthedocs.io/en/latest/ +Project-URL: Source, https://github.com/chemprop/chemprop +Project-URL: PyPi, https://pypi.org/project/chemprop/ +Description: ![ChemProp Logo](logo/chemprop_logo.svg) + # Chemprop + + [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/chemprop)](https://badge.fury.io/py/chemprop) + [![PyPI version](https://badge.fury.io/py/chemprop.svg)](https://badge.fury.io/py/chemprop) + [![Anaconda-Server Badge](https://anaconda.org/conda-forge/chemprop/badges/version.svg)](https://anaconda.org/conda-forge/chemprop) + [![Build Status](https://github.com/chemprop/chemprop/workflows/tests/badge.svg)](https://github.com/chemprop/chemprop/actions/workflows/tests.yml) + [![Documentation Status](https://readthedocs.org/projects/chemprop/badge/?version=latest)](https://chemprop.readthedocs.io/en/latest/?badge=latest) + [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + [![Downloads](https://static.pepy.tech/badge/chemprop)](https://pepy.tech/project/chemprop) + [![Downloads](https://static.pepy.tech/badge/chemprop/month)](https://pepy.tech/project/chemprop) + [![Downloads](https://static.pepy.tech/badge/chemprop/week)](https://pepy.tech/project/chemprop) + + Chemprop is a repository containing message passing neural networks for molecular property prediction. + + **License:** Chemprop is free to use under the [MIT License](LICENSE.txt). The Chemprop logo is free to use under [CC0 1.0](logo/LICENSE.txt). + + **References**: Please cite the appropriate papers if Chemprop is helpful to your research. + + - Chemprop was initially described in the papers [Analyzing Learned Molecular Representations for Property Prediction](https://pubs.acs.org/doi/abs/10.1021/acs.jcim.9b00237) for molecules and [Machine Learning of Reaction Properties via Learned Representations of the Condensed Graph of Reaction](https://doi.org/10.1021/acs.jcim.1c00975) for reactions. + - The interpretation functionality is based on the paper [Multi-Objective Molecule Generation using Interpretable Substructures](https://arxiv.org/abs/2002.03244). + - Chemprop now has its own dedicated manuscript that describes and benchmarks it in more detail: [Chemprop: A Machine Learning Package for Chemical Property Prediction](https://doi.org/10.1021/acs.jcim.3c01250). + + **Selected Applications**: Chemprop has been successfully used in the following works. + + - [A Deep Learning Approach to Antibiotic Discovery](https://www.cell.com/cell/fulltext/S0092-8674(20)30102-1) - _Cell_ (2020): Chemprop was used to predict antibiotic activity against _E. coli_, leading to the discovery of [Halicin](https://en.wikipedia.org/wiki/Halicin), a novel antibiotic candidate. Model checkpoints are availabile on [Zenodo](https://doi.org/10.5281/zenodo.6527882). 
+ - [Discovery of a structural class of antibiotics with explainable deep learning](https://www.nature.com/articles/s41586-023-06887-8) - _Nature_ (2023): Identified a structural class of antibiotics selective against methicillin-resistant _S. aureus_ (MRSA) and vancomycin-resistant enterococci using ensembles of Chemprop models, and explained results using Chemprop's interpret method. + - [ADMET-AI: A machine learning ADMET platform for evaluation of large-scale chemical libraries](https://www.biorxiv.org/content/10.1101/2023.12.28.573531v1): Chemprop was trained on 41 absorption, distribution, metabolism, excretion, and toxicity (ADMET) datasets from the [Therapeutics Data Commons](https://tdcommons.ai). The Chemprop models in ADMET-AI are available both as a web server at [admet.ai.greenstonebio.com](https://admet.ai.greenstonebio.com) and as a Python package at [github.com/swansonk14/admet_ai](https://github.com/swansonk14/admet_ai). + - A more extensive list of successful Chemprop applications is given in our [2023 paper](https://doi.org/10.1021/acs.jcim.3c01250) + + ## Table of Contents + + - [Documentation](#documentation) + - [Tutorials and Examples](#tutorials-and-examples) + - [Requirements](#requirements) + - [Installation](#installation) + * [Option 1: Installing from PyPi](#option-1-installing-from-pypi) + * [Option 2: Installing from source](#option-2-installing-from-source) + * [Docker](#docker) + - [Known Issues](#known-issues) + - [Web Interface](#web-interface) + - [Within Python](#within-python) + - [Data](#data) + - [Training](#training) + * [Train/Validation/Test Splits](#trainvalidationtest-splits) + * [Loss functions](#loss-functions) + * [Metrics](#metrics) + * [Cross validation and ensembling](#cross-validation-and-ensembling) + * [Aggregation](#aggregation) + * [Additional Features](#additional-features) + * [Custom Features](#molecule-level-custom-features) + * [RDKit 2D Features](#molecule-level-rdkit-2d-features) + * [Atomic Features](#atom-level-features) + * [Spectra](#spectra) + * [Reaction](#reaction) + * [Reaction in a solvent / Reaction and a molecule](#reaction-in-a-solvent--reaction-and-a-molecule) + * [Atomic and bond properties prediction](#atomic-and-bond-properties-prediction) + * [Pretraining](#pretraining) + * [Missing target values](#missing-target-values) + * [Weighted training by target and data](#weighted-training-by-target-and-data) + * [Caching](#caching) + - [Predicting](#predicting) + * [Uncertainty Estimation](#uncertainty-estimation) + * [Uncertainty Calibration](#uncertainty-calibration) + * [Uncertainty Evaluation Metrics](#uncertainty-evaluation-metrics) + - [Hyperparameter Optimization](#hyperparameter-optimization) + * [Choosing the Search Parameters](#choosing-the-search-parameters) + * [Checkpoints and Parallel Operation](#checkpoints-and-parallel-operation) + * [Random or Directed Search](#random-or-directed-search) + * [Manual Trials](#manual-trials) + - [Encode Fingerprint Latent Representation](#encode-fingerprint-latent-representation) + - [Interpreting Model Prediction](#interpreting) + - [TensorBoard](#tensorboard) + - [Results](#results) + + ## Documentation + + * Documentation of Chemprop is available at https://chemprop.readthedocs.io/en/latest/. Note that this site is several versions behind. An up-to-date version of Read the Docs is forthcoming with the release of Chemprop v2.0. + * This README is currently the best source for documentation on more recently-added features. 
+ * Please also see descriptions of all the possible command line arguments in our [`args.py`](https://github.com/chemprop/chemprop/blob/master/chemprop/args.py) file. + + ## Tutorials and Examples + + * [Benchmark scripts](https://github.com/chemprop/chemprop_benchmark) - scripts from our 2023 paper, providing examples of many features using Chemprop v1.6.1 + * [ACS Fall 2023 Workshop](https://github.com/chemprop/chemprop-workshop-acs-fall2023) - presentation, interactive demo, exercises on Google Colab with solution key + * [Google Colab notebook](https://colab.research.google.com/github/chemprop/chemprop/blob/master/colab_demo.ipynb) - several examples, intended to be run in Google Colab rather than as a Jupyter notebook on your local machine + * [nanoHUB tool](https://nanohub.org/resources/chempropdemo/) - a notebook of examples similar to the Colab notebook above, doesn't require any installation + * [YouTube video](https://www.youtube.com/watch?v=TeOl5E8Wo2M) - lecture accompanying nanoHUB tool + * These [slides](https://docs.google.com/presentation/d/14pbd9LTXzfPSJHyXYkfLxnK8Q80LhVnjImg8a3WqCRM/edit?usp=sharing) provide a Chemprop tutorial and highlight additions as of April 28th, 2020 + + ## Requirements + + For small datasets (~1000 molecules), it is possible to train models within a few minutes on a standard laptop with CPUs only. However, for larger datasets and larger Chemprop models, we recommend using a GPU for significantly faster training. + + To use `chemprop` with GPUs, you will need: + * cuda >= 8.0 + * cuDNN + + ## Installation + + Chemprop can either be installed from PyPi via pip or from source (i.e., directly from this git repo). The PyPi version includes a vast majority of Chemprop functionality, but some functionality is only accessible when installed from source. + + Both options require conda, so first install Miniconda from [https://conda.io/miniconda.html](https://conda.io/miniconda.html). + + Then proceed to either option below to complete the installation. If installing the environment with conda seems to be taking too long, you can also try running `conda install -c conda-forge mamba` and then replacing `conda` with `mamba` in each of the steps below. + + **Note for machines with GPUs:** You may need to manually install a GPU-enabled version of PyTorch by following the instructions [here](https://pytorch.org/get-started/locally/). If you're encountering issues with Chemprop not using a GPU on your system after following the instructions below, check which version of PyTorch you have installed in your environment using `conda list | grep torch` or similar. If the PyTorch line includes `cpu`, please uninstall it using `conda remove pytorch` and reinstall a GPU-enabled version using the instructions at the link above. + + ### Option 1: Installing from PyPi + + 1. `conda create -n chemprop python=3.8` + 2. `conda activate chemprop` + 3. `pip install chemprop` + + > [!NOTE] + > Some features that were not made available in the main releases of Chemprop are instead available through 'feature releases' via PyPI: + > - SSL Pre-train with DDP - available in version `1.6.1.dev0`, install with `pip install chemprop==1.6.1.dev0`. + + ### Option 2: Installing from source + + 1. `git clone https://github.com/chemprop/chemprop.git` + 2. `cd chemprop` + 3. `conda env create -f environment.yml` + 4. `conda activate chemprop` + 5. `pip install -e .` + + ### Docker + + Chemprop can also be installed with Docker. 
+ Docker makes it possible to isolate the Chemprop code and environment. + You can either pull a pre-built image or build it locally. + + Note that regardless of installation method you will need to run the `docker run` command with the `--gpus` command line flag to access GPUs on your machine. + + In addition, you will also need to ensure that the CUDA toolkit version in the Docker image is compatible with the CUDA driver on your host machine. + Newer CUDA driver versions are backward-compatible with older CUDA toolkit versions. + To set a specific CUDA toolkit version, add `cudatoolkit=X.Y` to `environment.yml` before building the Docker image. + + #### Pull Pre-Built + + Run this command to download and run a given release version of Chemprop: + + `docker run -it chemprop/chemprop:X.Y.Z` + + where `X.Y.Z` is the version you want to download, e.g. `1.7.0`. + + > [!NOTE] + > Not all versions of Chemprop are available from DockerHub - see the [DockerHub](https://hub.docker.com/r/chemprop/chemprop/tags) page for a complete list of those available. + + DockerHub also has a `latest` tag - this is _not_ the latest release of Chemprop, but rather the latest version of `master` which is _not necessarily fit for deployment_. + Use this tag only for development or if you need to access a feature which has not yet been formally released! + + #### Local Build + + To install and run our code in a Docker container, follow these steps: + + 1. `git clone https://github.com/chemprop/chemprop.git` + 2. `cd chemprop` + 3. Install Docker from [https://docs.docker.com/install/](https://docs.docker.com/install/) + 4. `docker build -t chemprop .` + 5. `docker run -it chemprop:latest` + + ## Known Issues + + As we approach the upcoming release of Chemprop v2.0, we have closed [several issues](https://github.com/chemprop/chemprop/issues?q=label%3Av1-wontfix+) corresponding to bugs that we don't plan to fix before the final release of v1 (v1.7). We will be discontinuing support for v1 once v2 is released, but we still appreciate bug reports and will tag them as [`v1-wontfix`](https://github.com/chemprop/chemprop/issues?q=label%3Av1-wontfix+) so the community can find them easily. + + ## Web Interface + + For those less familiar with the command line, Chemprop also includes a web interface which allows for basic training and predicting. You can start the web interface on your local machine in two ways. Flask is used for development mode while gunicorn is used for production mode. + + ### Flask + + Run `chemprop_web` (or optionally `python web.py` if installed from source) and then navigate to [localhost:5000](http://localhost:5000) in a web browser. + + ### Gunicorn + + Gunicorn is only available for a UNIX environment, meaning it will not work on Windows. It is not installed by default with the rest of Chemprop, so first run: + + ``` + pip install gunicorn + ``` + + Next, navigate to `chemprop/web` and run `gunicorn --bind {host}:{port} 'wsgi:build_app()'`. This will start the site in production mode. + * To run this server in the background, add the `--daemon` flag. + * Arguments including `init_db` and `demo` can be passed with this pattern: `'wsgi:build_app(init_db=True, demo=True)'` + * Gunicorn documentation can be found [here](http://docs.gunicorn.org/en/stable/index.html). + + ## Within Python + + For information on the use of Chemprop within a python script, refer to the [Within a python script](https://chemprop.readthedocs.io/en/latest/tutorial.html#within-a-python-script) + section of the documentation.
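As a rough illustration of that pattern, a script along the following lines should train and then predict with the v1 API; the argument lists and function names follow the linked tutorial, but treat the specifics here as a sketch rather than authoritative documentation.

```python
# Illustrative sketch of training and predicting from a Python script with
# Chemprop v1, following the "Within a python script" tutorial pattern.
# Paths and arguments are examples only.
import chemprop

# train a classification model on the Tox21 CSV shipped with the repo
train_arguments = [
    "--data_path", "data/tox21.csv",
    "--dataset_type", "classification",
    "--save_dir", "tox21_checkpoints",
]
train_args = chemprop.args.TrainArgs().parse_args(train_arguments)
mean_score, std_score = chemprop.train.cross_validate(
    args=train_args, train_func=chemprop.train.run_training
)

# predict with the saved checkpoints
predict_arguments = [
    "--test_path", "data/tox21.csv",
    "--preds_path", "tox21_preds.csv",
    "--checkpoint_dir", "tox21_checkpoints",
]
predict_args = chemprop.args.PredictArgs().parse_args(predict_arguments)
preds = chemprop.train.make_predictions(args=predict_args)
```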
A [Google Colab notebook](https://colab.research.google.com/github/chemprop/chemprop/blob/master/colab_demo.ipynb) is also available with several examples. Note that this notebook is intended to be run in Google Colab rather than as a Jupyter notebook on your local machine. A similar notebook of examples is available as a [nanoHUB tool](https://nanohub.org/resources/chempropdemo/). + + + ## Data + + In order to train a model, you must provide training data containing molecules (as SMILES strings) and known target values. + + Chemprop can either train on a single target ("single tasking") or on multiple targets simultaneously ("multi-tasking"). + + There are four currently supported dataset types. Targets with unknown values can be left as blanks. + * **Regression.** Targets are float values. With bounded loss functions or metrics, the values may also be simple inequalities (e.g., >7.5 or <5.0). + * **Classification.** Targets are binary (i.e. 0s and 1s) indicators of the classification. + * **Multiclass.** Targets are integers (starting with zero) indicating which class the datapoint belongs to, out of a total number of exclusive classes indicated with `--multiclass_num_classes `. + * **Spectra.** Targets are positive float values with each target representing the signal at a specific spectrum position. + + The data file must be a **CSV file with a header row**. For example: + ``` + smiles,NR-AR,NR-AR-LBD,NR-AhR + CCOc1ccc2nc(S(N)(=O)=O)sc2c1,0,0,1 + CCN1C(=O)NC(c2ccccc2)C1=O,0,,0 + ... + ``` + + By default, it is assumed that the SMILES are in the first column (can be changed using `--number_of_molecules`) and the targets are in the remaining columns. However, the specific columns containing the SMILES and targets can be specified using the `--smiles_columns ...` and `--target_columns ...` flags, respectively. + + Datasets from [MoleculeNet](https://moleculenet.org/) and a 450K subset of ChEMBL from [http://www.bioinf.jku.at/research/lsc/index.html](http://www.bioinf.jku.at/research/lsc/index.html) have been preprocessed and are available in `data.tar.gz`. To uncompress them, run `tar xvzf data.tar.gz`. + + ## Training + + To train a model, run: + ``` + chemprop_train --data_path --dataset_type --save_dir + ``` + where `` is the path to a CSV file containing a dataset, `` is one of [classification, regression, multiclass, spectra] depending on the type of the dataset, and `` is the directory where model checkpoints will be saved. + + For example: + ``` + chemprop_train --data_path data/tox21.csv --dataset_type classification --save_dir tox21_checkpoints + ``` + + A full list of available command-line arguments can be found in [chemprop/args.py](https://github.com/chemprop/chemprop/blob/master/chemprop/args.py). + + If installed from source, `chemprop_train` can be replaced with `python train.py`. + + Notes: + * The default metric for classification is AUC and the default metric for regression is RMSE. Other metrics may be specified with `--metric `. + * `--save_dir` may be left out if you don't want to save model checkpoints. + * `--quiet` can be added to reduce the amount of debugging information printed to the console. Both a quiet and verbose version of the logs are saved in the `save_dir`. + + ### Train/Validation/Test Splits + + Our code supports several methods of splitting data into train, validation, and test sets. + + * **Random.** By default, the data will be split randomly into train, validation, and test sets.
+ * **Scaffold.** Alternatively, the data can be split by molecular scaffold so that the same scaffold never appears in more than one split. This can be specified by adding `--split_type scaffold_balanced`. Note that the atom-mapped numbers for atom-mapped SMILES will be removed before computing the Bemis-Murcko scaffold. + * **k-Fold Cross-Validation.** A split type specified with `--split_type cv`, intended for use when training with cross-validation. The data are split randomly into k groups of equal size, where k is the number of cross-validation folds specified with `--num_folds `. Each group is used once as the test set and once as the validation set in training the k folds of the model. Alternatively, the option `--split_type cv-no-test` can be used to train without a test split. + * **Random With Repeated SMILES.** Some datasets have multiple entries with the same SMILES. To constrain splitting so the repeated SMILES are in the same split, use the argument `--split_type random_with_repeated_smiles`. + * **Separate val/test.** If you have separate data files you would like to use as the validation or test set, you can specify them with `--separate_val_path ` and/or `--separate_test_path `. If both are provided, then the data specified by `--data_path` is used entirely as the training data. If only one separate path is provided, the `--data_path` data is split between train data and either val or test data, whichever is not provided separately. + + When data contains multiple molecules per datapoint, scaffold and repeated SMILES splitting will only constrain splitting based on one of the molecules. The key molecule can be chosen with the argument `--split_key_molecule `, with the default setting using an index of 0 indicating the first molecule. + + By default, both random and scaffold split the data into 80% train, 10% validation, and 10% test. This can be changed with `--split_sizes `. The default setting is `--split_sizes 0.8 0.1 0.1`. If a separate validation set or test set is provided, the split defaults to 80%-20%. Splitting involves a random component and can be seeded with `--seed `. The default setting is `--seed 0`. The split size argument is not used with split types `cv` or `cv-no-test`. + + ### Loss functions + + The loss functions available for training are dependent on the selected dataset type. Loss functions other than the defaults can be selected from the supported options with the argument `--loss_function `. + * **Regression.** mse (default), bounded_mse, mve (mean-variance estimation, a.k.a. heteroscedastic loss), evidential, quantile_interval (Pinball loss, specify margins with `--quantile_loss_alpha `). + * **Classification.** binary_cross_entropy (default), mcc (a soft version of Matthews Correlation Coefficient), dirichlet (a.k.a. evidential classification) + * **Multiclass.** cross_entropy (default), mcc (a soft version of Matthews Correlation Coefficient) + * **Spectra.** sid (default, spectral information divergence), wasserstein (First-order Wasserstein distance a.k.a. earthmover's distance.) + + + Dropout regularization can be applied regardless of loss function using the argument `--dropout ` and providing a dropout fraction between 0 and 1. + + The regression loss functions `mve` and `evidential` work by minimizing the negative log likelihood of a predicted uncertainty distribution. If used during training, the uncertainty predictions from these loss functions can be used for uncertainty prediction during prediction tasks.
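To make the idea concrete, the mean-variance estimation (heteroscedastic) loss is the negative log likelihood of a Gaussian whose mean and variance are both predicted by the network. The following is a generic PyTorch sketch of that loss, not Chemprop's exact implementation:

```python
# Generic sketch of the mean-variance estimation (heteroscedastic) loss:
# the model predicts a mean and a variance per target, and the Gaussian
# negative log likelihood is minimized. Illustrative only; not Chemprop's
# internal code.
import math
import torch

def mve_nll(mean: torch.Tensor, var: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    var = var.clamp(min=1e-8)  # keep the predicted variance strictly positive
    nll = 0.5 * (torch.log(2 * math.pi * var) + (target - mean) ** 2 / var)
    return nll.mean()

mean = torch.tensor([1.2, 0.3])
var = torch.tensor([0.5, 0.1])
target = torch.tensor([1.0, 0.0])
loss = mve_nll(mean, var, target)
```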
A regularization specific to evidential learning can be applied using the argument `--evidential_regularization `. The regression loss function `quantile_interval` trains the model with two different output heads which correspond to the `quantile_loss_alpha/2` and `1 - quantile_loss_alpha/2` quantile predictions. Since the interval is symmetric, the center of the interval is returned as the predicted value. The evaluation metric for `quantile_interval` is automatically set to the `quantile` metric. + + ### Metrics + + Metrics are used to evaluate the success of the model against the test set as the final model score and to determine the optimal epoch to save the model at based on the validation set. The primary metric used for both purposes is selected with the argument `--metric ` and additional metrics for test set score only can be added with `--extra_metrics ...`. Supported metrics are dependent on the dataset type. Unlike loss functions, metrics do not have to be differentiable. + * **Regression.** rmse (default), mae, mse, r2, bounded_rmse, bounded_mae, bounded_mse (default if bounded_mse is loss function), quantile (average of pinball loss for both output heads). + * **Classification.** auc (default), prc-auc, accuracy, binary_cross_entropy, f1, mcc, recall, precision and balanced accuracy. + * **Multiclass.** cross_entropy (default), accuracy, f1, mcc. + * **Spectra.** sid (default), wasserstein. + + When a multitask model is used, the metric score used for evaluation at each epoch or for choosing the best set of hyperparameters during hyperparameter search is obtained by taking the mean of the metric scores for each task. Some metrics scale with the magnitude of the targets (most regression metrics), so geometric mean instead of arithmetic mean is used in those cases in order to avoid having the mean score dominated by changes in the larger magnitude task. + ### Cross validation and ensembling + + Cross-validation can be run by specifying `--num_folds `. The default is `--num_folds 1`. Each trained model will have different train/val/test splits, determined according to the specified split type argument and split sizes argument but using a different random seed to perform the splitting. The reported test score will be the average of the metrics from each fold. To use a strict k-fold cross-validation where each datapoint will appear in fold test sets exactly once, the argument `--split_type cv` must be used. + + To train an ensemble, specify the number of models in the ensemble with `--ensemble_size `. The default is `--ensemble_size 1`. Each trained model within the ensemble will share data splits. The reported test score for one ensemble is the metric applied to the averaged prediction across the models. Ensembling and cross-validation can be used at the same time. + + ### Aggregation + + By default, the atom-level representations from the message passing network are averaged over all atoms of a molecule to yield a molecule-level representation. Alternatively, the atomic vectors can be summed up (by specifying `--aggregation sum`) or summed up and divided by a constant number N (by specifying `--aggregation norm --aggregation_norm `). A reasonable value for N is usually the average number of atoms per molecule in the dataset of interest. The default is `--aggregation_norm 100`.
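To illustrate what the three aggregation modes compute, the sketch below applies them to a dummy matrix of atom-level hidden vectors; it is a generic illustration rather than Chemprop's internal code:

```python
# Sketch of the three aggregation modes applied to one molecule's atom-level
# hidden vectors (shape: n_atoms x hidden_size). Shapes and values are dummies.
import torch

atom_vecs = torch.randn(17, 300)          # e.g. a 17-atom molecule, hidden size 300

mol_vec_mean = atom_vecs.mean(dim=0)      # default: average over atoms
mol_vec_sum = atom_vecs.sum(dim=0)        # --aggregation sum
N = 100                                   # --aggregation_norm 100 (default N)
mol_vec_norm = atom_vecs.sum(dim=0) / N   # --aggregation norm
```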
+ + ### Additional Features + + While the model works very well on its own, especially after hyperparameter optimization, we have seen that additional features can further improve performance on certain datasets. The additional features can be added at the atom-, bond-, or molecule-level. Molecule-level features can either be generated automatically by RDKit or supplied as custom features by the user. + + #### Molecule-Level Custom Features + + If you install from source, you can modify the code to load custom features as follows: + + 1. **Generate features:** If you want to generate features in code, you can write a custom features generator function in `chemprop/features/features_generators.py`. Scroll down to the bottom of that file to see a features generator code template. + 2. **Load features:** If you have features saved as a numpy `.npy` file or as a `.csv` file, you can load the features by using `--features_path /path/to/features`. Note that the features must be in the same order as the SMILES strings in your data file. Also note that `.csv` files must have a header row and the features should be comma-separated with one line per molecule. By default, provided features will be normalized unless the flag `--no_features_scaling` is used. + + #### Molecule-Level RDKit 2D Features + + As a starting point, we recommend using pre-normalized RDKit features by using the `--features_generator rdkit_2d_normalized --no_features_scaling` flags. In general, we recommend NOT using the `--no_features_scaling` flag (i.e. allow the code to automatically perform feature scaling), but in the case of `rdkit_2d_normalized`, those features have been pre-normalized and don't require further scaling. Using `rdkit_2d_normalized` should be avoided when molecule-level custom features have also been loaded that still require scaling, since `--no_features_scaling` disables scaling for all provided features. + + The full list of available options for `--features_generator` is as follows: + + * `morgan` is binary Morgan fingerprints, radius 2 and 2048 bits. + * `morgan_count` is count-based Morgan, radius 2 and 2048 bits. + * `rdkit_2d` is an unnormalized version of 200 assorted RDKit descriptors. The full list can be found at the bottom of our paper: https://arxiv.org/pdf/1904.01561.pdf + * `rdkit_2d_normalized` is the CDF-normalized version of the 200 RDKit descriptors. + + #### Atom-Level Features + + Similar to the additional molecular features described above, you can also provide additional atomic features via `--atom_descriptors_path /path/to/features` with valid file formats: + * `.npz` file, where descriptors are saved as a 2D array for each molecule in the exact same order as the SMILES strings in your data file. + * `.pkl` / `.pckl` / `.pickle` containing a pandas dataframe with SMILES as the index and a numpy array of descriptors as columns. + * `.sdf` containing all mol blocks with descriptors as entries. + + The order of the descriptors for each atom per molecule must match the ordering of atoms in the RDKit molecule object. Further information on supplying atomic descriptors can be found [here](https://github.com/chemprop/chemprop/releases/tag/v1.1.0). + + Users must select in which way atom descriptors are used. The command line option `--atom_descriptors descriptor` concatenates the new features to the embedded atomic features after the D-MPNN with an additional linear layer. The option `--atom_descriptors feature` concatenates the features to each atomic feature vector before the D-MPNN, so that they are used during message-passing.
Alternatively, the user can overwrite the default atom features with the custom features using the option `--overwrite_default_atom_features`. + + Similar to the molecule-level features, the atom-level descriptors and features are scaled by default. This can be disabled with the option `--no_atom_descriptor_scaling`. + + #### Bond-Level Features + + Bond-level features can be provided in the same format as the atom-level features, using the option `--bond_descriptors_path /path/to/features`. The order of the features for each molecule must match the bond ordering in the RDKit molecule object. + + Users must select in which way bond descriptors are used. The command line option `--bond_descriptors feature` concatenates the bond-level features with the bond feature vectors before the D-MPNN, such that they are used during message-passing. For atomic/bond property prediction, the command line option `--bond_descriptors descriptor` concatenates the new features to the embedded bond features after the D-MPNN with an additional linear layer. Alternatively, the user can overwrite the default bond features with the custom features using the option `--overwrite_default_bond_features`. + + Similar to molecule-level and atom-level features, the bond-level descriptors and features are scaled by default. This can be disabled with the option `--no_bond_descriptor_scaling`. + + ### Spectra + + One of the data types that can be trained with Chemprop is "spectra". Spectra training is different from other data types because it considers the predictions of all targets together. Targets for spectra should be provided as the values of the spectrum at specific positions in the spectrum. The default loss function for spectra is SID, spectral information divergence. Alternatively, the Wasserstein distance (earthmover's distance) can be used for both the loss function and metric with the input arguments `--metric wasserstein --loss_function wasserstein`. + + Spectra predictions are configured to return only positive values and to normalize them so that each spectrum sums to 1. The activation used to enforce positivity is an exponential function by default but can also be set to a Softplus function, according to the argument `--spectra_activation `. Value positivity is enforced on input targets as well using a floor value that replaces negative or smaller target values with the floor value (default 1e-8), customizable with the argument `--spectra_target_floor `. + + In absorption spectra, sometimes the phase of collection will create regions in the spectrum where data collection or prediction would be unreliable. To exclude these regions, include paths to phase features for your data (`--phase_features_path `) and a mask indicating the spectrum regions that are supported (`--spectra_phase_mask_path `). The format for the mask file is a `.csv` file with columns for the spectrum positions and rows for the phases, with column and row labels in the same order as they appear in the targets and features files. + + ### Reaction + + As an alternative to molecule SMILES, Chemprop can also process atom-mapped reaction SMILES (see the [Daylight manual](https://www.daylight.com/meetings/summerschool01/course/basics/smirks.html) for details on reaction SMILES), which consist of three parts denoting reactants, agents, and products, separated by ">".
Use the option `--reaction` to enable the input of reactions, which transforms the reactants and products of each reaction to the corresponding condensed graph of reaction and changes the initial atom and bond features to hold information from both the reactant and the product (option `--reaction_mode reac_prod`), from the reactant and the difference upon reaction (option `--reaction_mode reac_diff`, default), or from the product and the difference upon reaction (option `--reaction_mode prod_diff`). In reaction mode, Chemprop thus concatenates information to each atomic and bond feature vector. For example, with option `--reaction_mode reac_prod`, each atomic feature vector holds information on the state of the atom in the reactant (similar to default Chemprop), concatenated with information on the state of the atom in the product, so that the size of the D-MPNN increases slightly. Agents are discarded. Functions incompatible with a reaction as input (scaffold splitting and feature generation) are carried out on the reactants only. If the atom-mapped reaction SMILES contain mapped hydrogens, enable explicit hydrogens via `--explicit_h`. Example of an atom-mapped reaction SMILES denoting the reaction of methanol to formaldehyde without hydrogens: `[CH3:1][OH:2]>>[CH2:1]=[O:2]` and with hydrogens: `[C:1]([H:3])([H:4])([H:5])[O:2][H:6]>>[C:1]([H:3])([H:4])=[O:2].[H:5][H:6]`. The reactions do not need to be balanced and can thus contain unmapped parts, for example leaving groups, if necessary. With the reaction modes `reac_prod`, `reac_diff` and `prod_diff`, the atom and bond features of unbalanced atoms and bonds are set to zero on the side of the reaction where they are not specified. Alternatively, features can be set to the same values on the reactant and product side via the modes `reac_prod_balance`, `reac_diff_balance` and `prod_diff_balance`, which corresponds to a rough balancing of the reaction. + For further details and benchmarking, as well as a citable reference, please refer to the [article](https://doi.org/10.1021/acs.jcim.1c00975). + + ### Reaction in a solvent / Reaction and a molecule + + Chemprop can process a reaction in a solvent or a reaction and a molecule with the `--reaction_solvent` option. While this + option was originally built to model a reaction in a solvent, it works for any reaction paired with a molecule, where + the molecule can represent anything, e.g. a solvent, a reagent, etc. + This requires the input csv file to have two separate columns of SMILES: one column for atom-mapped reaction SMILES + and the other column for solvent/molecule SMILES. The reaction and solvent/molecule SMILES columns can be ordered in + any way (i.e. the first column can be either reaction SMILES or solvent SMILES and the second column can then be + solvent SMILES or reaction SMILES). However, the same column ordering as used in training must be used for prediction + (i.e. if the input csv file used for model training had reaction SMILES as the first column and solvent SMILES as the + second column, the csv file used for prediction should also have the first column as reaction SMILES and the second column + as the solvent SMILES). For information on atom-mapped reaction SMILES, please refer to [Reaction](#reaction). + + When using the `--reaction_solvent` option, `--number_of_molecules` must be set to 2. All options listed in the [Reaction](#reaction) + section such as different `--reaction_mode` and `--explicit_h` can be used for `--reaction_solvent`.
Note that + the `--explicit_h` option is only applicable to reaction SMILES. The `--adding_h` option can be used instead for + the solvent/molecule if one wishes to add hydrogens to the solvent/molecule SMILES. Chemprop allows differently sized MPNNs to be used for each + reaction and solvent/molecule encoding. Below are the input arguments for specifying the size and options of the two MPNNs: + * Reaction: + * `--bias` Whether to add bias to linear layers. + * `--hidden_size` Dimensionality of hidden layers. + * `--depth` Number of message passing steps. + * `--explicit_h` Whether H are explicitly specified in input and should be kept this way. Only applicable to reaction SMILES. + * Solvent / Molecule: + * `--bias_solvent` Whether to add bias to linear layers for the solvent/molecule MPN. + * `--hidden_size_solvent` Dimensionality of hidden layers in the solvent/molecule MPN. + * `--depth_solvent` Number of message passing steps for the solvent/molecule. + * `--adding_h` Whether RDKit molecules will be constructed with Hs added to them. Applicable to any SMILES that is not a reaction. + + ### Atomic and bond properties prediction + + Chemprop can train multitask constrained message passing neural networks for atomic/bond property prediction as described in this [paper](https://chemrxiv.org/articles/preprint/Regio-Selectivity_Prediction_with_a_Machine-Learned_Reaction_Representation_and_On-the-Fly_Quantum_Mechanical_Descriptors/12907316). This model can train on any number of atomic/bond properties simultaneously. In the original work, a total loss was calculated as a weighted sum of every single loss, where the weights were required to be specified for the regression task. In this repository, these weights are automatically taken into account by standardizing all the training targets. In order to train a model, training data containing molecules (as SMILES strings) and known atomic/bond target values are required, and the `--is_atom_bond_targets` flag is used. The input is a csv file. For example: + ``` + smiles hirshfeld_charges ... bond_length_matrix bond_index_matrix + 0 CNC(=S)N/N=C/c1c(O)ccc2ccccc12 [-0.026644, -0.075508, 0.096217, -0.287798, -0... ... [[0.0, 1.4372890960937539, 2.4525543850909814,... [[0.0, 0.9595, 0.0158, 0.0162, 0.0103, 0.0008,... + 1 O=C(NCCn1cccc1)c1cccc2ccccc12 [-0.292411, 0.170263, -0.085754, 0.002736, 0.0... ... [[0.0, 1.2158509801073485, 2.2520730233154076,... [[0.0, 1.6334, 0.1799, 0.0086, 0.0068, 0.0002,... + 2 C=C(C)[C@H]1C[C@@H]2OO[C@H]1C=C2C [-0.101749, 0.012339, -0.07947, -0.020027, -0.... ... [[0.0, 1.3223632546838255, 2.468055985361353, ... [[0.0, 1.9083, 0.0179, 0.016, 0.0236, 0.001, 0... + 3 OCCCc1cc[nH]n1 [-0.268379, 0.027614, -0.050745, -0.045047, 0.... ... [[0.0, 1.4018301850170725, 2.4667588956616737,... [[0.0, 0.9446, 0.0311, 0.002, 0.005, 0.0007, 0... + 4 CC(=N)NCc1cccc(CNCc2ccncc2)c1 [-0.083162, 0.114954, -0.274544, -0.100369, 0.... ... [[0.0, 1.5137126697008916, 2.4882198180715465,... [[0.0, 1.0036, 0.0437, 0.0108, 0.0134, 0.0004,...... + ``` + where atomic properties (e.g. hirshfeld_charges) must be a 1D list in the same order as the atoms in the SMILES string, and bond properties (e.g. bond_length_matrix) can either be a 2D list of shape (number_of_atoms × number_of_atoms) or a 1D list in the same order as the bonds in the SMILES string. The `--keeping_atom_map` option can be used if atom-mapped SMILES are provided.
The `--adding_h` option can be used if hydrogens are included in the atom targets and bonds to hydrogens are included in the bond targets. + This model allows multitask constraints to be applied to different atomic/bond properties by specifying the argument `--constraints_path` with a given `.csv` file. Note that the constraints must be in the same order as the SMILES strings in your data file. Also note that the `.csv` file must have a header row and the constraints should be comma-separated with one line per molecule. The optional argument `--no_shared_atom_bond_ffn` makes the FFN weights used by each task independent; otherwise, the default is that atom tasks share FFN weights and bond tasks share FFN weights so that the FFN weights gain the benefits of multitask training. The optional argument `--no_adding_bond_types` prevents the bond types determined by RDKit from being added to the output of the bond targets. The optional argument `--weights_ffn_num_layers` can change the number of layers in the FFN used to determine the weights that correct the constrained targets. + + Please note that the current framework is only available for models trained on multiple atomic and bond properties simultaneously. Training on both atomic/bond and molecular targets is not supported. + + ### Pretraining + + Pretraining can be carried out using previously trained checkpoint files to set some or all of the initial values of a model for training. Additionally, some model parameters from the previous model can be frozen in place, so that they will not be updated during training. + + Parameters from existing models can be used for parameter-initialization of a new model by providing a checkpoint of the existing model using either + * `--checkpoint_dir ` Directory where the model checkpoint(s) are saved (i.e. `--save_dir` during training of the old model). This will walk the directory, and load all `.pt` files it finds. + * `--checkpoint_path ` Path to a model checkpoint file (`.pt` file). + * `--checkpoint_paths ` A list of paths to multiple model checkpoint (`.pt`) files. + when training the new model. The model architecture of the new model should resemble the architecture of the old model; otherwise some or all parameters might not be loaded correctly. If any of these options are specified during training, any argument provided with `--ensemble_size` will be overwritten and the ensemble size will be set to the number of checkpoint files that were provided, with each submodel in the ensemble using a separate checkpoint file for initialization. When using these options, new model parameters are initialized using the old checkpoint files but all parameters remain trainable (no frozen layers from these arguments). + + Certain portions of the model can be loaded from a previous model and frozen so that they will not be trainable, using the various frozen layer parameters. A path to a checkpoint file for frozen parameters is provided with the argument `--checkpoint_frzn `. If this path is provided, the parameters in the MPNN portion of the model will be loaded from the path and frozen. Layers in the FFNN portion of the model can also be loaded and frozen, in addition to freezing the MPNN, using `--frzn_ffn_layers `. The model architecture of the new model should match the old model in any layers that are being frozen, but non-frozen layers can be different without affecting the frozen layers (e.g., the MPNN alone is frozen and the new model has a larger number of FFNN layers).
Parameters provided with `--checkpoint_frzn` will overwrite initialization parameters from `--checkpoint_path` (or similar) that are frozen in the new model. At present, only one checkpoint can be provided for `--checkpoint_frzn`, and those parameters will be used for any number of submodels if `--ensemble_size` is specified. If multiple molecules (with multiple MPNNs) are being trained in the new model, the default behavior is for both of the new MPNNs to be frozen and drawn from the checkpoint. If `--freeze_first_only` is specified, only the first MPNN will be frozen and subsequent MPNNs will still be allowed to train. + + ### Missing Target Values + + When training multitask models (models which predict more than one target simultaneously), sometimes not all target values are known for all molecules in the dataset. Chemprop automatically handles missing entries in the dataset by masking out the respective values in the loss function, so that partial data can be utilized, too. The loss function is rescaled according to all non-missing values, and missing values furthermore do not contribute to validation or test errors. Training on partial data is therefore possible and encouraged (versus taking out datapoints with missing target entries). No keyword is needed for this behavior; it is the default. + + In contrast, when using `sklearn_train.py` (a utility script provided within Chemprop that trains standard models such as random forests on Morgan fingerprints via the python package scikit-learn), multi-task models cannot be trained on datasets with partially missing targets. However, one can instead train individual models for each task (via the argument `--single_task`), where missing values are automatically removed from the dataset. Thus, the training still makes use of all non-missing values, but by training individual models for each task, instead of one model with multiple output values. This restriction only applies to sklearn models (via `sklearn_train` or `python sklearn_train.py`), but NOT to default Chemprop models via `chemprop_train` or `python train.py`. Alternatively, missing target values can be imputed by specifying `--impute_mode `. The option `single_task` trains single-task sklearn models on each task to predict missing values and is computationally expensive. The option `linear` trains a stochastic gradient linear model on each target to compute missing targets. Both `single_task` and `linear` are applicable to regression and classification tasks. For regression tasks, the options `median` and `mean` furthermore compute the median and mean of the training data. For classification tasks, `frequent` computes the most frequent value for each task. For all options, models are fitted to non-missing training targets and predict missing training targets. The test set is not affected by imputing. + + ### Weighted Training by Target and Data + + By default, each task in multitask training and each provided datapoint are weighted equally for training. Weights can be specified in either case to allow some tasks in training or some specified data points to be weighted more heavily than others in the training of the model. + + Using the `--target_weights` argument followed by a list of numbers equal in length to the number of tasks in multitask training, different tasks can be given more weight in parameter updates during training.
For instance, in a multitask training with two tasks, the argument `--target_weights 1 2` would give the second task twice as much weight in model parameter updates. Provided weights must be non-negative. Values are normalized to make the average weight equal 1. Target weights are not used with the validation set for the determination of early stopping or in evaluation of the test set. + + Using the `--data_weights_path` argument followed by a path to a data file containing weights will allow each individual datapoint in the training data to be given a different weight in parameter updates. Formatting of this file is similar to the provided features CSV files: it should contain only a single column with one header row and a numerical value in each row that corresponds to the order of datapoints provided with `--data_path`. Data weights should not be provided for validation or test sets if they are provided through the arguments `--separate_test_path` or `--separate_val_path`. Provided weights must be non-negative. Values are normalized to make the average weight equal 1. Data weights are not used with the validation set for the determination of early stopping or in evaluation of the test set. + + ### Caching + + By default, the molecule objects created from each SMILES string are cached for all dataset sizes, and the graph objects created from each molecule object are cached for datasets up to 10000 molecules. If memory permits, you may use the keyword `--cache_cutoff inf` to set this cutoff from 10000 to infinity to always keep the generated graphs in cache (or to another integer value for custom behavior). This may speed up training (depending on the dataset size, molecule size, number of epochs and GPU support), since the graphs do not need to be recreated each epoch, but increases memory usage considerably. Below the cutoff, graphs are created sequentially in the first epoch. Above the cutoff, graphs are created in parallel (on `--num_workers ` workers) for each epoch. If training on a GPU, training without caching and creating graphs on the fly in parallel is often preferable. On CPU, training with caching is often preferable for medium-sized datasets and a very low number of CPUs. If a very large dataset causes memory issues, you might turn off caching even of the molecule objects via the flag `--no_cache_mol` to reduce memory usage further. + + ## Predicting + + To load a trained model and make predictions, run `predict.py` and specify: + * `--test_path ` Path to the data to predict on. + * A checkpoint by using either: + * `--checkpoint_dir ` Directory where the model checkpoint(s) are saved (i.e. `--save_dir` during training). This will walk the directory, load all `.pt` files it finds, and treat the models as an ensemble. + * `--checkpoint_path ` Path to a model checkpoint file (`.pt` file). + * `--preds_path` Path where a CSV file containing the predictions will be saved. + + For example: + ``` + chemprop_predict --test_path data/tox21.csv --checkpoint_dir tox21_checkpoints --preds_path tox21_preds.csv + ``` + or + ``` + chemprop_predict --test_path data/tox21.csv --checkpoint_path tox21_checkpoints/fold_0/model_0/model.pt --preds_path tox21_preds.csv + ``` + + Predictions made on an ensemble of models will return the average of the individual model predictions. To return the individual model predictions as well, include the `--individual_ensemble_predictions` argument. + + If installed from source, `chemprop_predict` can be replaced with `python predict.py`.
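+ + As a further sketch, if the models in a checkpoint directory were trained with extra molecule-level features (for example `--features_generator rdkit_2d_normalized --no_features_scaling`), the same feature arguments generally need to be supplied again at prediction time; an ensemble prediction that also reports the individual submodel predictions might then look like: + ``` + chemprop_predict --test_path data/tox21.csv --features_generator rdkit_2d_normalized --no_features_scaling --checkpoint_dir tox21_checkpoints --preds_path tox21_preds.csv --individual_ensemble_predictions + ```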
+ + ### Uncertainty Estimation + + The uncertainty of predictions made in Chemprop can be estimated by several different methods. Uncertainty estimation is carried out alongside model value prediction and reported in the predictions csv file when the argument `--uncertainty_method ` is provided. If no uncertainty method is provided, then only the model value predictions will be carried out. The available methods are: + + * `ensemble` For a prediction using an ensemble of models. Returns the variance of predictions made by each of the ensemble submodels. Ensemble variance can be used with any dataset type, but the results are only usable for calibration or evaluation with regression datasets. + * `dropout` Intended for use with a single model and not an ensemble. This method uses Monte Carlo dropout to generate a virtual ensemble of models and reports the ensemble variance of the predictions. The probability of dropout and the number of models generated can be changed using `--uncertainty_dropout_p ` and `--dropout_sampling_size `, respectively. Note that this dropout is distinct from dropout regularization used during training, which is not active during predictions. + * `mve` When mve has been used for the training loss function on regression datasets, this method uses the separate variance prediction of the model. The variance result from ensembling models together includes the variance contribution of the different models having different mean predictions. + * `evidential_total`, `evidential_epistemic`, `evidential_aleatoric` When evidential was used as the training loss function for regression datasets, these methods use the variance prediction of the model. The evidential output includes different functions intended to divide the variance into epistemic and aleatoric uncertainty. The variance result from ensembling models together includes the variance contribution of the different models having different mean predictions. + * `spectra_roundrobin` For an ensemble of spectra predictions. Calculates the pairwise SID between the predictions made by each of the ensemble submodels. Returns the average SID. + * `classification` The predictions of classification and multiclass dataset types are inherently probabilistic already. Used by default for classification and multiclass as needed. + + ### Uncertainty Calibration + + Uncertainty predictions may be calibrated to improve their performance on new predictions. Calibration methods are selected using `--calibration_method `, with the options provided below. An additional dataset to use in calibration is provided through `--calibration_path `, along with necessary features like `--calibration_features_path `. As with the data used in training, calibration data for multitask models are allowed to have gaps and missing targets in the data. + + **Regression** + + Calibrated regression outputs can be in the form of a standard deviation or an interval, as specified with the argument `--regression_calibrator_metric <"stdev" or "interval">`. The interval can be set using `--calibration_interval_percentile ` in the range (1,100). The options mentioned above do not apply to the calibration methods `conformal_regression` and `conformal_quantile_regression`. + * `zscaling` Assumes that errors are normally distributed according to the estimated variance for each prediction. Applies a constant multiple to all stdev or interval outputs in order to minimize the negative log likelihood for the normal distributions.
(https://arxiv.org/abs/1905.11659) + * `tscaling` Similar to zscaling. Assumes that the errors are normally distributed, but accounts for the ensemble size and uncertainty in the sample variance by using a sample-size reduced t-distribution in the negative log likelihood. Works best when errors are mostly due to variability between model instances and not dataset noise or model bias. + * `zelikman_interval` Assumes that the error distribution is the same for each prediction but scaled by the uncalibrated standard deviation for each. Multiplies the uncalibrated standard deviation by a factor necessary to cover the specified interval of the calibration set. Does not assume a Gaussian distribution. Intended for use with intervals but can return a stdev as well. (https://arxiv.org/abs/2005.12496) + * `mve_weighting` For use with ensembles of models trained with mve or evidential loss function. Uses a weighted average of the predicted variances to achieve a minimum negative log likelihood of predictions. (https://doi.org/10.1186/s13321-021-00551-x) + * `conformal_regression` Generates a symmetric interval of fixed size for each prediction such that the actual value has probability $1-\alpha$ of falling in the interval. The desired error rate is controlled using the parameter `--conformal_alpha ` which is set by default to 0.1. (https://arxiv.org/abs/2107.07511) + * `conformal_quantile_regression` Similar to `conformal_regression` but generates an interval of variable size for each prediction based on quantile predictions of the data. The model should be trained with parameters `--loss_function quantile_interval` and `--quantile_loss_alpha ` where $\alpha$ is the desired error rate of the quantile interval. The trained model will output the center of the $\alpha/2$ and $1-\alpha/2$ quantiles according to pinball loss as the predicted value and return the half range of the interval as the uncertainty quantification. The parameter `--conformal_alpha ` should be included to specify the desired error rate of the conformal method during inference. (https://arxiv.org/abs/2107.07511) + + **Classification** + * `platt` Uses a linear scaling before the sigmoid function in prediction to minimize the negative log likelihood of the predictions. If the model checkpoint was generated after Chemprop v1.5.0, then a Bayesian correction is applied to account for the class balance in the training set during prediction. Implemented for classification but not multiclass datasets. (https://arxiv.org/abs/1706.04599) + * `isotonic` Fits an isotonic regression model to the predictions. Prediction outputs are transformed using a stepped histogram-style to match the empirical probability observed in the calibration data. Number and size of the histogram bins are procedurally decided. Histogram bins are wider in the regions of the model output that are less reliable in ordering confidence. Implemented for both classification and multiclass datasets. (https://arxiv.org/abs/1706.04599) + * `conformal` Generates a pair of sets of labels $C_{in} \subset C_{out}$ such that the true set of labels $S$ satisfies the property $C_{in} \subset S \subset C_{out}$ with probability at least $1-\alpha$. The desired error rate $\alpha$ can be controlled with the parameter `--conformal_alpha ` which is set by default to 0.1. (https://arxiv.org/abs/2004.10181) + + **Multiclass** + * `conformal` Generates a set of possible classes for each prediction such that the true class has probability $1-\alpha$ of falling in the set. 
The desired error rate $\alpha$ can be controlled with the parameter `--conformal_alpha ` which is set by default to 0.1. The set is generated using the basic conformal method. (https://arxiv.org/abs/2107.07511) + * `conformal_adaptive` Generates a set of possible classes for each prediction such that the true class has probability $1-\alpha$ of falling in the set. The desired error rate $\alpha$ can be controlled with the parameter `--conformal_alpha ` which is set by default to 0.1. The set is generated using the adaptive conformal method. (https://arxiv.org/abs/2107.07511) + + ### Uncertainty Evaluation Metrics + + The performance of uncertainty predictions (calibrated or uncalibrated) can be evaluated on the test set using different evaluation metrics, as specified with `--evaluation_methods <[methods]>`. Evaluation scores will be saved at the path provided with `--evaluation_scores_path `. If no path is provided to save the scores, then the results will only appear in the output trace. Multiple evaluation methods can be provided and they will be calculated separately for each model task. Evaluation is only available when the target values are provided with the data in `--test_path `. As with the data used in training, evaluation data for multitask models are allowed to have gaps and missing targets in the data. + + * Any valid classification or multiclass metric. Because classification and multiclass outputs are inherently probabilistic, any metric used to assess them during training is appropriate to evaluate the confidences produced after calibration. + * `nll` Returns the average negative log likelihood of the real target as indicated by the uncertainty predictions. Enabled for regression, classification, and multiclass dataset types. + * `spearman` A regression evaluation metric. Returns the Spearman rank correlation between the predicted uncertainty and the actual error in predictions. Only considers ordering and does not assume a particular probability distribution. + * `ence` Expected normalized calibration error. A regression evaluation metric. Bins model predictions according to their predicted uncertainty and compares the RMSE in each bin against the expected error based on the predicted uncertainty variance, scaled by the variance. (discussed in https://doi.org/10.1021/acs.jcim.9b00975) + * `miscalibration_area` A regression evaluation metric. Compares the model's expected probability against the realized probability at different points along the probability distribution. Values range (0, 0.5) with perfect calibration at 0. (discussed in https://doi.org/10.1021/acs.jcim.9b00975) + * `conformal_coverage` Measures the empirical coverage of the conformal methods, that is, the proportion of datapoints that fall within the output set or interval. Must be used with a conformal calibration method which outputs a set or interval. The metric can be used with multiclass, multilabel, or regression conformal methods. + + Different evaluation metrics consider different aspects of uncertainty. It is often appropriate to consider multiple metrics. For instance, miscalibration area is important for evaluating uncertainty magnitude but does not indicate whether the uncertainty function discriminates well between different outputs. Similarly, spearman tests ordering but not prediction magnitude. + + Evaluations can be used to compare different uncertainty methods and different calibration methods for a given dataset.
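+ + As an illustrative sketch (the calibration and test files below are placeholders, the test file must contain target values, and the checkpoint directory is assumed to hold an ensemble of regression models), ensemble uncertainty with z-scaling calibration and several evaluation metrics might be requested as: + ``` + chemprop_predict --test_path data/my_test_with_targets.csv --checkpoint_dir my_regression_checkpoints --preds_path my_preds.csv --uncertainty_method ensemble --calibration_method zscaling --regression_calibrator_metric stdev --calibration_path data/my_calibration.csv --evaluation_methods nll spearman miscalibration_area --evaluation_scores_path my_uncertainty_scores.csv + ```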
+ Using evaluations to compare between datasets may not be a fair comparison and should be done cautiously. + + ## Hyperparameter Optimization + + Although the default message passing architecture works well on a variety of datasets, optimizing the hyperparameters for a particular dataset often leads to an improvement in performance. We have automated hyperparameter optimization via Bayesian optimization (using the [hyperopt](https://github.com/hyperopt/hyperopt) package). The default hyperparameter optimization will search for the best configuration of hidden size, depth, dropout, and number of feed-forward layers for our model. Optimization can be run as follows: + ``` + chemprop_hyperopt --data_path --dataset_type --num_iters --config_save_path + ``` + where `` is the number of hyperparameter trial configurations to try and `` is the path to a `.json` file where the optimal hyperparameters will be saved. If installed from source, `chemprop_hyperopt` can be replaced with `python hyperparameter_optimization.py`. Additional training arguments can also be supplied during submission, and they will be applied to all included training iterations (`--epochs`, `--aggregation`, `--num_folds`, `--gpu`, `--ensemble_size`, `--seed`, etc.). The argument `--log_dir ` can optionally be provided to set a location for the hyperparameter optimization log. + + Once hyperparameter optimization is complete, the optimal hyperparameters can be applied during training by specifying the config path as follows: + ``` + chemprop_train --data_path --dataset_type --config_path + ``` + + Note that the hyperparameter optimization script sees all the data given to it. The intended use is to run the hyperparameter optimization script on a dataset with the eventual test set held out. If you need to optimize hyperparameters separately for several different cross-validation splits, you should e.g. set up a bash script to run hyperparameter_optimization.py separately on each split's training and validation data with the test data held out. + + ### Choosing the Search Parameters + + The parameter space being searched can be changed to include different sets of model hyperparameters. These can be selected using the argument `--search_parameter_keywords `. The available keywords are listed below. Some keywords refer to bundles of parameters or other special behavior. Note that the search ranges for each parameter are hardcoded and can be viewed or changed in `chemprop/hyperopt_utils.py`. + + Special keywords + * basic - the default set of hyperparameters for search: depth, ffn_num_layers, dropout, and linked_hidden_size. + * linked_hidden_size - search for hidden_size and ffn_hidden_size, but constrained for them to have the same value. This allows search through both but with one fewer degree of freedom. + * learning_rate - search for max_lr, init_lr, final_lr, and warmup_epochs. + * all - include search for all individual keyword options + Individual supported parameters + * activation, aggregation, aggregation_norm, batch_size, depth, dropout, ffn_hidden_size, ffn_num_layers, final_lr, hidden_size, init_lr, max_lr, warmup_epochs + + Choosing to include additional search parameters should be undertaken carefully. The number of possible parameter combinations increases combinatorially with the addition of more hyperparameters, so the search for an optimal configuration will become more difficult accordingly.
The recommendation from Hyperopt is to use at least 10 trials per hyperparameter for an appropriate search as a rule of thumb, but even more will be necessary at higher levels of search complexity or to obtain better convergence to the optimal hyperparameters. Steps to reduce the complexity of a search space should be considered, such as excluding low-sensitivity parameters or those for which a judgement can be made ahead of time. Splitting the search into two steps can also reduce overall complexity. The `all` search option should only be used in situations where the dataset is small and a very large number of trials can be used. + + For best results, the `--epochs` specified during hyperparameter search should be the same as in the intended final application of the model. Learning rate parameters are especially sensitive to the number of epochs used. Note that the number of epochs is not a hyperparameter search option. + + The search space for the init_lr and final_lr values is defined as fractions of the max_lr value. The search space for warmup_epochs is set as a fraction of the `--epochs` training argument. The search for aggregation_norm values is only relevant when the aggregation function is set to norm and can otherwise be neglected. If a separate training argument is provided that is included in the search parameters, the search will overwrite the specified value (e.g., `--depth 5 --search_parameter_keywords depth`). + + ### Checkpoints and Parallel Operation + + If the directory for hyperopt checkpoint files has been specified with `--hyperopt_checkpoint_dir `, results of completed trial configurations will be stored there and may serve as checkpoints for other instances of hyperparameter optimization. If `--hyperopt_checkpoint_dir` is not specified, then checkpoints will default to being stored with the hyperparameter optimization log. Interrupted hyperparameter optimizations can be restarted by specifying the same directory. Previously completed hyperparameter optimizations can be used as the starting point for new optimizations with a larger selected number of iterations. Note that the `--num_iters ` argument will count all previous checkpoints saved in the directory towards the total number of iterations, and if the existing number of checkpoints exceeds this argument then no new trials will be carried out. + + Parallel instances of hyperparameter optimization that share a checkpoint directory will have access to the shared results of hyperparameter optimization trials, allowing them to arrive at the desired total number of iterations collectively more quickly. In this way, multiple GPUs or other computing resources can be applied to the search (an illustrative command is sketched after the list below). Each instance of hyperparameter optimization is unaware of parallel trials that have not yet completed. This has several implications when running `n` parallel instances: + * A parallel search will have different information and search different parameters than a single-instance sequential search. + * New trials will not consider the parameters in currently running trials, in rare cases leading to duplication. + * Up to `n-1` extra random search iterations may occur above the number specified with `--startup_random_iters`. + * Up to `n-1` extra total trials will be run above the chosen `num_iters`, though each instance will be exposed to at least that number of iterations. + * The last parallel instance to complete is the only one that is aware of all the trials when reporting results.
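+ + For instance, an illustrative sketch of a hyperparameter optimization instance that shares a checkpoint directory with parallel instances (all paths are placeholders) could be launched as: + ``` + chemprop_hyperopt --data_path data/my_data.csv --dataset_type regression --num_iters 30 --epochs 30 --search_parameter_keywords basic learning_rate --hyperopt_checkpoint_dir my_hyperopt_checkpoints --config_save_path my_best_config.json + ``` + Each instance records its completed trials in `my_hyperopt_checkpoints`, so several such instances launched with the same arguments work toward the shared total of 30 iterations.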
+ + ### Random or Directed Search + + As part of the hyperopt search algorithm, the first trial configurations for the model will be randomly spread through the search space. The number of randomized trials can be altered with the argument `--startup_random_iters `. By default, the number of random trials will be half the total number of trials. After this number of trial iterations has been carried out, subsequent trials will use the directed search algorithm to select parameter configurations. This startup count considers the total number of trials in the checkpoint directory rather than the number that has been carried out by an individual hyperparameter optimization instance. Both the random and directed searches use a unique trial seed in choosing hyperparameters; this can be specified for the first trial in an optimization instance using `--hyperopt_seed ` and will increment up to the next unused seed for subsequent trials. + + ### Manual Trials + + Manual training instances outside of hyperparameter optimization may also be considered in the history of attempted trials. The paths to the save_dirs for these training instances can be specified with `--manual_trial_dirs `. These directories must contain the files `test_scores.csv` and `args.json` as generated during training. To work appropriately, these training instances must be consistent with the parameter space being searched in hyperparameter optimization (including the hyperparameter optimization default of ffn_hidden_size being set equal to hidden_size). Manual trials considered with this argument are not added to the checkpoint directory. + + ## Encode Fingerprint Latent Representation + + To load a trained model and encode the fingerprint latent representation of molecules, run `fingerprint.py` and specify: + * `--test_path ` Path to the data to predict on. + * A checkpoint by using either: + * `--checkpoint_dir ` Directory where the model checkpoint is saved (i.e. `--save_dir` during training). + * `--checkpoint_path ` Path to a model checkpoint file (`.pt` file). + * `--preds_path` Path where a CSV file containing the encoded fingerprint vectors will be saved. + * Any other arguments that you would supply for a prediction, such as atom or bond features. + + Latent representations of molecules are taken from intermediate stages of the prediction model. This latent representation can be taken at the output of the MPNN (default) or from the last input layer of the FFNN, specified using `--fingerprint_type `. Fingerprint encoding uses the same set of arguments as making predictions. If multiple checkpoint files are supplied through `--checkpoint_dir`, then the fingerprint encodings for each of the models will be concatenated together into a longer vector. + + Example input: + ``` + chemprop_fingerprint --test_path data/tox21.csv --checkpoint_dir tox21_checkpoints --preds_path tox21_fingerprint.csv + ``` + or + ``` + chemprop_fingerprint --test_path data/tox21.csv --checkpoint_path tox21_checkpoints/fold_0/model_0/model.pt --preds_path tox21_fingerprint.csv + ``` + + If installed from source, `chemprop_fingerprint` can be replaced with `python fingerprint.py`. + + ## Interpreting + + It is often helpful to provide an explanation of a model prediction (e.g., this molecule is toxic because of this substructure).
Given a trained model, you can interpret the model prediction using the following command: + ``` + chemprop_interpret --data_path data/tox21.csv --checkpoint_dir tox21_checkpoints/fold_0/ --property_id 1 + ``` + + If installed from source, `chemprop_interpret` can be replaced with `python interpret.py`. + + The output will be like the following: + * The first column is a molecule and the second column is its predicted property (in this case NR-AR toxicity). + * The third column is the smallest substructure that caused this molecule to be classified as toxic (which we call the rationale). + * The fourth column is the predicted toxicity of that substructure. + + As shown in the first row, when a molecule is predicted to be non-toxic, we will not provide any rationale for its prediction. + + | smiles | NR-AR | rationale | rationale_score | + | :---: | :---: | :---: | :---: | + | O=\[N+\](\[O-\])c1cc(C(F)(F)F)cc(\[N+\](=O)\[O-\])c1Cl | 0.014 | | | + | CC1(C)O\[C@@H\]2C\[C@H\]3\[C@@H\]4C\[C@H\](F)C5=CC(=O)C=C\[C@\]5(C)\[C@H\]4\[C@@H\](O)C\[C@\]3(C)\[C@\]2(C(=O)CO)O1 | 0.896 | C\[C@\]12C=CC(=O)C=C1\[CH2:1\]C\[CH2:1\]\[CH2:1\]2 | 0.769 | + | C\[C@\]12CC\[C@H\]3\[C@@H\](CC\[C@@\]45O\[C@@H\]4C(O)=C(C#N)C\[C@\]35C)\[C@@H\]1CC\[C@@H\]2O | 0.941 | C\[C@\]12C\[CH:1\]=\[CH:1\]\[C@H\]3O\[C@\]31CC\[C@@H\]1\[C@@H\]2CC\[C:1\]\[CH2:1\]1 | 0.808 | + | C\[C@\]12C\[C@H\](O)\[C@H\]3\[C@@H\](CCC4=CC(=O)CC\[C@@\]43C)\[C@@H\]1CC\[C@\]2(O)C(=O)COP(=O)(\[O-\])\[O-\] | 0.957 | C1C\[CH2:1\]\[C:1\]\[C@@H\]2\[C@@H\]1\[C@@H\]1CC\[C:1\]\[C:1\]1C\[CH2:1\]2 | 0.532 | + + Chemprop's interpretation script explains model predictions one property at a time. `--property_id 1` tells the script to provide an explanation for the first property in the dataset (which is NR-AR). In a multi-task training setting, you will need to change `--property_id` to provide an explanation for each property in the dataset. + + For computational efficiency, we currently restrict the rationale to a maximum of 20 atoms and a minimum of 8 atoms. You can adjust these constraints through the `--max_atoms` and `--min_atoms` arguments. + + Please note that the interpreting framework is currently only available for models trained on properties of single molecules; that is, multi-molecule models generated via the `--number_of_molecules` argument are not supported. + + ## TensorBoard + + During training, TensorBoard logs are automatically saved to the same directory as the model checkpoints. To view TensorBoard logs, first install TensorFlow with `pip install tensorflow`. Then run `tensorboard --logdir=` where `` is the path to the checkpoint directory. Then navigate to [http://localhost:6006](http://localhost:6006). + + ## Results + + We compared our model against MolNet by Wu et al. on all of the MolNet datasets for which we could reproduce their splits (all but Bace, Toxcast, and qm7). When there was only one fold provided (scaffold split for BBBP and HIV), we ran our model multiple times and reported average performance. In each case we optimized hyperparameters on separate folds, used rdkit_2d_normalized features when useful, and compared to the best-performing model in MolNet as reported by Wu et al. We did not ensemble our model in these results.
+ + Results on regression datasets (lower is better) + + Dataset | Size | Metric | Ours | MolNet Best Model | + | :---: | :---: | :---: | :---: | :---: | + QM8 | 21,786 | MAE | 0.011 ± 0.000 | 0.0143 ± 0.0011 | + QM9 | 133,885 | MAE | 2.666 ± 0.006 | 2.4 ± 1.1 | + ESOL | 1,128 | RMSE | 0.555 ± 0.047 | 0.58 ± 0.03 | + FreeSolv | 642 | RMSE | 1.075 ± 0.054 | 1.15 ± 0.12 | + Lipophilicity | 4,200 | RMSE | 0.555 ± 0.023 | 0.655 ± 0.036 | + PDBbind (full) | 9,880 | RMSE | 1.391 ± 0.012 | 1.25 ± 0 | + PDBbind (core) | 168 | RMSE | 2.173 ± 0.090 | 1.92 ± 0.07 | + PDBbind (refined) | 3,040 | RMSE | 1.486 ± 0.026 | 1.38 ± 0 | + + Results on classification datasets (higher is better) + + | Dataset | Size | Metric | Ours | MolNet Best Model | + | :---: | :---: | :---: | :---: | :---: | + | PCBA | 437,928 | PRC-AUC | 0.335 ± 0.001 | 0.136 ± 0.004 | + | MUV | 93,087 | PRC-AUC | 0.041 ± 0.007 | 0.184 ± 0.02 | + | HIV | 41,127 | ROC-AUC | 0.776 ± 0.007 | 0.792 ± 0 | + | BBBP | 2,039 | ROC-AUC | 0.737 ± 0.001 | 0.729 ± 0 | + | Tox21 | 7,831 | ROC-AUC | 0.851 ± 0.002 | 0.829 ± 0.006 | + | SIDER | 1,427 | ROC-AUC | 0.676 ± 0.014 | 0.648 ± 0.009 | + | ClinTox | 1,478 | ROC-AUC | 0.864 ± 0.017 | 0.832 ± 0.037 | + + Lastly, you can find the code to our original repo at https://github.com/wengong-jin/chemprop and for the Mayr et al. baseline at https://github.com/yangkevin2/lsc_experiments . + +Keywords: chemistry,machine learning,property prediction,message passing neural network,graph neural network +Platform: UNKNOWN +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Requires-Python: >=3.7,<3.9 +Description-Content-Type: text/markdown +Provides-Extra: test diff --git a/chemprop/chemprop.egg-info/SOURCES.txt b/chemprop/chemprop.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..b55f935a46a8edc5935f2880db6fadf0ced8d6e2 --- /dev/null +++ b/chemprop/chemprop.egg-info/SOURCES.txt @@ -0,0 +1,59 @@ +README.md +setup.cfg +setup.py +chemprop/__init__.py +chemprop/args.py +chemprop/constants.py +chemprop/hyperopt_utils.py +chemprop/hyperparameter_optimization.py +chemprop/interpret.py +chemprop/multitask_utils.py +chemprop/nn_utils.py +chemprop/py.typed +chemprop/rdkit.py +chemprop/sklearn_predict.py +chemprop/sklearn_train.py +chemprop/spectra_utils.py +chemprop/utils.py +chemprop.egg-info/PKG-INFO +chemprop.egg-info/SOURCES.txt +chemprop.egg-info/dependency_links.txt +chemprop.egg-info/entry_points.txt +chemprop.egg-info/requires.txt +chemprop.egg-info/top_level.txt +chemprop/data/__init__.py +chemprop/data/data.py +chemprop/data/scaffold.py +chemprop/data/scaler.py +chemprop/data/utils.py +chemprop/features/__init__.py +chemprop/features/features_generators.py +chemprop/features/featurization.py +chemprop/features/utils.py +chemprop/models/__init__.py +chemprop/models/ffn.py +chemprop/models/model.py +chemprop/models/mpn.py +chemprop/train/__init__.py +chemprop/train/cross_validate.py +chemprop/train/evaluate.py +chemprop/train/loss_functions.py +chemprop/train/make_predictions.py +chemprop/train/metrics.py +chemprop/train/molecule_fingerprint.py +chemprop/train/predict.py +chemprop/train/run_training.py +chemprop/train/train.py +chemprop/uncertainty/__init__.py +chemprop/uncertainty/uncertainty_calibrator.py +chemprop/uncertainty/uncertainty_estimator.py +chemprop/uncertainty/uncertainty_evaluator.py 
+chemprop/uncertainty/uncertainty_predictor.py +chemprop/web/__init__.py +chemprop/web/config.py +chemprop/web/run.py +chemprop/web/utils.py +chemprop/web/wsgi.py +chemprop/web/app/__init__.py +chemprop/web/app/db.py +chemprop/web/app/views.py \ No newline at end of file diff --git a/chemprop/chemprop.egg-info/dependency_links.txt b/chemprop/chemprop.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/chemprop/chemprop.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/chemprop/chemprop.egg-info/entry_points.txt b/chemprop/chemprop.egg-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..6cea2f0301230a49a39024fe17c9dcca49c69634 --- /dev/null +++ b/chemprop/chemprop.egg-info/entry_points.txt @@ -0,0 +1,10 @@ +[console_scripts] +chemprop_fingerprint = chemprop.train:chemprop_fingerprint +chemprop_hyperopt = chemprop.hyperparameter_optimization:chemprop_hyperopt +chemprop_interpret = chemprop.interpret:chemprop_interpret +chemprop_predict = chemprop.train:chemprop_predict +chemprop_train = chemprop.train:chemprop_train +chemprop_web = chemprop.web.run:chemprop_web +sklearn_predict = chemprop.sklearn_predict:sklearn_predict +sklearn_train = chemprop.sklearn_train:sklearn_train + diff --git a/chemprop/chemprop.egg-info/requires.txt b/chemprop/chemprop.egg-info/requires.txt new file mode 100644 index 0000000000000000000000000000000000000000..b273bbea52a2e73b2eafde29bad21e4b5aff6efe --- /dev/null +++ b/chemprop/chemprop.egg-info/requires.txt @@ -0,0 +1,27 @@ +flask<=2.1.3,>=1.1.2 +Werkzeug<3 +hyperopt>=0.2.3 +matplotlib>=3.1.3 +numpy>=1.18.1 +pandas>=1.0.3 +pandas-flavor>=0.2.0 +scikit-learn>=0.22.2.post1 +sphinx>=3.1.2 +sphinx-rtd-theme>=2.0.0 +tensorboardX>=2.0 +torch>=1.4.0 +tqdm>=4.45.0 +typed-argument-parser>=1.6.1 +rdkit>=2020.03.1.0 + +[:python_version == "3.7"] +scipy<1.11 +descriptastorus<2.6.1 + +[:python_version == "3.8"] +scipy>=1.9 +descriptastorus>=2.6.1 + +[test] +pytest>=6.2.2 +parameterized>=0.8.1 diff --git a/chemprop/chemprop.egg-info/top_level.txt b/chemprop/chemprop.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..da10d69120060eda4e46ecb2e4bc31fc42c30512 --- /dev/null +++ b/chemprop/chemprop.egg-info/top_level.txt @@ -0,0 +1 @@ +chemprop diff --git a/chemprop/chemprop/__init__.py b/chemprop/chemprop/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4c2a05683e98de16fe555275a8f1430a97a9008e --- /dev/null +++ b/chemprop/chemprop/__init__.py @@ -0,0 +1,5 @@ +from . 
import data, exceptions, featurizers, models, nn, schedulers, utils + +__all__ = ["data", "featurizers", "models", "nn", "utils", "exceptions", "schedulers"] + +__version__ = "2.1.2" diff --git a/chemprop/chemprop/__pycache__/__init__.cpython-37.pyc b/chemprop/chemprop/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c9b7095c4cb5147e6fb1867912ea804331ba5bb5 Binary files /dev/null and b/chemprop/chemprop/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop/chemprop/__pycache__/args.cpython-37.pyc b/chemprop/chemprop/__pycache__/args.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e72640d8427d96850ec6362d5443893265b037c Binary files /dev/null and b/chemprop/chemprop/__pycache__/args.cpython-37.pyc differ diff --git a/chemprop/chemprop/__pycache__/constants.cpython-37.pyc b/chemprop/chemprop/__pycache__/constants.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a79288e0006c7aa20463e3569efdda612645f38f Binary files /dev/null and b/chemprop/chemprop/__pycache__/constants.cpython-37.pyc differ diff --git a/chemprop/chemprop/__pycache__/hyperopt_utils.cpython-37.pyc b/chemprop/chemprop/__pycache__/hyperopt_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0efad6a7d927f8adfe8268cc54e3c2fad10b12e3 Binary files /dev/null and b/chemprop/chemprop/__pycache__/hyperopt_utils.cpython-37.pyc differ diff --git a/chemprop/chemprop/__pycache__/hyperparameter_optimization.cpython-37.pyc b/chemprop/chemprop/__pycache__/hyperparameter_optimization.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a80289e1131169088a082fda6cb6fd723970ef9d Binary files /dev/null and b/chemprop/chemprop/__pycache__/hyperparameter_optimization.cpython-37.pyc differ diff --git a/chemprop/chemprop/__pycache__/interpret.cpython-37.pyc b/chemprop/chemprop/__pycache__/interpret.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4ff850a4d89900133c1cddf13a4ae1abfd861e0 Binary files /dev/null and b/chemprop/chemprop/__pycache__/interpret.cpython-37.pyc differ diff --git a/chemprop/chemprop/__pycache__/multitask_utils.cpython-37.pyc b/chemprop/chemprop/__pycache__/multitask_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52f37ea7d193887ee7e45c97f61e997b896465cf Binary files /dev/null and b/chemprop/chemprop/__pycache__/multitask_utils.cpython-37.pyc differ diff --git a/chemprop/chemprop/__pycache__/nn_utils.cpython-37.pyc b/chemprop/chemprop/__pycache__/nn_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5edd6b47a163590b196d1b1e4dd9faa05f2175eb Binary files /dev/null and b/chemprop/chemprop/__pycache__/nn_utils.cpython-37.pyc differ diff --git a/chemprop/chemprop/__pycache__/rdkit.cpython-37.pyc b/chemprop/chemprop/__pycache__/rdkit.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..556cc7ee61a15e08f0ec64256af87fdbce6d3e33 Binary files /dev/null and b/chemprop/chemprop/__pycache__/rdkit.cpython-37.pyc differ diff --git a/chemprop/chemprop/__pycache__/sklearn_predict.cpython-37.pyc b/chemprop/chemprop/__pycache__/sklearn_predict.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1ed0056d83f64e771778cd13fd8ad8a18f8007c6 Binary files /dev/null and b/chemprop/chemprop/__pycache__/sklearn_predict.cpython-37.pyc differ diff --git 
a/chemprop/chemprop/__pycache__/sklearn_train.cpython-37.pyc b/chemprop/chemprop/__pycache__/sklearn_train.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e3647d20a264473bf844ab3f1bed7b7a073c710f Binary files /dev/null and b/chemprop/chemprop/__pycache__/sklearn_train.cpython-37.pyc differ diff --git a/chemprop/chemprop/__pycache__/spectra_utils.cpython-37.pyc b/chemprop/chemprop/__pycache__/spectra_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16ce436997ebd26f9d522ff691aa6b39708d705e Binary files /dev/null and b/chemprop/chemprop/__pycache__/spectra_utils.cpython-37.pyc differ diff --git a/chemprop/chemprop/__pycache__/utils.cpython-37.pyc b/chemprop/chemprop/__pycache__/utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d5b9d9fd85df60238ed748527d1a06c5290a2231 Binary files /dev/null and b/chemprop/chemprop/__pycache__/utils.cpython-37.pyc differ diff --git a/chemprop/chemprop/cli/common.py b/chemprop/chemprop/cli/common.py new file mode 100644 index 0000000000000000000000000000000000000000..798627387c9a490444b050d4d0a1db2aa04a7ce8 --- /dev/null +++ b/chemprop/chemprop/cli/common.py @@ -0,0 +1,216 @@ +from argparse import ArgumentError, ArgumentParser, Namespace +import logging +from pathlib import Path + +from chemprop.cli.utils import LookupAction +from chemprop.cli.utils.args import uppercase +from chemprop.featurizers import AtomFeatureMode, MoleculeFeaturizerRegistry, RxnMode + +logger = logging.getLogger(__name__) + + +def add_common_args(parser: ArgumentParser) -> ArgumentParser: + data_args = parser.add_argument_group("Shared input data args") + data_args.add_argument( + "-s", + "--smiles-columns", + nargs="+", + help="Column names in the input CSV containing SMILES strings (uses the 0th column by default)", + ) + data_args.add_argument( + "-r", + "--reaction-columns", + nargs="+", + help="Column names in the input CSV containing reaction SMILES in the format ``REACTANT>AGENT>PRODUCT``, where 'AGENT' is optional", + ) + data_args.add_argument( + "--no-header-row", + action="store_true", + help="Turn off using the first row in the input CSV as column names", + ) + + dataloader_args = parser.add_argument_group("Dataloader args") + dataloader_args.add_argument( + "-n", + "--num-workers", + type=int, + default=0, + help="""Number of workers for parallel data loading where 0 means sequential +(Warning: setting ``num_workers`` to a value greater than 0 can cause hangs on Windows and MacOS)""", + ) + dataloader_args.add_argument("-b", "--batch-size", type=int, default=64, help="Batch size") + + parser.add_argument( + "--accelerator", default="auto", help="Passed directly to the lightning ``Trainer()``" + ) + parser.add_argument( + "--devices", + default="auto", + help="Passed directly to the lightning ``Trainer()`` (must be a single string of comma separated devices, e.g. 
'1, 2' if specifying multiple devices)", + ) + + featurization_args = parser.add_argument_group("Featurization args") + featurization_args.add_argument( + "--rxn-mode", + "--reaction-mode", + type=uppercase, + default="REAC_DIFF", + choices=list(RxnMode.keys()), + help="""Choices for construction of atom and bond features for reactions (case insensitive): + +- ``REAC_PROD``: concatenates the reactants feature with the products feature +- ``REAC_DIFF``: concatenates the reactants feature with the difference in features between reactants and products (Default) +- ``PROD_DIFF``: concatenates the products feature with the difference in features between reactants and products +- ``REAC_PROD_BALANCE``: concatenates the reactants feature with the products feature, balances imbalanced reactions +- ``REAC_DIFF_BALANCE``: concatenates the reactants feature with the difference in features between reactants and products, balances imbalanced reactions +- ``PROD_DIFF_BALANCE``: concatenates the products feature with the difference in features between reactants and products, balances imbalanced reactions""", + ) + # TODO: Update documentation for multi_hot_atom_featurizer_mode + featurization_args.add_argument( + "--multi-hot-atom-featurizer-mode", + type=uppercase, + default="V2", + choices=list(AtomFeatureMode.keys()), + help="""Choices for multi-hot atom featurization scheme. This will affect both non-reaction and reaction featurization (case insensitive): + +- ``V1``: Corresponds to the original configuration employed in Chemprop V1 +- ``V2``: Tailored for a broad range of molecules, this configuration encompasses all elements in the first four rows of the periodic table, along with iodine. It is the default in Chemprop V2. +- ``ORGANIC``: This configuration is designed specifically for use with organic molecules for drug research and development and includes a subset of elements most common in organic chemistry, including H, B, C, N, O, F, Si, P, S, Cl, Br, and I. +- ``RIGR``: Modified V2 (default) featurizer using only the resonance-invariant atom and bond features.""", + ) + featurization_args.add_argument( + "--keep-h", + action="store_true", + help="Whether hydrogens explicitly specified in input should be kept in the mol graph", + ) + featurization_args.add_argument( + "--add-h", action="store_true", help="Whether hydrogens should be added to the mol graph" + ) + data_args.add_argument( + "--ignore-chirality", + action="store_true", + help="Ignore chirality information in the input SMILES", + ) + featurization_args.add_argument( + "--molecule-featurizers", + "--features-generators", + nargs="+", + action=LookupAction(MoleculeFeaturizerRegistry), + help="Method(s) of generating molecule features to use as extra descriptors", + ) + # TODO: add in v2.1 to deprecate features-generators and then remove in v2.2 + # featurization_args.add_argument( + # "--features-generators", nargs="+", help="Renamed to `--molecule-featurizers`." + # ) + featurization_args.add_argument( + "--descriptors-path", + type=Path, + help="Path to extra descriptors to concatenate to learned representation", + ) + # TODO: Add in v2.1 + # featurization_args.add_argument( + # "--phase-features-path", + # help="Path to features used to indicate the phase of the data in one-hot vector form. 
Used in spectra datatype.", + # ) + featurization_args.add_argument( + "--no-descriptor-scaling", action="store_true", help="Turn off extra descriptor scaling" + ) + featurization_args.add_argument( + "--no-atom-feature-scaling", action="store_true", help="Turn off extra atom feature scaling" + ) + featurization_args.add_argument( + "--no-atom-descriptor-scaling", + action="store_true", + help="Turn off extra atom descriptor scaling", + ) + featurization_args.add_argument( + "--no-bond-feature-scaling", action="store_true", help="Turn off extra bond feature scaling" + ) + featurization_args.add_argument( + "--atom-features-path", + nargs="+", + action="append", + help="If a single path is given, it is assumed to correspond to the 0-th molecule. Alternatively, it can be a two-tuple of molecule index and path to additional atom features to supply before message passing (e.g., ``--atom-features-path 0 /path/to/features_0.npz``) indicates that the features at the given path should be supplied to the 0-th component. To supply additional features for multiple components, repeat this argument on the command line for each component's respective values (e.g., ``--atom-features-path [...] --atom-features-path [...]``).", + ) + featurization_args.add_argument( + "--atom-descriptors-path", + nargs="+", + action="append", + help="If a single path is given, it is assumed to correspond to the 0-th molecule. Alternatively, it can be a two-tuple of molecule index and path to additional atom descriptors to supply after message passing (e.g., ``--atom-descriptors-path 0 /path/to/descriptors_0.npz`` indicates that the descriptors at the given path should be supplied to the 0-th component. To supply additional descriptors for multiple components, repeat this argument on the command line for each component's respective values (e.g., ``--atom-descriptors-path [...] --atom-descriptors-path [...]``).", + ) + featurization_args.add_argument( + "--bond-features-path", + nargs="+", + action="append", + help="If a single path is given, it is assumed to correspond to the 0-th molecule. Alternatively, it can be a two-tuple of molecule index and path to additional bond features to supply before message passing (e.g., ``--bond-features-path 0 /path/to/features_0.npz`` indicates that the features at the given path should be supplied to the 0-th component. To supply additional features for multiple components, repeat this argument on the command line for each component's respective values (e.g., ``--bond-features-path [...] --bond-features-path [...]``).", + ) + # TODO: Add in v2.2 + # parser.add_argument( + # "--constraints-path", + # help="Path to constraints applied to atomic/bond properties prediction.", + # ) + + return parser + + +def process_common_args(args: Namespace) -> Namespace: + # TODO: add in v2.1 to deprecate features-generators and then remove in v2.2 + # if args.features_generators is not None: + # raise ArgumentError( + # argument=None, + # message="`--features-generators` has been renamed to `--molecule-featurizers`.", + # ) + + for key in ["atom_features_path", "atom_descriptors_path", "bond_features_path"]: + inds_paths = getattr(args, key) + + if not inds_paths: + continue + + ind_path_dict = {} + + for ind_path in inds_paths: + if len(ind_path) > 2: + raise ArgumentError( + argument=None, + message="Too many arguments were given for atom features/descriptors or bond features. 
It should be either a two-tuple of molecule index and a path, or a single path (assumed to be the 0-th molecule).", + ) + + if len(ind_path) == 1: + ind = 0 + path = ind_path[0] + else: + ind, path = ind_path + + if ind_path_dict.get(int(ind), None): + raise ArgumentError( + argument=None, + message=f"Duplicate atom features/descriptors or bond features given for molecule index {ind}", + ) + + ind_path_dict[int(ind)] = Path(path) + + setattr(args, key, ind_path_dict) + + return args + + +def validate_common_args(args): + pass + + +def find_models(model_paths: list[Path]): + collected_model_paths = [] + + for model_path in model_paths: + if model_path.suffix in [".ckpt", ".pt"]: + collected_model_paths.append(model_path) + elif model_path.is_dir(): + collected_model_paths.extend(list(model_path.rglob("*.pt"))) + else: + raise ArgumentError( + argument=None, + message=f"Expected a .ckpt or .pt file, or a directory. Got {model_path}", + ) + + return collected_model_paths diff --git a/chemprop/chemprop/cli/conf.py b/chemprop/chemprop/cli/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..be7701c52cc6509817a7cb9d4223ea33083f422b --- /dev/null +++ b/chemprop/chemprop/cli/conf.py @@ -0,0 +1,9 @@ +from datetime import datetime +import logging +import os +from pathlib import Path + +LOG_DIR = Path(os.getenv("CHEMPROP_LOG_DIR", "chemprop_logs")) +LOG_LEVELS = {0: logging.INFO, 1: logging.DEBUG, -1: logging.WARNING, -2: logging.ERROR} +NOW = datetime.now().strftime("%Y-%m-%dT%H-%M-%S") +CHEMPROP_TRAIN_DIR = Path(os.getenv("CHEMPROP_TRAIN_DIR", "chemprop_training")) diff --git a/chemprop/chemprop/cli/convert.py b/chemprop/chemprop/cli/convert.py new file mode 100644 index 0000000000000000000000000000000000000000..e75795e9cb19985d49108d014b447d29209340fe --- /dev/null +++ b/chemprop/chemprop/cli/convert.py @@ -0,0 +1,55 @@ +from argparse import ArgumentError, ArgumentParser, Namespace +import logging +from pathlib import Path +import sys + +from chemprop.cli.utils import Subcommand +from chemprop.utils.v1_to_v2 import convert_model_file_v1_to_v2 + +logger = logging.getLogger(__name__) + + +class ConvertSubcommand(Subcommand): + COMMAND = "convert" + HELP = "Convert a v1 model checkpoint (.pt) to a v2 model checkpoint (.pt)." + + @classmethod + def add_args(cls, parser: ArgumentParser) -> ArgumentParser: + parser.add_argument( + "-i", + "--input-path", + required=True, + type=Path, + help="Path to a v1 model .pt checkpoint file", + ) + parser.add_argument( + "-o", + "--output-path", + type=Path, + help="Path to which the converted model will be saved (``CURRENT_DIRECTORY/STEM_OF_INPUT_v2.pt`` by default)", + ) + return parser + + @classmethod + def func(cls, args: Namespace): + if args.output_path is None: + args.output_path = Path(args.input_path.stem + "_v2.pt") + if args.output_path.suffix != ".pt": + raise ArgumentError( + argument=None, message=f"Output must be a `.pt` file. Got {args.output_path}" + ) + + logger.info( + f"Converting v1 model checkpoint '{args.input_path}' to v2 model checkpoint '{args.output_path}'..." 
+ ) + convert_model_file_v1_to_v2(args.input_path, args.output_path) + + +if __name__ == "__main__": + parser = ArgumentParser() + parser = ConvertSubcommand.add_args(parser) + + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True) + + args = parser.parse_args() + ConvertSubcommand.func(args) diff --git a/chemprop/chemprop/cli/fingerprint.py b/chemprop/chemprop/cli/fingerprint.py new file mode 100644 index 0000000000000000000000000000000000000000..c136730d51deb46c368b8f29ca996614e232acba --- /dev/null +++ b/chemprop/chemprop/cli/fingerprint.py @@ -0,0 +1,185 @@ +from argparse import ArgumentError, ArgumentParser, Namespace +import logging +from pathlib import Path +import sys + +import numpy as np +import pandas as pd +import torch + +from chemprop import data +from chemprop.cli.common import add_common_args, process_common_args, validate_common_args +from chemprop.cli.predict import find_models +from chemprop.cli.utils import Subcommand, build_data_from_files, make_dataset +from chemprop.models import load_model +from chemprop.nn.metrics import LossFunctionRegistry + +logger = logging.getLogger(__name__) + + +class FingerprintSubcommand(Subcommand): + COMMAND = "fingerprint" + HELP = "Use a pretrained chemprop model to calculate learned representations." + + @classmethod + def add_args(cls, parser: ArgumentParser) -> ArgumentParser: + parser = add_common_args(parser) + parser.add_argument( + "-i", + "--test-path", + required=True, + type=Path, + help="Path to an input CSV file containing SMILES", + ) + parser.add_argument( + "-o", + "--output", + "--preds-path", + type=Path, + help="Specify the path where predictions will be saved. If the file extension is .npz, they will be saved as a npz file. Otherwise, the predictions will be saved as a CSV. The index of the model will be appended to the filename's stem. By default, predictions will be saved to the same location as ``--test-path`` with '_fps' appended (e.g., 'PATH/TO/TEST_PATH_fps_0.csv').", + ) + parser.add_argument( + "--model-paths", + "--model-path", + required=True, + type=Path, + nargs="+", + help="Specify location of checkpoint(s) or model file(s) to use for prediction. It can be a path to either a single pretrained model checkpoint (.ckpt) or single pretrained model file (.pt), a directory that contains these files, or a list of path(s) and directory(s). If a directory, chemprop will recursively search and predict on all found (.pt) models.", + ) + parser.add_argument( + "--ffn-block-index", + required=True, + type=int, + default=-1, + help="The index indicates which linear layer returns the encoding in the FFN. An index of 0 denotes the post-aggregation representation through a 0-layer MLP, while an index of 1 represents the output from the first linear layer in the FFN, and so forth.", + ) + + return parser + + @classmethod + def func(cls, args: Namespace): + args = process_common_args(args) + validate_common_args(args) + args = process_fingerprint_args(args) + main(args) + + +def process_fingerprint_args(args: Namespace) -> Namespace: + if args.test_path.suffix not in [".csv"]: + raise ArgumentError( + argument=None, message=f"Input data must be a CSV file. Got {args.test_path}" + ) + if args.output is None: + args.output = args.test_path.parent / (args.test_path.stem + "_fps.csv") + if args.output.suffix not in [".csv", ".npz"]: + raise ArgumentError( + argument=None, message=f"Output must be a CSV or NPZ file. Got '{args.output}'." 
+ ) + return args + + +def make_fingerprint_for_model( + args: Namespace, model_path: Path, multicomponent: bool, output_path: Path +): + model = load_model(model_path, multicomponent) + model.eval() + + bounded = any( + isinstance(model.criterion, LossFunctionRegistry[loss_function]) + for loss_function in LossFunctionRegistry.keys() + if "bounded" in loss_function + ) + + format_kwargs = dict( + no_header_row=args.no_header_row, + smiles_cols=args.smiles_columns, + rxn_cols=args.reaction_columns, + target_cols=[], + ignore_cols=None, + splits_col=None, + weight_col=None, + bounded=bounded, + ) + + featurization_kwargs = dict( + molecule_featurizers=args.molecule_featurizers, + keep_h=args.keep_h, + add_h=args.add_h, + ignore_chirality=args.ignore_chirality, + ) + + test_data = build_data_from_files( + args.test_path, + **format_kwargs, + p_descriptors=args.descriptors_path, + p_atom_feats=args.atom_features_path, + p_bond_feats=args.bond_features_path, + p_atom_descs=args.atom_descriptors_path, + **featurization_kwargs, + ) + logger.info(f"test size: {len(test_data[0])}") + test_dsets = [ + make_dataset(d, args.rxn_mode, args.multi_hot_atom_featurizer_mode) for d in test_data + ] + + if multicomponent: + test_dset = data.MulticomponentDataset(test_dsets) + else: + test_dset = test_dsets[0] + + test_loader = data.build_dataloader(test_dset, args.batch_size, args.num_workers, shuffle=False) + + logger.info(model) + + with torch.no_grad(): + if multicomponent: + encodings = [ + model.encoding(batch.bmgs, batch.V_ds, batch.X_d, args.ffn_block_index) + for batch in test_loader + ] + else: + encodings = [ + model.encoding(batch.bmg, batch.V_d, batch.X_d, args.ffn_block_index) + for batch in test_loader + ] + H = torch.cat(encodings, 0).numpy() + + if output_path.suffix in [".npz"]: + np.savez(output_path, H=H) + elif output_path.suffix == ".csv": + fingerprint_columns = [f"fp_{i}" for i in range(H.shape[1])] + df_fingerprints = pd.DataFrame(H, columns=fingerprint_columns) + df_fingerprints.to_csv(output_path, index=False) + else: + raise ArgumentError( + argument=None, message=f"Output must be a CSV or npz file. Got {args.output}." 
+ ) + logger.info(f"Fingerprints saved to '{output_path}'") + + +def main(args): + match (args.smiles_columns, args.reaction_columns): + case [None, None]: + n_components = 1 + case [_, None]: + n_components = len(args.smiles_columns) + case [None, _]: + n_components = len(args.reaction_columns) + case _: + n_components = len(args.smiles_columns) + len(args.reaction_columns) + + multicomponent = n_components > 1 + + for i, model_path in enumerate(find_models(args.model_paths)): + logger.info(f"Fingerprints with model {i} at '{model_path}'") + output_path = args.output.parent / f"{args.output.stem}_{i}{args.output.suffix}" + make_fingerprint_for_model(args, model_path, multicomponent, output_path) + + +if __name__ == "__main__": + parser = ArgumentParser() + parser = FingerprintSubcommand.add_args(parser) + + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True) + args = parser.parse_args() + args = FingerprintSubcommand.func(args) diff --git a/chemprop/chemprop/cli/hpopt.py b/chemprop/chemprop/cli/hpopt.py new file mode 100644 index 0000000000000000000000000000000000000000..f205594d9829072a88babf01b09a09618bd8f98c --- /dev/null +++ b/chemprop/chemprop/cli/hpopt.py @@ -0,0 +1,540 @@ +from copy import deepcopy +import logging +from pathlib import Path +import shutil +import sys + +from configargparse import ArgumentParser, Namespace +from lightning import pytorch as pl +from lightning.pytorch.callbacks import EarlyStopping +import numpy as np +import torch + +from chemprop.cli.common import add_common_args, process_common_args, validate_common_args +from chemprop.cli.train import ( + TrainSubcommand, + add_train_args, + build_datasets, + build_model, + build_splits, + normalize_inputs, + process_train_args, + save_config, + validate_train_args, +) +from chemprop.cli.utils.command import Subcommand +from chemprop.data import build_dataloader +from chemprop.nn import AggregationRegistry, MetricRegistry +from chemprop.nn.transforms import UnscaleTransform +from chemprop.nn.utils import Activation + +NO_RAY = False +DEFAULT_SEARCH_SPACE = { + "activation": None, + "aggregation": None, + "aggregation_norm": None, + "batch_size": None, + "depth": None, + "dropout": None, + "ffn_hidden_dim": None, + "ffn_num_layers": None, + "final_lr_ratio": None, + "message_hidden_dim": None, + "init_lr_ratio": None, + "max_lr": None, + "warmup_epochs": None, +} + +try: + import ray + from ray import tune + from ray.train import CheckpointConfig, RunConfig, ScalingConfig + from ray.train.lightning import ( + RayDDPStrategy, + RayLightningEnvironment, + RayTrainReportCallback, + prepare_trainer, + ) + from ray.train.torch import TorchTrainer + from ray.tune.schedulers import ASHAScheduler, FIFOScheduler + + DEFAULT_SEARCH_SPACE = { + "activation": tune.choice(categories=list(Activation.keys())), + "aggregation": tune.choice(categories=list(AggregationRegistry.keys())), + "aggregation_norm": tune.quniform(lower=1, upper=200, q=1), + "batch_size": tune.choice([16, 32, 64, 128, 256]), + "depth": tune.qrandint(lower=2, upper=6, q=1), + "dropout": tune.choice([0.0] * 8 + list(np.arange(0.05, 0.45, 0.05))), + "ffn_hidden_dim": tune.qrandint(lower=300, upper=2400, q=100), + "ffn_num_layers": tune.qrandint(lower=1, upper=3, q=1), + "final_lr_ratio": tune.loguniform(lower=1e-2, upper=1), + "message_hidden_dim": tune.qrandint(lower=300, upper=2400, q=100), + "init_lr_ratio": tune.loguniform(lower=1e-2, upper=1), + "max_lr": tune.loguniform(lower=1e-4, upper=1e-2), + "warmup_epochs": None, + } +except 
ImportError: + NO_RAY = True + +NO_HYPEROPT = False +try: + from ray.tune.search.hyperopt import HyperOptSearch +except ImportError: + NO_HYPEROPT = True + +NO_OPTUNA = False +try: + from ray.tune.search.optuna import OptunaSearch +except ImportError: + NO_OPTUNA = True + + +logger = logging.getLogger(__name__) + +SEARCH_SPACE = DEFAULT_SEARCH_SPACE + +SEARCH_PARAM_KEYWORDS_MAP = { + "basic": ["depth", "ffn_num_layers", "dropout", "ffn_hidden_dim", "message_hidden_dim"], + "learning_rate": ["max_lr", "init_lr_ratio", "final_lr_ratio", "warmup_epochs"], + "all": list(DEFAULT_SEARCH_SPACE.keys()), + "init_lr": ["init_lr_ratio"], + "final_lr": ["final_lr_ratio"], +} + + +class HpoptSubcommand(Subcommand): + COMMAND = "hpopt" + HELP = "Perform hyperparameter optimization on the given task." + + @classmethod + def add_args(cls, parser: ArgumentParser) -> ArgumentParser: + parser = add_common_args(parser) + parser = add_train_args(parser) + return add_hpopt_args(parser) + + @classmethod + def func(cls, args: Namespace): + args = process_common_args(args) + args = process_train_args(args) + args = process_hpopt_args(args) + validate_common_args(args) + validate_train_args(args) + main(args) + + +def add_hpopt_args(parser: ArgumentParser) -> ArgumentParser: + hpopt_args = parser.add_argument_group("Chemprop hyperparameter optimization arguments") + + hpopt_args.add_argument( + "--search-parameter-keywords", + type=str, + nargs="+", + default=["basic"], + help=f"""The model parameters over which to search for an optimal hyperparameter configuration. Some options are bundles of parameters or otherwise special parameter operations. Special keywords include: + - ``basic``: Default set of hyperparameters for search (depth, ffn_num_layers, dropout, message_hidden_dim, and ffn_hidden_dim) + - ``learning_rate``: Search for max_lr, init_lr_ratio, final_lr_ratio, and warmup_epochs. The search for init_lr and final_lr values are defined as fractions of the max_lr value. The search for warmup_epochs is as a fraction of the total epochs used. + - ``all``: Include search for all 13 individual keyword options (including: activation, aggregation, aggregation_norm, and batch_size which aren't included in the other two keywords). 
+ Individual supported parameters: + {list(DEFAULT_SEARCH_SPACE.keys())} + """, + ) + + hpopt_args.add_argument( + "--hpopt-save-dir", + type=Path, + help="Directory to save the hyperparameter optimization results", + ) + + raytune_args = parser.add_argument_group("Ray Tune arguments") + + raytune_args.add_argument( + "--raytune-num-samples", + type=int, + default=10, + help="Passed directly to Ray Tune ``TuneConfig`` to control number of trials to run", + ) + + raytune_args.add_argument( + "--raytune-search-algorithm", + choices=["random", "hyperopt", "optuna"], + default="hyperopt", + help="Passed to Ray Tune ``TuneConfig`` to control search algorithm", + ) + + raytune_args.add_argument( + "--raytune-trial-scheduler", + choices=["FIFO", "AsyncHyperBand"], + default="FIFO", + help="Passed to Ray Tune ``TuneConfig`` to control trial scheduler", + ) + + raytune_args.add_argument( + "--raytune-num-workers", + type=int, + default=1, + help="Passed directly to Ray Tune ``ScalingConfig`` to control number of workers to use", + ) + + raytune_args.add_argument( + "--raytune-use-gpu", + action="store_true", + help="Passed directly to Ray Tune ``ScalingConfig`` to control whether to use GPUs", + ) + + raytune_args.add_argument( + "--raytune-num-checkpoints-to-keep", + type=int, + default=1, + help="Passed directly to Ray Tune ``CheckpointConfig`` to control number of checkpoints to keep", + ) + + raytune_args.add_argument( + "--raytune-grace-period", + type=int, + default=10, + help="Passed directly to Ray Tune ``ASHAScheduler`` to control grace period", + ) + + raytune_args.add_argument( + "--raytune-reduction-factor", + type=int, + default=2, + help="Passed directly to Ray Tune ``ASHAScheduler`` to control reduction factor", + ) + + raytune_args.add_argument( + "--raytune-temp-dir", help="Passed directly to Ray Tune init to control temporary directory" + ) + + raytune_args.add_argument( + "--raytune-num-cpus", + type=int, + help="Passed directly to Ray Tune init to control number of CPUs to use", + ) + + raytune_args.add_argument( + "--raytune-num-gpus", + type=int, + help="Passed directly to Ray Tune init to control number of GPUs to use", + ) + + raytune_args.add_argument( + "--raytune-max-concurrent-trials", + type=int, + help="Passed directly to Ray Tune TuneConfig to control maximum concurrent trials", + ) + + hyperopt_args = parser.add_argument_group("Hyperopt arguments") + + hyperopt_args.add_argument( + "--hyperopt-n-initial-points", + type=int, + help="Passed directly to ``HyperOptSearch`` to control number of initial points to sample", + ) + + hyperopt_args.add_argument( + "--hyperopt-random-state-seed", + type=int, + default=None, + help="Passed directly to ``HyperOptSearch`` to control random state seed", + ) + + return parser + + +def process_hpopt_args(args: Namespace) -> Namespace: + if args.hpopt_save_dir is None: + args.hpopt_save_dir = Path(f"chemprop_hpopt/{args.data_path.stem}") + + args.hpopt_save_dir.mkdir(exist_ok=True, parents=True) + + search_parameters = set() + + available_search_parameters = list(SEARCH_SPACE.keys()) + list(SEARCH_PARAM_KEYWORDS_MAP.keys()) + + for keyword in args.search_parameter_keywords: + if keyword not in available_search_parameters: + raise ValueError( + f"Search parameter keyword: {keyword} not in available options: {available_search_parameters}." 
+ ) + + search_parameters.update( + SEARCH_PARAM_KEYWORDS_MAP[keyword] + if keyword in SEARCH_PARAM_KEYWORDS_MAP + else [keyword] + ) + + args.search_parameter_keywords = list(search_parameters) + + if not args.hyperopt_n_initial_points: + args.hyperopt_n_initial_points = args.raytune_num_samples // 2 + + return args + + +def build_search_space(search_parameters: list[str], train_epochs: int) -> dict: + if "warmup_epochs" in search_parameters and SEARCH_SPACE.get("warmup_epochs", None) is None: + assert ( + train_epochs >= 6 + ), "Training epochs must be at least 6 to perform hyperparameter optimization for warmup_epochs." + SEARCH_SPACE["warmup_epochs"] = tune.qrandint(lower=1, upper=train_epochs // 2, q=1) + + return {param: SEARCH_SPACE[param] for param in search_parameters} + + +def update_args_with_config(args: Namespace, config: dict) -> Namespace: + args = deepcopy(args) + + for key, value in config.items(): + match key: + case "final_lr_ratio": + setattr(args, "final_lr", value * config.get("max_lr", args.max_lr)) + + case "init_lr_ratio": + setattr(args, "init_lr", value * config.get("max_lr", args.max_lr)) + + case _: + assert key in args, f"Key: {key} not found in args." + setattr(args, key, value) + + return args + + +def train_model(config, args, train_dset, val_dset, logger, output_transform, input_transforms): + args = update_args_with_config(args, config) + + train_loader = build_dataloader( + train_dset, args.batch_size, args.num_workers, seed=args.data_seed + ) + val_loader = build_dataloader(val_dset, args.batch_size, args.num_workers, shuffle=False) + + seed = args.pytorch_seed if args.pytorch_seed is not None else torch.seed() + + torch.manual_seed(seed) + + model = build_model(args, train_loader.dataset, output_transform, input_transforms) + logger.info(model) + + if args.tracking_metric == "val_loss": + T_tracking_metric = model.criterion.__class__ + else: + T_tracking_metric = MetricRegistry[args.tracking_metric] + args.tracking_metric = "val/" + args.tracking_metric + + monitor_mode = "max" if T_tracking_metric.higher_is_better else "min" + logger.debug(f"Evaluation metric: '{T_tracking_metric.alias}', mode: '{monitor_mode}'") + + patience = args.patience if args.patience is not None else args.epochs + early_stopping = EarlyStopping(args.tracking_metric, patience=patience, mode=monitor_mode) + + trainer = pl.Trainer( + accelerator=args.accelerator, + devices=args.devices, + max_epochs=args.epochs, + gradient_clip_val=args.grad_clip, + strategy=RayDDPStrategy(), + callbacks=[RayTrainReportCallback(), early_stopping], + plugins=[RayLightningEnvironment()], + deterministic=args.pytorch_seed is not None, + ) + trainer = prepare_trainer(trainer) + trainer.fit(model, train_loader, val_loader) + + +def tune_model( + args, train_dset, val_dset, logger, monitor_mode, output_transform, input_transforms +): + match args.raytune_trial_scheduler: + case "FIFO": + scheduler = FIFOScheduler() + case "AsyncHyperBand": + scheduler = ASHAScheduler( + max_t=args.epochs, + grace_period=min(args.raytune_grace_period, args.epochs), + reduction_factor=args.raytune_reduction_factor, + ) + case _: + raise ValueError(f"Invalid trial scheduler! 
got: {args.raytune_trial_scheduler}.") + + resources_per_worker = {} + if args.raytune_num_cpus and args.raytune_max_concurrent_trials: + resources_per_worker["CPU"] = args.raytune_num_cpus / args.raytune_max_concurrent_trials + if args.raytune_num_gpus and args.raytune_max_concurrent_trials: + resources_per_worker["GPU"] = args.raytune_num_gpus / args.raytune_max_concurrent_trials + if not resources_per_worker: + resources_per_worker = None + + if args.raytune_num_gpus: + use_gpu = True + else: + use_gpu = args.raytune_use_gpu + + scaling_config = ScalingConfig( + num_workers=args.raytune_num_workers, + use_gpu=use_gpu, + resources_per_worker=resources_per_worker, + trainer_resources={"CPU": 0}, + ) + + checkpoint_config = CheckpointConfig( + num_to_keep=args.raytune_num_checkpoints_to_keep, + checkpoint_score_attribute=args.tracking_metric, + checkpoint_score_order=monitor_mode, + ) + + run_config = RunConfig( + checkpoint_config=checkpoint_config, + storage_path=args.hpopt_save_dir.absolute() / "ray_results", + ) + + ray_trainer = TorchTrainer( + lambda config: train_model( + config, args, train_dset, val_dset, logger, output_transform, input_transforms + ), + scaling_config=scaling_config, + run_config=run_config, + ) + + match args.raytune_search_algorithm: + case "random": + search_alg = None + case "hyperopt": + if NO_HYPEROPT: + raise ImportError( + "HyperOptSearch requires hyperopt to be installed. Use 'pip install -U hyperopt' to install or use 'pip install -e .[hpopt]' in chemprop folder if you installed from source to install all hpopt relevant packages." + ) + + search_alg = HyperOptSearch( + n_initial_points=args.hyperopt_n_initial_points, + random_state_seed=args.hyperopt_random_state_seed, + ) + case "optuna": + if NO_OPTUNA: + raise ImportError( + "OptunaSearch requires optuna to be installed. Use 'pip install -U optuna' to install or use 'pip install -e .[hpopt]' in chemprop folder if you installed from source to install all hpopt relevant packages." + ) + + search_alg = OptunaSearch() + + tune_config = tune.TuneConfig( + metric=args.tracking_metric, + mode=monitor_mode, + num_samples=args.raytune_num_samples, + scheduler=scheduler, + search_alg=search_alg, + trial_dirname_creator=lambda trial: str(trial.trial_id), + ) + + tuner = tune.Tuner( + ray_trainer, + param_space={ + "train_loop_config": build_search_space(args.search_parameter_keywords, args.epochs) + }, + tune_config=tune_config, + ) + + return tuner.fit() + + +def main(args: Namespace): + if NO_RAY: + raise ImportError( + "Ray Tune requires ray to be installed. If you installed Chemprop from PyPI, run 'pip install -U ray[tune]' to install ray. If you installed from source, use 'pip install -e .[hpopt]' in Chemprop folder to install all hpopt relevant packages." + ) + + if not ray.is_initialized(): + try: + ray.init( + _temp_dir=args.raytune_temp_dir, + num_cpus=args.raytune_num_cpus, + num_gpus=args.raytune_num_gpus, + ) + except OSError as e: + if "AF_UNIX path length cannot exceed 107 bytes" in str(e): + raise OSError( + f"Ray Tune fails due to: {e}. This can sometimes be solved by providing a temporary directory, num_cpus, and num_gpus to Ray Tune via the CLI: --raytune-temp-dir --raytune-num-cpus --raytune-num-gpus ." 
+ ) + else: + raise e + else: + logger.info("Ray is already initialized.") + + format_kwargs = dict( + no_header_row=args.no_header_row, + smiles_cols=args.smiles_columns, + rxn_cols=args.reaction_columns, + target_cols=args.target_columns, + ignore_cols=args.ignore_columns, + splits_col=args.splits_column, + weight_col=args.weight_column, + bounded=args.loss_function is not None and "bounded" in args.loss_function, + ) + + featurization_kwargs = dict( + molecule_featurizers=args.molecule_featurizers, + keep_h=args.keep_h, + add_h=args.add_h, + ignore_chirality=args.ignore_chirality, + ) + + train_data, val_data, test_data = build_splits(args, format_kwargs, featurization_kwargs) + train_dset, val_dset, test_dset = build_datasets(args, train_data[0], val_data[0], test_data[0]) + + input_transforms = normalize_inputs(train_dset, val_dset, args) + + if "regression" in args.task_type: + output_scaler = train_dset.normalize_targets() + val_dset.normalize_targets(output_scaler) + logger.info(f"Train data: mean = {output_scaler.mean_} | std = {output_scaler.scale_}") + output_transform = UnscaleTransform.from_standard_scaler(output_scaler) + else: + output_transform = None + + train_loader = build_dataloader( + train_dset, args.batch_size, args.num_workers, seed=args.data_seed + ) + + model = build_model(args, train_loader.dataset, output_transform, input_transforms) + monitor_mode = "max" if model.metrics[0].higher_is_better else "min" + + results = tune_model( + args, train_dset, val_dset, logger, monitor_mode, output_transform, input_transforms + ) + + best_result = results.get_best_result() + best_config = best_result.config["train_loop_config"] + best_checkpoint_path = Path(best_result.checkpoint.path) / "checkpoint.ckpt" + + best_config_save_path = args.hpopt_save_dir / "best_config.toml" + best_checkpoint_save_path = args.hpopt_save_dir / "best_checkpoint.ckpt" + all_progress_save_path = args.hpopt_save_dir / "all_progress.csv" + + logger.info(f"Best hyperparameters saved to: '{best_config_save_path}'") + + args = update_args_with_config(args, best_config) + + args = TrainSubcommand.parser.parse_known_args(namespace=args)[0] + save_config(TrainSubcommand.parser, args, best_config_save_path) + + logger.info( + f"Best hyperparameter configuration checkpoint saved to '{best_checkpoint_save_path}'" + ) + + shutil.copyfile(best_checkpoint_path, best_checkpoint_save_path) + + logger.info(f"Hyperparameter optimization results saved to '{all_progress_save_path}'") + + result_df = results.get_dataframe() + + result_df.to_csv(all_progress_save_path, index=False) + + ray.shutdown() + + +if __name__ == "__main__": + parser = ArgumentParser() + parser = HpoptSubcommand.add_args(parser) + + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True) + args = parser.parse_args() + HpoptSubcommand.func(args) diff --git a/chemprop/chemprop/cli/main.py b/chemprop/chemprop/cli/main.py new file mode 100644 index 0000000000000000000000000000000000000000..56d4f5205a0b7a5a50003a1c3ddba971260784ef --- /dev/null +++ b/chemprop/chemprop/cli/main.py @@ -0,0 +1,85 @@ +import logging +from pathlib import Path +import sys + +from configargparse import ArgumentParser + +from chemprop.cli.conf import LOG_DIR, LOG_LEVELS, NOW +from chemprop.cli.convert import ConvertSubcommand +from chemprop.cli.fingerprint import FingerprintSubcommand +from chemprop.cli.hpopt import HpoptSubcommand +from chemprop.cli.predict import PredictSubcommand +from chemprop.cli.train import TrainSubcommand +from chemprop.cli.utils 
import pop_attr + +logger = logging.getLogger(__name__) + +SUBCOMMANDS = [ + TrainSubcommand, + PredictSubcommand, + ConvertSubcommand, + FingerprintSubcommand, + HpoptSubcommand, +] + + +def construct_parser(): + parser = ArgumentParser() + subparsers = parser.add_subparsers(title="mode", dest="mode", required=True) + + parent = ArgumentParser(add_help=False) + parent.add_argument( + "--logfile", + "--log", + nargs="?", + const="default", + help=f"Path to which the log file should be written (specifying just the flag alone will automatically log to a file ``{LOG_DIR}/MODE/TIMESTAMP.log`` , where 'MODE' is the CLI mode chosen, e.g., ``{LOG_DIR}/MODE/{NOW}.log``)", + ) + parent.add_argument("-v", action="store_true", help="Increase verbosity level to DEBUG") + parent.add_argument( + "-q", + action="count", + default=0, + help="Decrease verbosity level to WARNING or ERROR if specified twice", + ) + + parents = [parent] + for subcommand in SUBCOMMANDS: + subcommand.add(subparsers, parents) + + return parser + + +def main(): + parser = construct_parser() + args = parser.parse_args() + logfile, v_flag, q_count, mode, func = ( + pop_attr(args, attr) for attr in ["logfile", "v", "q", "mode", "func"] + ) + + if v_flag and q_count: + parser.error("The -v and -q options cannot be used together.") + + match logfile: + case None: + handler = logging.StreamHandler(sys.stderr) + case "default": + (LOG_DIR / mode).mkdir(parents=True, exist_ok=True) + handler = logging.FileHandler(str(LOG_DIR / mode / f"{NOW}.log")) + case _: + Path(logfile).parent.mkdir(parents=True, exist_ok=True) + handler = logging.FileHandler(logfile) + + verbosity = q_count * -1 if q_count else (1 if v_flag else 0) + logging_level = LOG_LEVELS.get(verbosity, logging.ERROR) + logging.basicConfig( + handlers=[handler], + format="%(asctime)s - %(levelname)s:%(name)s - %(message)s", + level=logging_level, + datefmt="%Y-%m-%dT%H:%M:%S", + force=True, + ) + + logger.info(f"Running in mode '{mode}' with args: {vars(args)}") + + func(args) diff --git a/chemprop/chemprop/cli/predict.py b/chemprop/chemprop/cli/predict.py new file mode 100644 index 0000000000000000000000000000000000000000..1cfdeee0255e6a8a42782524170521f1fddefdaa --- /dev/null +++ b/chemprop/chemprop/cli/predict.py @@ -0,0 +1,447 @@ +from argparse import ArgumentError, ArgumentParser, Namespace +import logging +from pathlib import Path +import sys +from typing import Iterator + +from lightning import pytorch as pl +import numpy as np +import pandas as pd +import torch + +from chemprop import data +from chemprop.cli.common import ( + add_common_args, + find_models, + process_common_args, + validate_common_args, +) +from chemprop.cli.utils import LookupAction, Subcommand, build_data_from_files, make_dataset +from chemprop.models.utils import load_model, load_output_columns +from chemprop.nn.metrics import LossFunctionRegistry +from chemprop.nn.predictors import EvidentialFFN, MulticlassClassificationFFN, MveFFN +from chemprop.uncertainty import ( + MVEWeightingCalibrator, + NoUncertaintyEstimator, + RegressionCalibrator, + RegressionEvaluator, + UncertaintyCalibratorRegistry, + UncertaintyEstimatorRegistry, + UncertaintyEvaluatorRegistry, +) +from chemprop.utils import Factory + +logger = logging.getLogger(__name__) + + +class PredictSubcommand(Subcommand): + COMMAND = "predict" + HELP = "use a pretrained chemprop model for prediction" + + @classmethod + def add_args(cls, parser: ArgumentParser) -> ArgumentParser: + parser = add_common_args(parser) + return 
add_predict_args(parser) + + @classmethod + def func(cls, args: Namespace): + args = process_common_args(args) + validate_common_args(args) + args = process_predict_args(args) + main(args) + + +def add_predict_args(parser: ArgumentParser) -> ArgumentParser: + parser.add_argument( + "-i", + "--test-path", + required=True, + type=Path, + help="Path to an input CSV file containing SMILES", + ) + parser.add_argument( + "-o", + "--output", + "--preds-path", + type=Path, + help="Specify path to which predictions will be saved. If the file extension is .pkl, it will be saved as a pickle file. Otherwise, chemprop will save predictions as a CSV. If multiple models are used to make predictions, the average predictions will be saved in the file, and another file ending in '_individual' with the same file extension will save the predictions for each individual model, with the column names being the target names appended with the model index (e.g., '_model_').", + ) + parser.add_argument( + "--drop-extra-columns", + action="store_true", + help="Whether to drop all columns from the test data file besides the SMILES columns and the new prediction columns", + ) + parser.add_argument( + "--model-paths", + "--model-path", + required=True, + type=Path, + nargs="+", + help="Location of checkpoint(s) or model file(s) to use for prediction. It can be a path to either a single pretrained model checkpoint (.ckpt) or single pretrained model file (.pt), a directory that contains these files, or a list of path(s) and directory(s). If a directory, will recursively search and predict on all found (.pt) models.", + ) + + unc_args = parser.add_argument_group("Uncertainty and calibration args") + unc_args.add_argument( + "--cal-path", type=Path, help="Path to data file to be used for uncertainty calibration." + ) + unc_args.add_argument( + "--uncertainty-method", + default="none", + action=LookupAction(UncertaintyEstimatorRegistry), + help="The method of calculating uncertainty.", + ) + unc_args.add_argument( + "--calibration-method", + action=LookupAction(UncertaintyCalibratorRegistry), + help="The method used for calibrating the uncertainty calculated with uncertainty method.", + ) + unc_args.add_argument( + "--evaluation-methods", + "--evaluation-method", + nargs="+", + action=LookupAction(UncertaintyEvaluatorRegistry), + help="The methods used for evaluating the uncertainty performance if the test data provided includes targets. Available methods are [nll, miscalibration_area, ence, spearman] or any available classification or multiclass metric.", + ) + # unc_args.add_argument( + # "--evaluation-scores-path", help="Location to save the results of uncertainty evaluations." + # ) + unc_args.add_argument( + "--uncertainty-dropout-p", + type=float, + default=0.1, + help="The probability to use for Monte Carlo dropout uncertainty estimation.", + ) + unc_args.add_argument( + "--dropout-sampling-size", + type=int, + default=10, + help="The number of samples to use for Monte Carlo dropout uncertainty estimation. Distinct from the dropout used during training.", + ) + unc_args.add_argument( + "--calibration-interval-percentile", + type=float, + default=95, + help="Sets the percentile used in the calibration methods. Must be in the range (1, 100).", + ) + unc_args.add_argument( + "--conformal-alpha", + type=float, + default=0.1, + help="Target error rate for conformal prediction. 
Must be in the range (0, 1).", + ) + # TODO: Decide if we want to implement this in v2.1.x + # unc_args.add_argument( + # "--regression-calibrator-metric", + # choices=["stdev", "interval"], + # help="Regression calibrators can output either a stdev or an interval.", + # ) + unc_args.add_argument( + "--cal-descriptors-path", + nargs="+", + action="append", + help="Path to extra descriptors to concatenate to learned representation in calibration dataset.", + ) + # TODO: Add in v2.1.x + # unc_args.add_argument( + # "--calibration-phase-features-path", + # help=" ", + # ) + unc_args.add_argument( + "--cal-atom-features-path", + nargs="+", + action="append", + help="Path to the extra atom features in calibration dataset.", + ) + unc_args.add_argument( + "--cal-atom-descriptors-path", + nargs="+", + action="append", + help="Path to the extra atom descriptors in calibration dataset.", + ) + unc_args.add_argument( + "--cal-bond-features-path", + nargs="+", + action="append", + help="Path to the extra bond features in calibration dataset.", + ) + + return parser + + +def process_predict_args(args: Namespace) -> Namespace: + if args.test_path.suffix not in [".csv"]: + raise ArgumentError( + argument=None, message=f"Input data must be a CSV file. Got {args.test_path}" + ) + if args.output is None: + args.output = args.test_path.parent / (args.test_path.stem + "_preds.csv") + if args.output.suffix not in [".csv", ".pkl"]: + raise ArgumentError( + argument=None, message=f"Output must be a CSV or Pickle file. Got {args.output}" + ) + return args + + +def prepare_data_loader( + args: Namespace, multicomponent: bool, is_calibration: bool, format_kwargs: dict +): + data_path = args.cal_path if is_calibration else args.test_path + descriptors_path = args.cal_descriptors_path if is_calibration else args.descriptors_path + atom_feats_path = args.cal_atom_features_path if is_calibration else args.atom_features_path + bond_feats_path = args.cal_bond_features_path if is_calibration else args.bond_features_path + atom_descs_path = ( + args.cal_atom_descriptors_path if is_calibration else args.atom_descriptors_path + ) + + featurization_kwargs = dict( + molecule_featurizers=args.molecule_featurizers, + keep_h=args.keep_h, + add_h=args.add_h, + ignore_chirality=args.ignore_chirality, + ) + + datas = build_data_from_files( + data_path, + **format_kwargs, + p_descriptors=descriptors_path, + p_atom_feats=atom_feats_path, + p_bond_feats=bond_feats_path, + p_atom_descs=atom_descs_path, + **featurization_kwargs, + ) + + dsets = [make_dataset(d, args.rxn_mode, args.multi_hot_atom_featurizer_mode) for d in datas] + dset = data.MulticomponentDataset(dsets) if multicomponent else dsets[0] + + return data.build_dataloader(dset, args.batch_size, args.num_workers, shuffle=False) + + +def make_prediction_for_models( + args: Namespace, model_paths: Iterator[Path], multicomponent: bool, output_path: Path +): + model = load_model(model_paths[0], multicomponent) + output_columns = load_output_columns(model_paths[0]) + bounded = any( + isinstance(model.criterion, LossFunctionRegistry[loss_function]) + for loss_function in LossFunctionRegistry.keys() + if "bounded" in loss_function + ) + format_kwargs = dict( + no_header_row=args.no_header_row, + smiles_cols=args.smiles_columns, + rxn_cols=args.reaction_columns, + ignore_cols=None, + splits_col=None, + weight_col=None, + bounded=bounded, + ) + format_kwargs["target_cols"] = output_columns if args.evaluation_methods is not None else [] + test_loader = prepare_data_loader(args, 
multicomponent, False, format_kwargs) + logger.info(f"test size: {len(test_loader.dataset)}") + if args.cal_path is not None: + format_kwargs["target_cols"] = output_columns + cal_loader = prepare_data_loader(args, multicomponent, True, format_kwargs) + logger.info(f"calibration size: {len(cal_loader.dataset)}") + + uncertainty_estimator = Factory.build( + UncertaintyEstimatorRegistry[args.uncertainty_method], + ensemble_size=args.dropout_sampling_size, + dropout=args.uncertainty_dropout_p, + ) + + models = [load_model(model_path, multicomponent) for model_path in model_paths] + trainer = pl.Trainer( + logger=False, enable_progress_bar=True, accelerator=args.accelerator, devices=args.devices + ) + test_individual_preds, test_individual_uncs = uncertainty_estimator( + test_loader, models, trainer + ) + test_preds = torch.mean(test_individual_preds, dim=0) + if not isinstance(uncertainty_estimator, NoUncertaintyEstimator): + test_uncs = torch.mean(test_individual_uncs, dim=0) + else: + test_uncs = None + + if args.calibration_method is not None: + uncertainty_calibrator = Factory.build( + UncertaintyCalibratorRegistry[args.calibration_method], + p=args.calibration_interval_percentile / 100, + alpha=args.conformal_alpha, + ) + cal_targets = cal_loader.dataset.Y + cal_mask = torch.from_numpy(np.isfinite(cal_targets)) + cal_targets = np.nan_to_num(cal_targets, nan=0.0) + cal_targets = torch.from_numpy(cal_targets) + cal_individual_preds, cal_individual_uncs = uncertainty_estimator( + cal_loader, models, trainer + ) + cal_preds = torch.mean(cal_individual_preds, dim=0) + cal_uncs = torch.mean(cal_individual_uncs, dim=0) + if isinstance(uncertainty_calibrator, MVEWeightingCalibrator): + uncertainty_calibrator.fit(cal_preds, cal_individual_uncs, cal_targets, cal_mask) + test_uncs = uncertainty_calibrator.apply(cal_individual_uncs) + else: + if isinstance(uncertainty_calibrator, RegressionCalibrator): + uncertainty_calibrator.fit(cal_preds, cal_uncs, cal_targets, cal_mask) + else: + uncertainty_calibrator.fit(cal_uncs, cal_targets, cal_mask) + test_uncs = uncertainty_calibrator.apply(test_uncs) + for i in range(test_individual_uncs.shape[0]): + test_individual_uncs[i] = uncertainty_calibrator.apply(test_individual_uncs[i]) + + if args.evaluation_methods is not None: + uncertainty_evaluators = [ + Factory.build(UncertaintyEvaluatorRegistry[method]) + for method in args.evaluation_methods + ] + logger.info("Uncertainty evaluation metric:") + for evaluator in uncertainty_evaluators: + test_targets = test_loader.dataset.Y + test_mask = torch.from_numpy(np.isfinite(test_targets)) + test_targets = np.nan_to_num(test_targets, nan=0.0) + test_targets = torch.from_numpy(test_targets) + if isinstance(evaluator, RegressionEvaluator): + metric_value = evaluator.evaluate(test_preds, test_uncs, test_targets, test_mask) + else: + metric_value = evaluator.evaluate(test_uncs, test_targets, test_mask) + logger.info(f"{evaluator.alias}: {metric_value.tolist()}") + + if args.uncertainty_method == "none" and ( + isinstance(model.predictor, MveFFN) or isinstance(model.predictor, EvidentialFFN) + ): + test_preds = test_preds[..., 0] + test_individual_preds = test_individual_preds[..., 0] + + if output_columns is None: + output_columns = [ + f"pred_{i}" for i in range(test_preds.shape[1]) + ] # TODO: need to improve this for cases like multi-task MVE and multi-task multiclass + + save_predictions(args, model, output_columns, test_preds, test_uncs, output_path) + + if len(model_paths) > 1: + save_individual_predictions( + 
args, + model, + model_paths, + output_columns, + test_individual_preds, + test_individual_uncs, + output_path, + ) + + +def save_predictions(args, model, output_columns, test_preds, test_uncs, output_path): + unc_columns = [f"{col}_unc" for col in output_columns] + + if isinstance(model.predictor, MulticlassClassificationFFN): + output_columns = output_columns + [f"{col}_prob" for col in output_columns] + predicted_class_labels = test_preds.argmax(axis=-1) + formatted_probability_strings = np.apply_along_axis( + lambda x: ",".join(map(str, x)), 2, test_preds.numpy() + ) + test_preds = np.concatenate( + (predicted_class_labels, formatted_probability_strings), axis=-1 + ) + + df_test = pd.read_csv( + args.test_path, header=None if args.no_header_row else "infer", index_col=False + ) + df_test[output_columns] = test_preds + + if args.uncertainty_method not in ["none", "classification"]: + df_test[unc_columns] = np.round(test_uncs, 6) + + if output_path.suffix == ".pkl": + df_test = df_test.reset_index(drop=True) + df_test.to_pickle(output_path) + else: + df_test.to_csv(output_path, index=False) + logger.info(f"Predictions saved to '{output_path}'") + + +def save_individual_predictions( + args, + model, + model_paths, + output_columns, + test_individual_preds, + test_individual_uncs, + output_path, +): + unc_columns = [ + f"{col}_unc_model_{i}" for i in range(len(model_paths)) for col in output_columns + ] + + if isinstance(model.predictor, MulticlassClassificationFFN): + output_columns = [ + item + for i in range(len(model_paths)) + for col in output_columns + for item in (f"{col}_model_{i}", f"{col}_prob_model_{i}") + ] + + predicted_class_labels = test_individual_preds.argmax(axis=-1) + formatted_probability_strings = np.apply_along_axis( + lambda x: ",".join(map(str, x)), 3, test_individual_preds.numpy() + ) + test_individual_preds = np.concatenate( + (predicted_class_labels, formatted_probability_strings), axis=-1 + ) + else: + output_columns = [ + f"{col}_model_{i}" for i in range(len(model_paths)) for col in output_columns + ] + + m, n, t = test_individual_preds.shape + test_individual_preds = np.transpose(test_individual_preds, (1, 0, 2)).reshape(n, m * t) + df_test = pd.read_csv( + args.test_path, header=None if args.no_header_row else "infer", index_col=False + ) + df_test[output_columns] = test_individual_preds + + if args.uncertainty_method not in ["none", "classification", "ensemble"]: + m, n, t = test_individual_uncs.shape + test_individual_uncs = np.transpose(test_individual_uncs, (1, 0, 2)).reshape(n, m * t) + df_test[unc_columns] = np.round(test_individual_uncs, 6) + + output_path = output_path.parent / Path( + str(args.output.stem) + "_individual" + str(output_path.suffix) + ) + if output_path.suffix == ".pkl": + df_test = df_test.reset_index(drop=True) + df_test.to_pickle(output_path) + else: + df_test.to_csv(output_path, index=False) + logger.info(f"Individual predictions saved to '{output_path}'") + for i, model_path in enumerate(model_paths): + logger.info( + f"Results from model path {model_path} are saved under the column name ending with 'model_{i}'" + ) + + +def main(args): + match (args.smiles_columns, args.reaction_columns): + case [None, None]: + n_components = 1 + case [_, None]: + n_components = len(args.smiles_columns) + case [None, _]: + n_components = len(args.reaction_columns) + case _: + n_components = len(args.smiles_columns) + len(args.reaction_columns) + + multicomponent = n_components > 1 + + model_paths = find_models(args.model_paths) + + 
make_prediction_for_models(args, model_paths, multicomponent, output_path=args.output) + + +if __name__ == "__main__": + parser = ArgumentParser() + parser = PredictSubcommand.add_args(parser) + + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True) + args = parser.parse_args() + args = PredictSubcommand.func(args) diff --git a/chemprop/chemprop/cli/train.py b/chemprop/chemprop/cli/train.py new file mode 100644 index 0000000000000000000000000000000000000000..50ac2f4365af09c5c3f1c246dd90413e8b51e940 --- /dev/null +++ b/chemprop/chemprop/cli/train.py @@ -0,0 +1,1343 @@ +from copy import deepcopy +from io import StringIO +import json +import logging +from pathlib import Path +import sys +from tempfile import TemporaryDirectory + +from configargparse import ArgumentError, ArgumentParser, Namespace +from lightning import pytorch as pl +from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint +from lightning.pytorch.loggers import CSVLogger, TensorBoardLogger +from lightning.pytorch.strategies import DDPStrategy +import numpy as np +import pandas as pd +from rich.console import Console +from rich.table import Column, Table +import torch +import torch.nn as nn + +from chemprop.cli.common import ( + add_common_args, + find_models, + process_common_args, + validate_common_args, +) +from chemprop.cli.conf import CHEMPROP_TRAIN_DIR, NOW +from chemprop.cli.utils import ( + LookupAction, + Subcommand, + build_data_from_files, + get_column_names, + make_dataset, + parse_indices, +) +from chemprop.cli.utils.args import uppercase +from chemprop.data import ( + MoleculeDataset, + MolGraphDataset, + MulticomponentDataset, + ReactionDatapoint, + SplitType, + build_dataloader, + make_split_indices, + split_data_by_indices, +) +from chemprop.data.datasets import _MolGraphDatasetMixin +from chemprop.models import MPNN, MulticomponentMPNN, save_model +from chemprop.nn import AggregationRegistry, LossFunctionRegistry, MetricRegistry, PredictorRegistry +from chemprop.nn.message_passing import ( + AtomMessagePassing, + BondMessagePassing, + MulticomponentMessagePassing, +) +from chemprop.nn.transforms import GraphTransform, ScaleTransform, UnscaleTransform +from chemprop.nn.utils import Activation +from chemprop.utils import Factory + +logger = logging.getLogger(__name__) + + +_CV_REMOVAL_ERROR = ( + "The -k/--num-folds argument was removed in v2.1.0 - use --num-replicates instead." +) + + +class TrainSubcommand(Subcommand): + COMMAND = "train" + HELP = "Train a chemprop model." 
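+ # NOTE: add_args() caches the fully built parser on the class (cls.parser) so that func() + # and the hpopt subcommand can later re-serialize the resolved arguments via save_config().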
+ parser = None + + @classmethod + def add_args(cls, parser: ArgumentParser) -> ArgumentParser: + parser = add_common_args(parser) + parser = add_train_args(parser) + cls.parser = parser + return parser + + @classmethod + def func(cls, args: Namespace): + args = process_common_args(args) + validate_common_args(args) + args = process_train_args(args) + validate_train_args(args) + + args.output_dir.mkdir(exist_ok=True, parents=True) + config_path = args.output_dir / "config.toml" + save_config(cls.parser, args, config_path) + main(args) + + +def add_train_args(parser: ArgumentParser) -> ArgumentParser: + parser.add_argument( + "--config-path", + type=Path, + is_config_file=True, + help="Path to a configuration file (command line arguments override values in the configuration file)", + ) + parser.add_argument( + "-i", + "--data-path", + type=Path, + help="Path to an input CSV file containing SMILES and the associated target values", + ) + parser.add_argument( + "-o", + "--output-dir", + "--save-dir", + type=Path, + help="Directory where training outputs will be saved (defaults to ``CURRENT_DIRECTORY/chemprop_training/STEM_OF_INPUT/TIME_STAMP``)", + ) + parser.add_argument( + "--remove-checkpoints", + action="store_true", + help="Remove intermediate checkpoint files after training is complete.", + ) + + # TODO: Add in v2.1; see if we can tell lightning how often to log training loss + # parser.add_argument( + # "--log-frequency", + # type=int, + # default=10, + # help="The number of batches between each logging of the training loss.", + # ) + + transfer_args = parser.add_argument_group("transfer learning args") + transfer_args.add_argument( + "--checkpoint", + type=Path, + nargs="+", + help="Path to checkpoint(s) or model file(s) for loading and overwriting weights. Accepts a single pre-trained model checkpoint (.ckpt), a single model file (.pt), a directory containing such files, or a list of paths and directories. If a directory is provided, it will recursively search for and use all (.pt) files found for prediction.", + ) + transfer_args.add_argument( + "--freeze-encoder", + action="store_true", + help="Freeze the message passing layer from the checkpoint model (specified by ``--checkpoint``).", + ) + transfer_args.add_argument( + "--model-frzn", + help="Path to model checkpoint file to be loaded for overwriting and freezing weights. By default, all MPNN weights are frozen with this option.", + ) + transfer_args.add_argument( + "--frzn-ffn-layers", + type=int, + default=0, + help="Freeze the first ``n`` layers of the FFN from the checkpoint model (specified by ``--checkpoint``). The message passing layer should also be frozen with ``--freeze-encoder``.", + ) + # transfer_args.add_argument( + # "--freeze-first-only", + # action="store_true", + # help="Determines whether or not to use checkpoint_frzn for just the first encoder. Default (False) is to use the checkpoint to freeze all encoders. (only relevant for number_of_molecules > 1, where checkpoint model has number_of_molecules = 1)", + # ) + + # TODO: Add in v2.1 + # parser.add_argument( + # "--resume-experiment", + # action="store_true", + # help="Whether to resume the experiment. Loads test results from any folds that have already been completed and skips training those folds.", + # ) + # parser.add_argument( + # "--config-path", + # help="Path to a :code:`.json` file containing arguments. 
Any arguments present in the config file will override arguments specified via the command line or by the defaults.", + # ) + parser.add_argument( + "--ensemble-size", + type=int, + default=1, + help="Number of models in ensemble for each splitting of data", + ) + + # TODO: Add in v2.2 + # abt_args = parser.add_argument_group("atom/bond target args") + # abt_args.add_argument( + # "--is-atom-bond-targets", + # action="store_true", + # help="Whether this is atomic/bond properties prediction.", + # ) + # abt_args.add_argument( + # "--no-adding-bond-types", + # action="store_true", + # help="Whether the bond types determined by RDKit molecules added to the output of bond targets. This option is intended to be used with the :code:`is_atom_bond_targets`.", + # ) + # abt_args.add_argument( + # "--keeping-atom-map", + # action="store_true", + # help="Whether RDKit molecules keep the original atom mapping. This option is intended to be used when providing atom-mapped SMILES with the :code:`is_atom_bond_targets`.", + # ) + # abt_args.add_argument( + # "--no-shared-atom-bond-ffn", + # action="store_true", + # help="Whether the FFN weights for atom and bond targets should be independent between tasks.", + # ) + # abt_args.add_argument( + # "--weights-ffn-num-layers", + # type=int, + # default=2, + # help="Number of layers in FFN for determining weights used in constrained targets.", + # ) + + mp_args = parser.add_argument_group("message passing") + mp_args.add_argument( + "--message-hidden-dim", type=int, default=300, help="Hidden dimension of the messages" + ) + mp_args.add_argument( + "--message-bias", action="store_true", help="Add bias to the message passing layers" + ) + mp_args.add_argument("--depth", type=int, default=3, help="Number of message passing steps") + mp_args.add_argument( + "--undirected", + action="store_true", + help="Pass messages on undirected bonds/edges (always sum the two relevant bond vectors)", + ) + mp_args.add_argument( + "--dropout", + type=float, + default=0.0, + help="Dropout probability in message passing/FFN layers", + ) + mp_args.add_argument( + "--mpn-shared", + action="store_true", + help="Whether to use the same message passing neural network for all input molecules (only relevant if ``number_of_molecules`` > 1)", + ) + mp_args.add_argument( + "--activation", + type=uppercase, + default="RELU", + choices=list(Activation.keys()), + help="Activation function in message passing/FFN layers", + ) + mp_args.add_argument( + "--aggregation", + "--agg", + default="norm", + action=LookupAction(AggregationRegistry), + help="Aggregation mode to use during graph predictor", + ) + mp_args.add_argument( + "--aggregation-norm", + type=float, + default=100, + help="Normalization factor by which to divide summed up atomic features for ``norm`` aggregation", + ) + mp_args.add_argument( + "--atom-messages", action="store_true", help="Pass messages on atoms rather than bonds." 
+ ) + + # TODO: Add in v2.1 + # mpsolv_args = parser.add_argument_group("message passing with solvent") + # mpsolv_args.add_argument( + # "--reaction-solvent", + # action="store_true", + # help="Whether to adjust the MPNN layer to take as input a reaction and a molecule, and to encode them with separate MPNNs.", + # ) + # mpsolv_args.add_argument( + # "--bias-solvent", + # action="store_true", + # help="Whether to add bias to linear layers for solvent MPN if :code:`reaction_solvent` is True.", + # ) + # mpsolv_args.add_argument( + # "--hidden-size-solvent", + # type=int, + # default=300, + # help="Dimensionality of hidden layers in solvent MPN if :code:`reaction_solvent` is True.", + # ) + # mpsolv_args.add_argument( + # "--depth-solvent", + # type=int, + # default=3, + # help="Number of message passing steps for solvent if :code:`reaction_solvent` is True.", + # ) + + ffn_args = parser.add_argument_group("FFN args") + ffn_args.add_argument( + "--ffn-hidden-dim", type=int, default=300, help="Hidden dimension in the FFN top model" + ) + ffn_args.add_argument( # TODO: the default in v1 was 2. (see weights_ffn_num_layers option) Do we really want the default to now be 1? + "--ffn-num-layers", type=int, default=1, help="Number of layers in FFN top model" + ) + # TODO: Decide if we want to implment this in v2 + # ffn_args.add_argument( + # "--features-only", + # action="store_true", + # help="Use only the additional features in an FFN, no graph network.", + # ) + + extra_mpnn_args = parser.add_argument_group("extra MPNN args") + extra_mpnn_args.add_argument( + "--batch-norm", action="store_true", help="Turn on batch normalization after aggregation" + ) + extra_mpnn_args.add_argument( + "--multiclass-num-classes", + type=int, + default=3, + help="Number of classes when running multiclass classification", + ) + # TODO: Add in v2.1 + # extra_mpnn_args.add_argument( + # "--spectral-activation", + # default="exp", + # choices=["softplus", "exp"], + # help="Indicates which function to use in task_type spectra training to constrain outputs to be positive.", + # ) + + train_data_args = parser.add_argument_group("training input data args") + train_data_args.add_argument( + "-w", + "--weight-column", + help="Name of the column in the input CSV containing individual data weights", + ) + train_data_args.add_argument( + "--target-columns", + nargs="+", + help="Name of the columns containing target values (by default, uses all columns except the SMILES column and the ``ignore_columns``)", + ) + train_data_args.add_argument( + "--ignore-columns", + nargs="+", + help="Name of the columns to ignore when ``target_columns`` is not provided", + ) + train_data_args.add_argument( + "--no-cache", + action="store_true", + help="Turn off caching the featurized ``MolGraph`` s at the beginning of training", + ) + train_data_args.add_argument( + "--splits-column", + help="Name of the column in the input CSV file containing 'train', 'val', or 'test' for each row.", + ) + # TODO: Add in v2.1 + # train_data_args.add_argument( + # "--spectra-phase-mask-path", + # help="Path to a file containing a phase mask array, used for excluding particular regions in spectra predictions.", + # ) + + train_args = parser.add_argument_group("training args") + train_args.add_argument( + "-t", + "--task-type", + default="regression", + action=LookupAction(PredictorRegistry), + help="Type of dataset (determines the default loss function used during training, defaults to ``regression``)", + ) + train_args.add_argument( + "-l", + 
"--loss-function", + action=LookupAction(LossFunctionRegistry), + help="Loss function to use during training (will use the default loss function for the given task type if not specified)", + ) + train_args.add_argument( + "--v-kl", + "--evidential-regularization", + type=float, + default=0.0, + help="Specify the value used in regularization for evidential loss function. The default value recommended by Soleimany et al. (2021) is 0.2. However, the optimal value is dataset-dependent, so it is recommended that users test different values to find the best value for their model.", + ) + + train_args.add_argument( + "--eps", type=float, default=1e-8, help="Evidential regularization epsilon" + ) + train_args.add_argument( + "--alpha", type=float, default=0.1, help="Target error bounds for quantile interval loss" + ) + # TODO: Add in v2.1 + # train_args.add_argument( # TODO: Is threshold the same thing as the spectra target floor? I'm not sure but combined them. + # "-T", + # "--threshold", + # "--spectra-target-floor", + # type=float, + # default=1e-8, + # help="spectral threshold limit. v1 help string: Values in targets for dataset type spectra are replaced with this value, intended to be a small positive number used to enforce positive values.", + # ) + train_args.add_argument( + "--metrics", + "--metric", + nargs="+", + action=LookupAction(MetricRegistry), + help="Specify the evaluation metrics. If unspecified, chemprop will use the following metrics for given dataset types: regression -> ``rmse``, classification -> ``roc``, multiclass -> ``ce`` ('cross entropy'), spectral -> ``sid``. If multiple metrics are provided, the 0-th one will be used for early stopping and checkpointing.", + ) + train_args.add_argument( + "--tracking-metric", + default="val_loss", + help="The metric to track for early stopping and checkpointing. 
Defaults to the criterion used during training.", + ) + train_args.add_argument( + "--show-individual-scores", + action="store_true", + help="Show all scores for individual targets, not just average, at the end.", + ) + train_args.add_argument( + "--task-weights", + nargs="+", + type=float, + help="Weights to apply for whole tasks in the loss function", + ) + train_args.add_argument( + "--warmup-epochs", + type=int, + default=2, + help="Number of epochs during which learning rate increases linearly from ``init_lr`` to ``max_lr`` (afterwards, learning rate decreases exponentially from ``max_lr`` to ``final_lr``)", + ) + + train_args.add_argument("--init-lr", type=float, default=1e-4, help="Initial learning rate.") + train_args.add_argument("--max-lr", type=float, default=1e-3, help="Maximum learning rate.") + train_args.add_argument("--final-lr", type=float, default=1e-4, help="Final learning rate.") + train_args.add_argument("--epochs", type=int, default=50, help="Number of epochs to train over") + train_args.add_argument( + "--patience", + type=int, + default=None, + help="Number of epochs to wait for improvement before early stopping", + ) + train_args.add_argument( + "--grad-clip", + type=float, + help="Passed directly to the lightning trainer which controls grad clipping (see the ``Trainer()`` docstring for details)", + ) + train_args.add_argument( + "--class-balance", + action="store_true", + help="Ensures each training batch contains an equal number of positive and negative samples.", + ) + + split_args = parser.add_argument_group("split args") + split_args.add_argument( + "--split", + "--split-type", + type=uppercase, + default="RANDOM", + choices=list(SplitType.keys()), + help="Method of splitting the data into train/val/test (case insensitive)", + ) + split_args.add_argument( + "--split-sizes", + type=float, + nargs=3, + default=[0.8, 0.1, 0.1], + help="Split proportions for train/validation/test sets", + ) + split_args.add_argument( + "--split-key-molecule", + type=int, + default=0, + help="Specify the index of the key molecule used for splitting when multiple molecules are present and constrained split_type is used (e.g., ``scaffold_balanced`` or ``random_with_repeated_smiles``). Note that this index begins with zero for the first molecule.", + ) + split_args.add_argument("--num-replicates", type=int, default=1, help="Number of replicates.") + split_args.add_argument("-k", "--num-folds", help=_CV_REMOVAL_ERROR) + split_args.add_argument( + "--save-smiles-splits", + action="store_true", + help="Whether to store the SMILES in each train/val/test split", + ) + split_args.add_argument( + "--splits-file", + type=Path, + help="Path to a JSON file containing pre-defined splits for the input data, formatted as a list of dictionaries with keys ``train``, ``val``, and ``test`` and values as lists of indices or formatted strings (e.g. [0, 1, 2, 4] or '0-2,4')", + ) + split_args.add_argument( + "--data-seed", + type=int, + default=0, + help="Specify the random seed to use when splitting data into train/val/test sets. 
When ``--num-replicates`` > 1, the first replicate uses this seed and all subsequent replicates add 1 to the seed (also used for shuffling data in ``build_dataloader`` when ``shuffle`` is True).", + ) + + parser.add_argument( + "--pytorch-seed", + type=int, + default=None, + help="Seed for PyTorch randomness (e.g., random initial weights)", + ) + + return parser + + +def process_train_args(args: Namespace) -> Namespace: + if args.output_dir is None: + args.output_dir = CHEMPROP_TRAIN_DIR / args.data_path.stem / NOW + + return args + + +def validate_train_args(args): + if args.config_path is None and args.data_path is None: + raise ArgumentError(argument=None, message="Data path must be provided for training.") + + if args.num_folds is not None: # i.e. user-specified + raise ArgumentError(argument=None, message=_CV_REMOVAL_ERROR) + + if args.data_path.suffix not in [".csv"]: + raise ArgumentError( + argument=None, message=f"Input data must be a CSV file. Got {args.data_path}" + ) + + if args.epochs != -1 and args.epochs <= args.warmup_epochs: + raise ArgumentError( + argument=None, + message=f"The number of epochs should be higher than the number of epochs during warmup. Got {args.epochs} epochs and {args.warmup_epochs} warmup epochs", + ) + + # TODO: model_frzn is deprecated and then remove in v2.2 + if args.checkpoint is not None and args.model_frzn is not None: + raise ArgumentError( + argument=None, + message="`--checkpoint` and `--model-frzn` cannot be used at the same time.", + ) + + if "--model-frzn" in sys.argv: + logger.warning( + "`--model-frzn` is deprecated and will be removed in v2.2. " + "Please use `--checkpoint` with `--freeze-encoder` instead." + ) + + if args.freeze_encoder and args.checkpoint is None: + raise ArgumentError( + argument=None, + message="`--freeze-encoder` can only be used when `--checkpoint` is used.", + ) + + if args.frzn_ffn_layers > 0: + if args.checkpoint is None and args.model_frzn is None: + raise ArgumentError( + argument=None, + message="`--frzn-ffn-layers` can only be used when `--checkpoint` or `--model-frzn` (depreciated in v2.1) is used.", + ) + if args.checkpoint is not None and not args.freeze_encoder: + raise ArgumentError( + argument=None, + message="To freeze the first `n` layers of the FFN via `--frzn-ffn-layers`. The message passing layer should also be frozen with `--freeze-encoder`.", + ) + + if args.class_balance and args.task_type != "classification": + raise ArgumentError( + argument=None, message="Class balance is only applicable for classification tasks." + ) + + valid_tracking_metrics = ( + args.metrics or [PredictorRegistry[args.task_type]._T_default_metric.alias] + ) + ["val_loss"] + if args.tracking_metric not in valid_tracking_metrics: + raise ArgumentError( + argument=None, + message=f"Tracking metric must be one of {','.join(valid_tracking_metrics)}. " + f"Got {args.tracking_metric}. 
Additional tracking metric options can be specified with " + "the `--metrics` flag.", + ) + + input_cols, target_cols = get_column_names( + args.data_path, + args.smiles_columns, + args.reaction_columns, + args.target_columns, + args.ignore_columns, + args.splits_column, + args.weight_column, + args.no_header_row, + ) + + args.input_columns = input_cols + args.target_columns = target_cols + + return args + + +def normalize_inputs(train_dset, val_dset, args): + multicomponent = isinstance(train_dset, MulticomponentDataset) + num_components = train_dset.n_components if multicomponent else 1 + + X_d_transform = None + V_f_transforms = [nn.Identity()] * num_components + E_f_transforms = [nn.Identity()] * num_components + V_d_transforms = [None] * num_components + graph_transforms = [] + + d_xd = train_dset.d_xd + d_vf = train_dset.d_vf + d_ef = train_dset.d_ef + d_vd = train_dset.d_vd + + if d_xd > 0 and not args.no_descriptor_scaling: + scaler = train_dset.normalize_inputs("X_d") + val_dset.normalize_inputs("X_d", scaler) + + scaler = scaler if not isinstance(scaler, list) else scaler[0] + + if scaler is not None: + logger.info( + f"Descriptors: loc = {np.array2string(scaler.mean_, precision=3)}, scale = {np.array2string(scaler.scale_, precision=3)}" + ) + X_d_transform = ScaleTransform.from_standard_scaler(scaler) + + if d_vf > 0 and not args.no_atom_feature_scaling: + scaler = train_dset.normalize_inputs("V_f") + val_dset.normalize_inputs("V_f", scaler) + + scalers = [scaler] if not isinstance(scaler, list) else scaler + + for i, scaler in enumerate(scalers): + if scaler is None: + continue + + logger.info( + f"Atom features for mol {i}: loc = {np.array2string(scaler.mean_, precision=3)}, scale = {np.array2string(scaler.scale_, precision=3)}" + ) + featurizer = ( + train_dset.datasets[i].featurizer if multicomponent else train_dset.featurizer + ) + V_f_transforms[i] = ScaleTransform.from_standard_scaler( + scaler, pad=featurizer.atom_fdim - featurizer.extra_atom_fdim + ) + + if d_ef > 0 and not args.no_bond_feature_scaling: + scaler = train_dset.normalize_inputs("E_f") + val_dset.normalize_inputs("E_f", scaler) + + scalers = [scaler] if not isinstance(scaler, list) else scaler + + for i, scaler in enumerate(scalers): + if scaler is None: + continue + + logger.info( + f"Bond features for mol {i}: loc = {np.array2string(scaler.mean_, precision=3)}, scale = {np.array2string(scaler.scale_, precision=3)}" + ) + featurizer = ( + train_dset.datasets[i].featurizer if multicomponent else train_dset.featurizer + ) + E_f_transforms[i] = ScaleTransform.from_standard_scaler( + scaler, pad=featurizer.bond_fdim - featurizer.extra_bond_fdim + ) + + for V_f_transform, E_f_transform in zip(V_f_transforms, E_f_transforms): + graph_transforms.append(GraphTransform(V_f_transform, E_f_transform)) + + if d_vd > 0 and not args.no_atom_descriptor_scaling: + scaler = train_dset.normalize_inputs("V_d") + val_dset.normalize_inputs("V_d", scaler) + + scalers = [scaler] if not isinstance(scaler, list) else scaler + + for i, scaler in enumerate(scalers): + if scaler is None: + continue + + logger.info( + f"Atom descriptors for mol {i}: loc = {np.array2string(scaler.mean_, precision=3)}, scale = {np.array2string(scaler.scale_, precision=3)}" + ) + V_d_transforms[i] = ScaleTransform.from_standard_scaler(scaler) + + return X_d_transform, graph_transforms, V_d_transforms + + +def load_and_use_pretrained_model_scalers(model_path: Path, train_dset, val_dset) -> None: + if isinstance(train_dset, MulticomponentDataset): + _model 
= MulticomponentMPNN.load_from_file(model_path) + blocks = _model.message_passing.blocks + train_dsets = train_dset.datasets + val_dsets = val_dset.datasets + else: + _model = MPNN.load_from_file(model_path) + blocks = [_model.message_passing] + train_dsets = [train_dset] + val_dsets = [val_dset] + + for i in range(len(blocks)): + if isinstance(_model.X_d_transform, ScaleTransform): + scaler = _model.X_d_transform.to_standard_scaler() + train_dsets[i].normalize_inputs("X_d", scaler) + val_dsets[i].normalize_inputs("X_d", scaler) + + if isinstance(blocks[i].graph_transform, GraphTransform): + if isinstance(blocks[i].graph_transform.V_transform, ScaleTransform): + V_anti_pad = ( + train_dsets[i].featurizer.atom_fdim - train_dsets[i].featurizer.extra_atom_fdim + ) + scaler = blocks[i].graph_transform.V_transform.to_standard_scaler( + anti_pad=V_anti_pad + ) + train_dsets[i].normalize_inputs("V_f", scaler) + val_dsets[i].normalize_inputs("V_f", scaler) + if isinstance(blocks[i].graph_transform.E_transform, ScaleTransform): + E_anti_pad = ( + train_dsets[i].featurizer.bond_fdim - train_dsets[i].featurizer.extra_bond_fdim + ) + scaler = blocks[i].graph_transform.E_transform.to_standard_scaler( + anti_pad=E_anti_pad + ) + train_dsets[i].normalize_inputs("E_f", scaler) + val_dsets[i].normalize_inputs("E_f", scaler) + + if isinstance(blocks[i].V_d_transform, ScaleTransform): + scaler = blocks[i].V_d_transform.to_standard_scaler() + train_dsets[i].normalize_inputs("V_d", scaler) + val_dsets[i].normalize_inputs("V_d", scaler) + + if isinstance(_model.predictor.output_transform, UnscaleTransform): + scaler = _model.predictor.output_transform.to_standard_scaler() + train_dset.normalize_targets(scaler) + val_dset.normalize_targets(scaler) + + +def save_config(parser: ArgumentParser, args: Namespace, config_path: Path): + config_args = deepcopy(args) + for key, value in vars(config_args).items(): + if isinstance(value, Path): + setattr(config_args, key, str(value)) + + for key in ["atom_features_path", "atom_descriptors_path", "bond_features_path"]: + if getattr(config_args, key) is not None: + for index, path in getattr(config_args, key).items(): + getattr(config_args, key)[index] = str(path) + + parser.write_config_file(parsed_namespace=config_args, output_file_paths=[str(config_path)]) + + +def save_smiles_splits(args: Namespace, output_dir, train_dset, val_dset, test_dset): + match (args.smiles_columns, args.reaction_columns): + case [_, None]: + column_labels = deepcopy(args.smiles_columns) + case [None, _]: + column_labels = deepcopy(args.reaction_columns) + case _: + column_labels = deepcopy(args.smiles_columns) + column_labels.extend(args.reaction_columns) + + train_smis = train_dset.names + df_train = pd.DataFrame(train_smis, columns=column_labels) + df_train.to_csv(output_dir / "train_smiles.csv", index=False) + + val_smis = val_dset.names + df_val = pd.DataFrame(val_smis, columns=column_labels) + df_val.to_csv(output_dir / "val_smiles.csv", index=False) + + if test_dset is not None: + test_smis = test_dset.names + df_test = pd.DataFrame(test_smis, columns=column_labels) + df_test.to_csv(output_dir / "test_smiles.csv", index=False) + + +def build_splits(args, format_kwargs, featurization_kwargs): + """build the train/val/test splits""" + logger.info(f"Pulling data from file: {args.data_path}") + all_data = build_data_from_files( + args.data_path, + p_descriptors=args.descriptors_path, + p_atom_feats=args.atom_features_path, + p_bond_feats=args.bond_features_path, + 
p_atom_descs=args.atom_descriptors_path, + **format_kwargs, + **featurization_kwargs, + ) + + if args.splits_column is not None: + df = pd.read_csv( + args.data_path, header=None if args.no_header_row else "infer", index_col=False + ) + grouped = df.groupby(df[args.splits_column].str.lower()) + train_indices = grouped.groups.get("train", pd.Index([])).tolist() + val_indices = grouped.groups.get("val", pd.Index([])).tolist() + test_indices = grouped.groups.get("test", pd.Index([])).tolist() + train_indices, val_indices, test_indices = [train_indices], [val_indices], [test_indices] + + elif args.splits_file is not None: + with open(args.splits_file, "rb") as json_file: + split_idxss = json.load(json_file) + train_indices = [parse_indices(d["train"]) for d in split_idxss] + val_indices = [parse_indices(d["val"]) for d in split_idxss] + test_indices = [parse_indices(d["test"]) for d in split_idxss] + args.num_replicates = len(split_idxss) + + else: + splitting_data = all_data[args.split_key_molecule] + if isinstance(splitting_data[0], ReactionDatapoint): + splitting_mols = [datapoint.rct for datapoint in splitting_data] + else: + splitting_mols = [datapoint.mol for datapoint in splitting_data] + train_indices, val_indices, test_indices = make_split_indices( + splitting_mols, args.split, args.split_sizes, args.data_seed, args.num_replicates + ) + + train_data, val_data, test_data = split_data_by_indices( + all_data, train_indices, val_indices, test_indices + ) + for i_split in range(len(train_data)): + sizes = [len(train_data[i_split][0]), len(val_data[i_split][0]), len(test_data[i_split][0])] + logger.info(f"train/val/test split_{i_split} sizes: {sizes}") + + return train_data, val_data, test_data + + +def summarize( + target_cols: list[str], task_type: str, dataset: _MolGraphDatasetMixin +) -> tuple[list, list]: + if task_type in [ + "regression", + "regression-mve", + "regression-evidential", + "regression-quantile", + ]: + if isinstance(dataset, MulticomponentDataset): + y = dataset.datasets[0].Y + else: + y = dataset.Y + y_mean = np.nanmean(y, axis=0) + y_std = np.nanstd(y, axis=0) + y_median = np.nanmedian(y, axis=0) + mean_dev_abs = np.abs(y - y_mean) + num_targets = np.sum(~np.isnan(y), axis=0) + frac_1_sigma = np.sum((mean_dev_abs < y_std), axis=0) / num_targets + frac_2_sigma = np.sum((mean_dev_abs < 2 * y_std), axis=0) / num_targets + + column_headers = ["Statistic"] + [f"Value ({target_cols[i]})" for i in range(y.shape[1])] + table_rows = [ + ["Num. smiles"] + [f"{len(y)}" for i in range(y.shape[1])], + ["Num. targets"] + [f"{num_targets[i]}" for i in range(y.shape[1])], + ["Num. NaN"] + [f"{len(y) - num_targets[i]}" for i in range(y.shape[1])], + ["Mean"] + [f"{mean:0.3g}" for mean in y_mean], + ["Std. 
dev."] + [f"{std:0.3g}" for std in y_std], + ["Median"] + [f"{median:0.3g}" for median in y_median], + ["% within 1 s.d."] + [f"{sigma:0.0%}" for sigma in frac_1_sigma], + ["% within 2 s.d."] + [f"{sigma:0.0%}" for sigma in frac_2_sigma], + ] + return (column_headers, table_rows) + elif task_type in [ + "classification", + "classification-dirichlet", + "multiclass", + "multiclass-dirichlet", + ]: + if isinstance(dataset, MulticomponentDataset): + y = dataset.datasets[0].Y + else: + y = dataset.Y + + mask = np.isnan(y) + classes = np.sort(np.unique(y[~mask])) + + class_counts = np.stack([(classes[:, None] == y[:, i]).sum(1) for i in range(y.shape[1])]) + class_fracs = class_counts / y.shape[0] + nan_count = np.nansum(mask, axis=0) + nan_frac = nan_count / y.shape[0] + + column_headers = ["Class"] + [f"Count/Percent {target_cols[i]}" for i in range(y.shape[1])] + + table_rows = [ + [f"{k}"] + [f"{class_counts[j, i]}/{class_fracs[j, i]:0.0%}" for j in range(y.shape[1])] + for i, k in enumerate(classes) + ] + + nan_row = ["NaN"] + [f"{nan_count[i]}/{nan_frac[i]:0.0%}" for i in range(y.shape[1])] + table_rows.append(nan_row) + + total_row = ["Total"] + [f"{y.shape[0]}/{100.00}%" for i in range(y.shape[1])] + table_rows.append(total_row) + + return (column_headers, table_rows) + else: + raise ValueError(f"unsupported task type! Task type '{task_type}' was not recognized.") + + +def build_table(column_headers: list[str], table_rows: list[str], title: str | None = None) -> str: + right_justified_columns = [ + Column(header=column_header, justify="right") for column_header in column_headers + ] + table = Table(*right_justified_columns, title=title) + for row in table_rows: + table.add_row(*row) + + console = Console(record=True, file=StringIO(), width=200) + console.print(table) + return console.export_text() + + +def build_datasets(args, train_data, val_data, test_data): + """build the train/val/test datasets, where :attr:`test_data` may be None""" + multicomponent = len(train_data) > 1 + if multicomponent: + train_dsets = [ + make_dataset(data, args.rxn_mode, args.multi_hot_atom_featurizer_mode) + for data in train_data + ] + val_dsets = [ + make_dataset(data, args.rxn_mode, args.multi_hot_atom_featurizer_mode) + for data in val_data + ] + train_dset = MulticomponentDataset(train_dsets) + val_dset = MulticomponentDataset(val_dsets) + if len(test_data[0]) > 0: + test_dsets = [ + make_dataset(data, args.rxn_mode, args.multi_hot_atom_featurizer_mode) + for data in test_data + ] + test_dset = MulticomponentDataset(test_dsets) + else: + test_dset = None + else: + train_data = train_data[0] + val_data = val_data[0] + test_data = test_data[0] + train_dset = make_dataset(train_data, args.rxn_mode, args.multi_hot_atom_featurizer_mode) + val_dset = make_dataset(val_data, args.rxn_mode, args.multi_hot_atom_featurizer_mode) + if len(test_data) > 0: + test_dset = make_dataset(test_data, args.rxn_mode, args.multi_hot_atom_featurizer_mode) + else: + test_dset = None + if args.task_type != "spectral": + for dataset, label in zip( + [train_dset, val_dset, test_dset], ["Training", "Validation", "Test"] + ): + column_headers, table_rows = summarize(args.target_columns, args.task_type, dataset) + output = build_table(column_headers, table_rows, f"Summary of {label} Data") + logger.info("\n" + output) + + return train_dset, val_dset, test_dset + + +def build_model( + args, + train_dset: MolGraphDataset | MulticomponentDataset, + output_transform: UnscaleTransform, + input_transforms: tuple[ScaleTransform, 
list[GraphTransform], list[ScaleTransform]], +) -> MPNN: + mp_cls = AtomMessagePassing if args.atom_messages else BondMessagePassing + + X_d_transform, graph_transforms, V_d_transforms = input_transforms + if isinstance(train_dset, MulticomponentDataset): + mp_blocks = [ + mp_cls( + train_dset.datasets[i].featurizer.atom_fdim, + train_dset.datasets[i].featurizer.bond_fdim, + d_h=args.message_hidden_dim, + d_vd=( + train_dset.datasets[i].d_vd + if isinstance(train_dset.datasets[i], MoleculeDataset) + else 0 + ), + bias=args.message_bias, + depth=args.depth, + undirected=args.undirected, + dropout=args.dropout, + activation=args.activation, + V_d_transform=V_d_transforms[i], + graph_transform=graph_transforms[i], + ) + for i in range(train_dset.n_components) + ] + if args.mpn_shared: + if args.reaction_columns is not None and args.smiles_columns is not None: + raise ArgumentError( + argument=None, + message="Cannot use shared MPNN with both molecule and reaction data.", + ) + + mp_block = MulticomponentMessagePassing(mp_blocks, train_dset.n_components, args.mpn_shared) + # NOTE(degraff): this if/else block should be handled by the init of MulticomponentMessagePassing + # if args.mpn_shared: + # mp_block = MulticomponentMessagePassing(mp_blocks[0], n_components, args.mpn_shared) + # else: + d_xd = train_dset.datasets[0].d_xd + n_tasks = train_dset.datasets[0].Y.shape[1] + mpnn_cls = MulticomponentMPNN + else: + mp_block = mp_cls( + train_dset.featurizer.atom_fdim, + train_dset.featurizer.bond_fdim, + d_h=args.message_hidden_dim, + d_vd=train_dset.d_vd if isinstance(train_dset, MoleculeDataset) else 0, + bias=args.message_bias, + depth=args.depth, + undirected=args.undirected, + dropout=args.dropout, + activation=args.activation, + V_d_transform=V_d_transforms[0], + graph_transform=graph_transforms[0], + ) + d_xd = train_dset.d_xd + n_tasks = train_dset.Y.shape[1] + mpnn_cls = MPNN + + agg = Factory.build(AggregationRegistry[args.aggregation], norm=args.aggregation_norm) + predictor_cls = PredictorRegistry[args.task_type] + if args.loss_function is not None: + task_weights = torch.ones(n_tasks) if args.task_weights is None else args.task_weights + criterion = Factory.build( + LossFunctionRegistry[args.loss_function], + task_weights=task_weights, + v_kl=args.v_kl, + # threshold=args.threshold, TODO: Add in v2.1 + eps=args.eps, + alpha=args.alpha, + ) + else: + criterion = None + if args.metrics is not None: + metrics = [Factory.build(MetricRegistry[metric]) for metric in args.metrics] + else: + metrics = None + + predictor = Factory.build( + predictor_cls, + input_dim=mp_block.output_dim + d_xd, + n_tasks=n_tasks, + hidden_dim=args.ffn_hidden_dim, + n_layers=args.ffn_num_layers, + dropout=args.dropout, + activation=args.activation, + criterion=criterion, + task_weights=args.task_weights, + n_classes=args.multiclass_num_classes, + output_transform=output_transform, + # spectral_activation=args.spectral_activation, TODO: Add in v2.1 + ) + + if args.loss_function is None: + logger.info( + f"No loss function was specified! 
Using class default: {predictor_cls._T_default_criterion}" + ) + + return mpnn_cls( + mp_block, + agg, + predictor, + args.batch_norm, + metrics, + args.warmup_epochs, + args.init_lr, + args.max_lr, + args.final_lr, + X_d_transform=X_d_transform, + ) + + +def train_model( + args, train_loader, val_loader, test_loader, output_dir, output_transform, input_transforms +): + if args.checkpoint is not None: + model_paths = find_models(args.checkpoint) + if args.ensemble_size != len(model_paths): + logger.warning( + f"The number of models in ensemble for each splitting of data is set to {len(model_paths)}." + ) + args.ensemble_size = len(model_paths) + + for model_idx in range(args.ensemble_size): + model_output_dir = output_dir / f"model_{model_idx}" + model_output_dir.mkdir(exist_ok=True, parents=True) + + if args.pytorch_seed is None: + seed = torch.seed() + deterministic = False + else: + seed = args.pytorch_seed + model_idx + deterministic = True + + torch.manual_seed(seed) + + if args.checkpoint or args.model_frzn is not None: + mpnn_cls = ( + MulticomponentMPNN + if isinstance(train_loader.dataset, MulticomponentDataset) + else MPNN + ) + model_path = model_paths[model_idx] if args.checkpoint else args.model_frzn + model = mpnn_cls.load_from_file(model_path) + + if args.checkpoint: + model.apply( + lambda m: setattr(m, "p", args.dropout) + if isinstance(m, torch.nn.Dropout) + else None + ) + + # TODO: model_frzn is deprecated and then remove in v2.2 + if args.model_frzn or args.freeze_encoder: + model.message_passing.apply(lambda module: module.requires_grad_(False)) + model.message_passing.eval() + model.bn.apply(lambda module: module.requires_grad_(False)) + model.bn.eval() + for idx in range(args.frzn_ffn_layers): + model.predictor.ffn[idx].requires_grad_(False) + model.predictor.ffn[idx + 1].eval() + else: + model = build_model(args, train_loader.dataset, output_transform, input_transforms) + logger.info(model) + + try: + trainer_logger = TensorBoardLogger( + model_output_dir, "trainer_logs", default_hp_metric=False + ) + except ModuleNotFoundError as e: + logger.warning( + f"Unable to import TensorBoardLogger, reverting to CSVLogger (original error: {e})." 
+ ) + trainer_logger = CSVLogger(model_output_dir, "trainer_logs") + + if args.tracking_metric == "val_loss": + T_tracking_metric = model.criterion.__class__ + tracking_metric = args.tracking_metric + else: + T_tracking_metric = MetricRegistry[args.tracking_metric] + tracking_metric = "val/" + args.tracking_metric + + monitor_mode = "max" if T_tracking_metric.higher_is_better else "min" + logger.debug(f"Evaluation metric: '{T_tracking_metric.alias}', mode: '{monitor_mode}'") + + if args.remove_checkpoints: + temp_dir = TemporaryDirectory() + checkpoint_dir = Path(temp_dir.name) + else: + checkpoint_dir = model_output_dir + + checkpoint_filename = ( + f"best-epoch={{epoch}}-{tracking_metric.replace('/', '_')}=" + f"{{{tracking_metric}:.2f}}" + ) + checkpointing = ModelCheckpoint( + checkpoint_dir / "checkpoints", + checkpoint_filename, + tracking_metric, + mode=monitor_mode, + save_last=True, + auto_insert_metric_name=False, + ) + + if args.epochs != -1: + patience = args.patience if args.patience is not None else args.epochs + early_stopping = EarlyStopping(tracking_metric, patience=patience, mode=monitor_mode) + callbacks = [checkpointing, early_stopping] + else: + callbacks = [checkpointing] + + trainer = pl.Trainer( + logger=trainer_logger, + enable_progress_bar=True, + accelerator=args.accelerator, + devices=args.devices, + max_epochs=args.epochs, + callbacks=callbacks, + gradient_clip_val=args.grad_clip, + deterministic=deterministic, + ) + trainer.fit(model, train_loader, val_loader) + + if test_loader is not None: + if isinstance(trainer.strategy, DDPStrategy): + torch.distributed.destroy_process_group() + + best_ckpt_path = trainer.checkpoint_callback.best_model_path + trainer = pl.Trainer( + logger=trainer_logger, + enable_progress_bar=True, + accelerator=args.accelerator, + devices=1, + ) + model = model.load_from_checkpoint(best_ckpt_path) + predss = trainer.predict(model, dataloaders=test_loader) + else: + predss = trainer.predict(dataloaders=test_loader) + + preds = torch.concat(predss, 0) + if model.predictor.n_targets > 1: + preds = preds[..., 0] + preds = preds.numpy() + + evaluate_and_save_predictions( + preds, test_loader, model.metrics[:-1], model_output_dir, args + ) + + best_model_path = checkpointing.best_model_path + model = model.__class__.load_from_checkpoint(best_model_path) + p_model = model_output_dir / "best.pt" + save_model(p_model, model, args.target_columns) + logger.info(f"Best model saved to '{p_model}'") + + if args.remove_checkpoints: + temp_dir.cleanup() + + +def evaluate_and_save_predictions(preds, test_loader, metrics, model_output_dir, args): + if isinstance(test_loader.dataset, MulticomponentDataset): + test_dset = test_loader.dataset.datasets[0] + else: + test_dset = test_loader.dataset + targets = test_dset.Y + mask = torch.from_numpy(np.isfinite(targets)) + targets = np.nan_to_num(targets, nan=0.0) + weights = torch.ones(len(test_dset)) + lt_mask = torch.from_numpy(test_dset.lt_mask) if test_dset.lt_mask[0] is not None else None + gt_mask = torch.from_numpy(test_dset.gt_mask) if test_dset.gt_mask[0] is not None else None + + individual_scores = dict() + for metric in metrics: + individual_scores[metric.alias] = [] + for i, col in enumerate(args.target_columns): + if "multiclass" in args.task_type: + preds_slice = torch.from_numpy(preds[:, i : i + 1, :]) + targets_slice = torch.from_numpy(targets[:, i : i + 1]) + else: + preds_slice = torch.from_numpy(preds[:, i : i + 1]) + targets_slice = torch.from_numpy(targets[:, i : i + 1]) + preds_loss = 
metric( + preds_slice, + targets_slice, + mask[:, i : i + 1], + weights, + lt_mask[:, i] if lt_mask is not None else None, + gt_mask[:, i] if gt_mask is not None else None, + ) + individual_scores[metric.alias].append(preds_loss) + + logger.info("Test Set results:") + for metric in metrics: + avg_loss = sum(individual_scores[metric.alias]) / len(individual_scores[metric.alias]) + logger.info(f"test/{metric.alias}: {avg_loss}") + + if args.show_individual_scores: + logger.info("Entire Test Set individual results:") + for metric in metrics: + for i, col in enumerate(args.target_columns): + logger.info(f"test/{col}/{metric.alias}: {individual_scores[metric.alias][i]}") + + names = test_loader.dataset.names + if isinstance(test_loader.dataset, MulticomponentDataset): + namess = list(zip(*names)) + else: + namess = [names] + + columns = args.input_columns + args.target_columns + if "multiclass" in args.task_type: + columns = columns + [f"{col}_prob" for col in args.target_columns] + formatted_probability_strings = np.apply_along_axis( + lambda x: ",".join(map(str, x)), 2, preds + ) + predicted_class_labels = preds.argmax(axis=-1) + df_preds = pd.DataFrame( + list(zip(*namess, *predicted_class_labels.T, *formatted_probability_strings.T)), + columns=columns, + ) + else: + df_preds = pd.DataFrame(list(zip(*namess, *preds.T)), columns=columns) + df_preds.to_csv(model_output_dir / "test_predictions.csv", index=False) + + +def main(args): + format_kwargs = dict( + no_header_row=args.no_header_row, + smiles_cols=args.smiles_columns, + rxn_cols=args.reaction_columns, + target_cols=args.target_columns, + ignore_cols=args.ignore_columns, + splits_col=args.splits_column, + weight_col=args.weight_column, + bounded=args.loss_function is not None and "bounded" in args.loss_function, + ) + + featurization_kwargs = dict( + molecule_featurizers=args.molecule_featurizers, + keep_h=args.keep_h, + add_h=args.add_h, + ignore_chirality=args.ignore_chirality, + ) + + splits = build_splits(args, format_kwargs, featurization_kwargs) + + for replicate_idx, (train_data, val_data, test_data) in enumerate(zip(*splits)): + if args.num_replicates == 1: + output_dir = args.output_dir + else: + output_dir = args.output_dir / f"replicate_{replicate_idx}" + + output_dir.mkdir(exist_ok=True, parents=True) + + train_dset, val_dset, test_dset = build_datasets(args, train_data, val_data, test_data) + + if args.save_smiles_splits: + save_smiles_splits(args, output_dir, train_dset, val_dset, test_dset) + + if args.checkpoint or args.model_frzn is not None: + model_paths = find_models(args.checkpoint) + if len(model_paths) > 1: + logger.warning( + "Multiple checkpoint files were loaded, but only the scalers from " + f"{model_paths[0]} are used. It is assumed that all models provided have the " + "same data scalings, meaning they were trained on the same data." 
+ ) + model_path = model_paths[0] if args.checkpoint else args.model_frzn + load_and_use_pretrained_model_scalers(model_path, train_dset, val_dset) + input_transforms = (None, None, None) + output_transform = None + else: + input_transforms = normalize_inputs(train_dset, val_dset, args) + + if "regression" in args.task_type: + output_scaler = train_dset.normalize_targets() + val_dset.normalize_targets(output_scaler) + logger.info( + f"Train data: mean = {output_scaler.mean_} | std = {output_scaler.scale_}" + ) + output_transform = UnscaleTransform.from_standard_scaler(output_scaler) + else: + output_transform = None + + if not args.no_cache: + train_dset.cache = True + val_dset.cache = True + + train_loader = build_dataloader( + train_dset, + args.batch_size, + args.num_workers, + class_balance=args.class_balance, + seed=args.data_seed, + ) + if args.class_balance: + logger.debug( + f"With `--class-balance`, effective train size = {len(train_loader.sampler)}" + ) + val_loader = build_dataloader(val_dset, args.batch_size, args.num_workers, shuffle=False) + if test_dset is not None: + test_loader = build_dataloader( + test_dset, args.batch_size, args.num_workers, shuffle=False + ) + else: + test_loader = None + + train_model( + args, + train_loader, + val_loader, + test_loader, + output_dir, + output_transform, + input_transforms, + ) + + +if __name__ == "__main__": + # TODO: update this old code or remove it. + parser = ArgumentParser() + parser = TrainSubcommand.add_args(parser) + + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True) + args = parser.parse_args() + TrainSubcommand.func(args) diff --git a/chemprop/chemprop/cli/utils/__init__.py b/chemprop/chemprop/cli/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fdd239a2a06724abe893c0913cd079addab26ea6 --- /dev/null +++ b/chemprop/chemprop/cli/utils/__init__.py @@ -0,0 +1,30 @@ +from .actions import LookupAction +from .args import bounded +from .command import Subcommand +from .parsing import ( + build_data_from_files, + get_column_names, + make_datapoints, + make_dataset, + parse_indices, +) +from .utils import _pop_attr, _pop_attr_d, pop_attr + +__all__ = [ + "bounded", + "LookupAction", + "Subcommand", + "build_data_from_files", + "make_datapoints", + "make_dataset", + "get_column_names", + "parse_indices", + "actions", + "args", + "command", + "parsing", + "utils", + "pop_attr", + "_pop_attr", + "_pop_attr_d", +] diff --git a/chemprop/chemprop/cli/utils/actions.py b/chemprop/chemprop/cli/utils/actions.py new file mode 100644 index 0000000000000000000000000000000000000000..23e870f37b638499235ddccba0f72355efc3b7c7 --- /dev/null +++ b/chemprop/chemprop/cli/utils/actions.py @@ -0,0 +1,19 @@ +from argparse import _StoreAction +from typing import Any, Mapping + + +def LookupAction(obj: Mapping[str, Any]): + class LookupAction_(_StoreAction): + def __init__(self, option_strings, dest, default=None, choices=None, **kwargs): + if default not in obj.keys() and default is not None: + raise ValueError( + f"Invalid value for arg 'default': '{default}'. 
" + f"Expected one of {tuple(obj.keys())}" + ) + + kwargs["choices"] = choices if choices is not None else obj.keys() + kwargs["default"] = default + + super().__init__(option_strings, dest, **kwargs) + + return LookupAction_ diff --git a/chemprop/chemprop/cli/utils/args.py b/chemprop/chemprop/cli/utils/args.py new file mode 100644 index 0000000000000000000000000000000000000000..5c6f29e3cd48a39cda6555f6a35a133412df2dd2 --- /dev/null +++ b/chemprop/chemprop/cli/utils/args.py @@ -0,0 +1,34 @@ +import functools + +__all__ = ["bounded"] + + +def bounded(lo: float | None = None, hi: float | None = None): + if lo is None and hi is None: + raise ValueError("No bounds provided!") + + def decorator(f): + @functools.wraps(f) + def wrapper(*args, **kwargs): + x = f(*args, **kwargs) + + if (lo is not None and hi is not None) and not lo <= x <= hi: + raise ValueError(f"Parsed value outside of range [{lo}, {hi}]! got: {x}") + if hi is not None and x > hi: + raise ValueError(f"Parsed value below {hi}! got: {x}") + if lo is not None and x < lo: + raise ValueError(f"Parsed value above {lo}]! got: {x}") + + return x + + return wrapper + + return decorator + + +def uppercase(x: str): + return x.upper() + + +def lowercase(x: str): + return x.lower() diff --git a/chemprop/chemprop/cli/utils/command.py b/chemprop/chemprop/cli/utils/command.py new file mode 100644 index 0000000000000000000000000000000000000000..d9edd0d91855240dade06b5d67ae929339d155fa --- /dev/null +++ b/chemprop/chemprop/cli/utils/command.py @@ -0,0 +1,24 @@ +from abc import ABC, abstractmethod +from argparse import ArgumentParser, Namespace, _SubParsersAction + + +class Subcommand(ABC): + COMMAND: str + HELP: str | None = None + + @classmethod + def add(cls, subparsers: _SubParsersAction, parents) -> ArgumentParser: + parser = subparsers.add_parser(cls.COMMAND, help=cls.HELP, parents=parents) + cls.add_args(parser).set_defaults(func=cls.func) + + return parser + + @classmethod + @abstractmethod + def add_args(cls, parser: ArgumentParser) -> ArgumentParser: + pass + + @classmethod + @abstractmethod + def func(cls, args: Namespace): + pass diff --git a/chemprop/chemprop/cli/utils/parsing.py b/chemprop/chemprop/cli/utils/parsing.py new file mode 100644 index 0000000000000000000000000000000000000000..064dd8614a2e0f717d3152ca5bba8b1a9d95d685 --- /dev/null +++ b/chemprop/chemprop/cli/utils/parsing.py @@ -0,0 +1,457 @@ +import logging +from os import PathLike +from typing import Literal, Mapping, Sequence + +import numpy as np +import pandas as pd + +from chemprop.data.datapoints import MoleculeDatapoint, ReactionDatapoint +from chemprop.data.datasets import MoleculeDataset, ReactionDataset +from chemprop.featurizers.atom import get_multi_hot_atom_featurizer +from chemprop.featurizers.bond import MultiHotBondFeaturizer, RIGRBondFeaturizer +from chemprop.featurizers.molecule import MoleculeFeaturizerRegistry +from chemprop.featurizers.molgraph import ( + CondensedGraphOfReactionFeaturizer, + SimpleMoleculeMolGraphFeaturizer, +) +from chemprop.utils import make_mol + +logger = logging.getLogger(__name__) + + +def parse_csv( + path: PathLike, + smiles_cols: Sequence[str] | None, + rxn_cols: Sequence[str] | None, + target_cols: Sequence[str] | None, + ignore_cols: Sequence[str] | None, + splits_col: str | None, + weight_col: str | None, + bounded: bool = False, + no_header_row: bool = False, +): + df = pd.read_csv(path, header=None if no_header_row else "infer", index_col=False) + + if smiles_cols is not None and rxn_cols is not None: + smiss = 
df[smiles_cols].T.values.tolist() + rxnss = df[rxn_cols].T.values.tolist() + input_cols = [*smiles_cols, *rxn_cols] + elif smiles_cols is not None and rxn_cols is None: + smiss = df[smiles_cols].T.values.tolist() + rxnss = None + input_cols = smiles_cols + elif smiles_cols is None and rxn_cols is not None: + smiss = None + rxnss = df[rxn_cols].T.values.tolist() + input_cols = rxn_cols + else: + smiss = df.iloc[:, [0]].T.values.tolist() + rxnss = None + input_cols = [df.columns[0]] + + if target_cols is None: + target_cols = list( + column + for column in df.columns + if column + not in set( # if splits or weight is None, df.columns will never have None + input_cols + (ignore_cols or []) + [splits_col] + [weight_col] + ) + ) + + Y = df[target_cols] + weights = None if weight_col is None else df[weight_col].to_numpy(np.single) + + if bounded: + Y = Y.astype(str) + lt_mask = Y.applymap(lambda x: "<" in x).to_numpy() + gt_mask = Y.applymap(lambda x: ">" in x).to_numpy() + Y = Y.applymap(lambda x: x.strip("<").strip(">")).to_numpy(np.single) + else: + Y = Y.to_numpy(np.single) + lt_mask = None + gt_mask = None + + return smiss, rxnss, Y, weights, lt_mask, gt_mask + + +def get_column_names( + path: PathLike, + smiles_cols: Sequence[str] | None, + rxn_cols: Sequence[str] | None, + target_cols: Sequence[str] | None, + ignore_cols: Sequence[str] | None, + splits_col: str | None, + weight_col: str | None, + no_header_row: bool = False, +) -> tuple[list[str], list[str]]: + df_cols = pd.read_csv(path, index_col=False, nrows=0).columns.tolist() + + if no_header_row: + return ["SMILES"], ["pred_" + str(i) for i in range((len(df_cols) - 1))] + + input_cols = (smiles_cols or []) + (rxn_cols or []) + + if len(input_cols) == 0: + input_cols = [df_cols[0]] + + if target_cols is None: + target_cols = list( + column + for column in df_cols + if column + not in set( + input_cols + (ignore_cols or []) + ([splits_col] or []) + ([weight_col] or []) + ) + ) + + return input_cols, target_cols + + +def make_datapoints( + smiss: list[list[str]] | None, + rxnss: list[list[str]] | None, + Y: np.ndarray, + weights: np.ndarray | None, + lt_mask: np.ndarray | None, + gt_mask: np.ndarray | None, + X_d: np.ndarray | None, + V_fss: list[list[np.ndarray] | list[None]] | None, + E_fss: list[list[np.ndarray] | list[None]] | None, + V_dss: list[list[np.ndarray] | list[None]] | None, + molecule_featurizers: list[str] | None, + keep_h: bool, + add_h: bool, + ignore_chirality: bool, +) -> tuple[list[list[MoleculeDatapoint]], list[list[ReactionDatapoint]]]: + """Make the :class:`MoleculeDatapoint`s and :class:`ReactionDatapoint`s for a given + dataset. + + Parameters + ---------- + smiss : list[list[str]] | None + a list of ``j`` lists of ``n`` SMILES strings, where ``j`` is the number of molecules per + datapoint and ``n`` is the number of datapoints. If ``None``, the corresponding list of + :class:`MoleculeDatapoint`\s will be empty. + rxnss : list[list[str]] | None + a list of ``k`` lists of ``n`` reaction SMILES strings, where ``k`` is the number of + reactions per datapoint. If ``None``, the corresponding list of :class:`ReactionDatapoint`\s + will be empty. + Y : np.ndarray + the target values of shape ``n x m``, where ``m`` is the number of targets + weights : np.ndarray | None + the weights of the datapoints to use in the loss function of shape ``n x m``. If ``None``, + the weights all default to 1. 
+ lt_mask : np.ndarray | None + a boolean mask of shape ``n x m`` indicating whether the targets are less than inequality + targets. If ``None``, ``lt_mask`` for all datapoints will be ``None``. + gt_mask : np.ndarray | None + a boolean mask of shape ``n x m`` indicating whether the targets are greater than inequality + targets. If ``None``, ``gt_mask`` for all datapoints will be ``None``. + X_d : np.ndarray | None + the extra descriptors of shape ``n x p``, where ``p`` is the number of extra descriptors. If + ``None``, ``x_d`` for all datapoints will be ``None``. + V_fss : list[list[np.ndarray] | list[None]] | None + a list of ``j`` lists of ``n`` np.ndarrays each of shape ``v_jn x q_j``, where ``v_jn`` is + the number of atoms in the j-th molecule of the n-th datapoint and ``q_j`` is the number of + extra atom features used for the j-th molecules. Any of the ``j`` lists can be a list of + None values if the corresponding component does not use extra atom features. If ``None``, + ``V_f`` for all datapoints will be ``None``. + E_fss : list[list[np.ndarray] | list[None]] | None + a list of ``j`` lists of ``n`` np.ndarrays each of shape ``e_jn x r_j``, where ``e_jn`` is + the number of bonds in the j-th molecule of the n-th datapoint and ``r_j`` is the number of + extra bond features used for the j-th molecules. Any of the ``j`` lists can be a list of + None values if the corresponding component does not use extra bond features. If ``None``, + ``E_f`` for all datapoints will be ``None``. + V_dss : list[list[np.ndarray] | list[None]] | None + a list of ``j`` lists of ``n`` np.ndarrays each of shape ``v_jn x s_j``, where ``s_j`` is + the number of extra atom descriptors used for the j-th molecules. Any of the ``j`` lists can + be a list of None values if the corresponding component does not use extra atom features. If + ``None``, ``V_d`` for all datapoints will be ``None``. + molecule_featurizers : list[str] | None + a list of molecule featurizer names to generate additional molecule features to use as extra + descriptors. If there are multiple molecules per datapoint, the featurizers will be applied + to each molecule and concatenated. Note that a :code:`ReactionDatapoint` has two + RDKit :class:`~rdkit.Chem.Mol` objects, reactant(s) and product(s). Each + ``molecule_featurizer`` will be applied to both of these objects. + keep_h : bool + whether to keep hydrogen atoms + add_h : bool + whether to add hydrogen atoms + ignore_chirality : bool + whether to ignore chirality information + + Returns + ------- + list[list[MoleculeDatapoint]] + a list of ``j`` lists of ``n`` :class:`MoleculeDatapoint`\s + list[list[ReactionDatapoint]] + a list of ``k`` lists of ``n`` :class:`ReactionDatapoint`\s + .. note:: + either ``j`` or ``k`` may be 0, in which case the corresponding list will be empty. + + Raises + ------ + ValueError + if both ``smiss`` and ``rxnss`` are ``None``. + if ``smiss`` and ``rxnss`` are both given and have different lengths. + """ + if smiss is None and rxnss is None: + raise ValueError("args 'smiss' and 'rnxss' were both `None`!") + elif rxnss is None: + N = len(smiss[0]) + rxnss = [] + elif smiss is None: + N = len(rxnss[0]) + smiss = [] + elif len(smiss[0]) != len(rxnss[0]): + raise ValueError( + f"args 'smiss' and 'rxnss' must have same length! 
got {len(smiss[0])} and {len(rxnss[0])}" + ) + else: + N = len(smiss[0]) + + if len(smiss) > 0: + molss = [[make_mol(smi, keep_h, add_h, ignore_chirality) for smi in smis] for smis in smiss] + if len(rxnss) > 0: + rctss = [ + [ + make_mol( + f"{rct_smi}.{agt_smi}" if agt_smi else rct_smi, keep_h, add_h, ignore_chirality + ) + for rct_smi, agt_smi, _ in (rxn.split(">") for rxn in rxns) + ] + for rxns in rxnss + ] + pdtss = [ + [ + make_mol(pdt_smi, keep_h, add_h, ignore_chirality) + for _, _, pdt_smi in (rxn.split(">") for rxn in rxns) + ] + for rxns in rxnss + ] + + weights = np.ones(N, dtype=np.single) if weights is None else weights + gt_mask = [None] * N if gt_mask is None else gt_mask + lt_mask = [None] * N if lt_mask is None else lt_mask + + n_mols = len(smiss) if smiss else 0 + V_fss = [[None] * N] * n_mols if V_fss is None else V_fss + E_fss = [[None] * N] * n_mols if E_fss is None else E_fss + V_dss = [[None] * N] * n_mols if V_dss is None else V_dss + + if X_d is None and molecule_featurizers is None: + X_d = [None] * N + elif molecule_featurizers is None: + pass + else: + molecule_featurizers = [MoleculeFeaturizerRegistry[mf]() for mf in molecule_featurizers] + + if len(smiss) > 0: + mol_descriptors = np.hstack( + [ + np.vstack([np.hstack([mf(mol) for mf in molecule_featurizers]) for mol in mols]) + for mols in molss + ] + ) + if X_d is None: + X_d = mol_descriptors + else: + X_d = np.hstack([X_d, mol_descriptors]) + + if len(rxnss) > 0: + rct_pdt_descriptors = np.hstack( + [ + np.vstack( + [ + np.hstack( + [mf(mol) for mf in molecule_featurizers for mol in (rct, pdt)] + ) + for rct, pdt in zip(rcts, pdts) + ] + ) + for rcts, pdts in zip(rctss, pdtss) + ] + ) + if X_d is None: + X_d = rct_pdt_descriptors + else: + X_d = np.hstack([X_d, rct_pdt_descriptors]) + + mol_data = [ + [ + MoleculeDatapoint( + mol=molss[mol_idx][i], + name=smis[i], + y=Y[i], + weight=weights[i], + gt_mask=gt_mask[i], + lt_mask=lt_mask[i], + x_d=X_d[i], + x_phase=None, + V_f=V_fss[mol_idx][i], + E_f=E_fss[mol_idx][i], + V_d=V_dss[mol_idx][i], + ) + for i in range(N) + ] + for mol_idx, smis in enumerate(smiss) + ] + rxn_data = [ + [ + ReactionDatapoint( + rct=rctss[rxn_idx][i], + pdt=pdtss[rxn_idx][i], + name=rxns[i], + y=Y[i], + weight=weights[i], + gt_mask=gt_mask[i], + lt_mask=lt_mask[i], + x_d=X_d[i], + x_phase=None, + ) + for i in range(N) + ] + for rxn_idx, rxns in enumerate(rxnss) + ] + + return mol_data, rxn_data + + +def build_data_from_files( + p_data: PathLike, + no_header_row: bool, + smiles_cols: Sequence[str] | None, + rxn_cols: Sequence[str] | None, + target_cols: Sequence[str] | None, + ignore_cols: Sequence[str] | None, + splits_col: str | None, + weight_col: str | None, + bounded: bool, + p_descriptors: PathLike, + p_atom_feats: dict[int, PathLike], + p_bond_feats: dict[int, PathLike], + p_atom_descs: dict[int, PathLike], + **featurization_kwargs: Mapping, +) -> list[list[MoleculeDatapoint] | list[ReactionDatapoint]]: + smiss, rxnss, Y, weights, lt_mask, gt_mask = parse_csv( + p_data, + smiles_cols, + rxn_cols, + target_cols, + ignore_cols, + splits_col, + weight_col, + bounded, + no_header_row, + ) + n_molecules = len(smiss) if smiss is not None else 0 + n_datapoints = len(Y) + + X_ds = load_input_feats_and_descs(p_descriptors, None, None, feat_desc="X_d") + V_fss = load_input_feats_and_descs(p_atom_feats, n_molecules, n_datapoints, feat_desc="V_f") + E_fss = load_input_feats_and_descs(p_bond_feats, n_molecules, n_datapoints, feat_desc="E_f") + V_dss = 
load_input_feats_and_descs(p_atom_descs, n_molecules, n_datapoints, feat_desc="V_d") + + mol_data, rxn_data = make_datapoints( + smiss, + rxnss, + Y, + weights, + lt_mask, + gt_mask, + X_ds, + V_fss, + E_fss, + V_dss, + **featurization_kwargs, + ) + + return mol_data + rxn_data + + +def load_input_feats_and_descs( + paths: dict[int, PathLike] | PathLike, + n_molecules: int | None, + n_datapoints: int | None, + feat_desc: str, +): + if paths is None: + return None + + match feat_desc: + case "X_d": + path = paths + loaded_feature = np.load(path) + features = loaded_feature["arr_0"] + + case _: + for index in paths: + if index >= n_molecules: + raise ValueError( + f"For {n_molecules} molecules, atom/bond features/descriptors can only be " + f"specified for indices 0-{n_molecules - 1}! Got index {index}." + ) + + features = [] + for idx in range(n_molecules): + path = paths.get(idx, None) + + if path is not None: + loaded_feature = np.load(path) + loaded_feature = [ + loaded_feature[f"arr_{i}"] for i in range(len(loaded_feature)) + ] + else: + loaded_feature = [None] * n_datapoints + + features.append(loaded_feature) + return features + + +def make_dataset( + data: Sequence[MoleculeDatapoint] | Sequence[ReactionDatapoint], + reaction_mode: str, + multi_hot_atom_featurizer_mode: Literal["V1", "V2", "ORGANIC", "RIGR"] = "V2", +) -> MoleculeDataset | ReactionDataset: + atom_featurizer = get_multi_hot_atom_featurizer(multi_hot_atom_featurizer_mode) + match multi_hot_atom_featurizer_mode: + case "RIGR": + bond_featurizer = RIGRBondFeaturizer() + case "V1" | "V2" | "ORGANIC": + bond_featurizer = MultiHotBondFeaturizer() + case _: + raise TypeError( + f"Unsupported atom featurizer mode '{multi_hot_atom_featurizer_mode=}'!" + ) + + if isinstance(data[0], MoleculeDatapoint): + extra_atom_fdim = data[0].V_f.shape[1] if data[0].V_f is not None else 0 + extra_bond_fdim = data[0].E_f.shape[1] if data[0].E_f is not None else 0 + featurizer = SimpleMoleculeMolGraphFeaturizer( + atom_featurizer=atom_featurizer, + bond_featurizer=bond_featurizer, + extra_atom_fdim=extra_atom_fdim, + extra_bond_fdim=extra_bond_fdim, + ) + return MoleculeDataset(data, featurizer) + + featurizer = CondensedGraphOfReactionFeaturizer( + mode_=reaction_mode, atom_featurizer=atom_featurizer + ) + + return ReactionDataset(data, featurizer) + + +def parse_indices(idxs): + """Parses a string of indices into a list of integers. e.g. '0,1,2-4' -> [0, 1, 2, 3, 4]""" + if isinstance(idxs, str): + indices = [] + for idx in idxs.split(","): + if "-" in idx: + start, end = map(int, idx.split("-")) + indices.extend(range(start, end + 1)) + else: + indices.append(int(idx)) + return indices + return idxs diff --git a/chemprop/chemprop/cli/utils/utils.py b/chemprop/chemprop/cli/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8f63d224a36065f6a3332b7a3450a5ceb6a05568 --- /dev/null +++ b/chemprop/chemprop/cli/utils/utils.py @@ -0,0 +1,31 @@ +from typing import Any + +__all__ = ["pop_attr"] + + +def pop_attr(o: object, attr: str, *args) -> Any | None: + """like ``pop()`` but for attribute maps""" + match len(args): + case 0: + return _pop_attr(o, attr) + case 1: + return _pop_attr_d(o, attr, args[0]) + case _: + raise TypeError(f"Expected at most 2 arguments! 
got: {len(args)}") + + +def _pop_attr(o: object, attr: str) -> Any: + val = getattr(o, attr) + delattr(o, attr) + + return val + + +def _pop_attr_d(o: object, attr: str, default: Any | None = None) -> Any | None: + try: + val = getattr(o, attr) + delattr(o, attr) + except AttributeError: + val = default + + return val diff --git a/chemprop/chemprop/conf.py b/chemprop/chemprop/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..f8e3681442d4d4cb553b38d698c4b102bd7c088d --- /dev/null +++ b/chemprop/chemprop/conf.py @@ -0,0 +1,6 @@ +"""Global configuration variables for chemprop""" + +from chemprop.featurizers.molgraph.molecule import SimpleMoleculeMolGraphFeaturizer + +DEFAULT_ATOM_FDIM, DEFAULT_BOND_FDIM = SimpleMoleculeMolGraphFeaturizer().shape +DEFAULT_HIDDEN_DIM = 300 diff --git a/chemprop/chemprop/data/__init__.py b/chemprop/chemprop/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..843b2a94583f12bf5ca08ac6052f721d67bb2b37 --- /dev/null +++ b/chemprop/chemprop/data/__init__.py @@ -0,0 +1,41 @@ +from .collate import ( + BatchMolGraph, + MulticomponentTrainingBatch, + TrainingBatch, + collate_batch, + collate_multicomponent, +) +from .dataloader import build_dataloader +from .datapoints import MoleculeDatapoint, ReactionDatapoint +from .datasets import ( + Datum, + MoleculeDataset, + MolGraphDataset, + MulticomponentDataset, + ReactionDataset, +) +from .molgraph import MolGraph +from .samplers import ClassBalanceSampler, SeededSampler +from .splitting import SplitType, make_split_indices, split_data_by_indices + +__all__ = [ + "BatchMolGraph", + "TrainingBatch", + "collate_batch", + "MulticomponentTrainingBatch", + "collate_multicomponent", + "build_dataloader", + "MoleculeDatapoint", + "ReactionDatapoint", + "MoleculeDataset", + "ReactionDataset", + "Datum", + "MulticomponentDataset", + "MolGraphDataset", + "MolGraph", + "ClassBalanceSampler", + "SeededSampler", + "SplitType", + "make_split_indices", + "split_data_by_indices", +] diff --git a/chemprop/chemprop/data/__pycache__/__init__.cpython-37.pyc b/chemprop/chemprop/data/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6ba5b174fcc8ba84a8d88c7393bd31d834300826 Binary files /dev/null and b/chemprop/chemprop/data/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop/chemprop/data/__pycache__/data.cpython-37.pyc b/chemprop/chemprop/data/__pycache__/data.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be9b5658c2e6c9bd0459e266f1d96bfe53f7b58a Binary files /dev/null and b/chemprop/chemprop/data/__pycache__/data.cpython-37.pyc differ diff --git a/chemprop/chemprop/data/__pycache__/scaffold.cpython-37.pyc b/chemprop/chemprop/data/__pycache__/scaffold.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8dcd3f273cf63eb42e4ea8edd23d1016e7912beb Binary files /dev/null and b/chemprop/chemprop/data/__pycache__/scaffold.cpython-37.pyc differ diff --git a/chemprop/chemprop/data/__pycache__/scaler.cpython-37.pyc b/chemprop/chemprop/data/__pycache__/scaler.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4178af6742c18488b79dfcec26a804fa465c392 Binary files /dev/null and b/chemprop/chemprop/data/__pycache__/scaler.cpython-37.pyc differ diff --git a/chemprop/chemprop/data/__pycache__/utils.cpython-37.pyc b/chemprop/chemprop/data/__pycache__/utils.cpython-37.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..c70de06fd22b9a9e8d7894402c32121be965d7b5 Binary files /dev/null and b/chemprop/chemprop/data/__pycache__/utils.cpython-37.pyc differ diff --git a/chemprop/chemprop/data/collate.py b/chemprop/chemprop/data/collate.py new file mode 100644 index 0000000000000000000000000000000000000000..a59c87cad679a5de8c6fd987caaa95ddf7d5ba87 --- /dev/null +++ b/chemprop/chemprop/data/collate.py @@ -0,0 +1,120 @@ +from dataclasses import InitVar, dataclass, field +from typing import Iterable, NamedTuple, Sequence + +import numpy as np +import torch +from torch import Tensor + +from chemprop.data.datasets import Datum +from chemprop.data.molgraph import MolGraph + + +@dataclass(repr=False, eq=False, slots=True) +class BatchMolGraph: + """A :class:`BatchMolGraph` represents a batch of individual :class:`MolGraph`\s. + + It has all the attributes of a ``MolGraph`` with the addition of the ``batch`` attribute. This + class is intended for use with data loading, so it uses :obj:`~torch.Tensor`\s to store data + """ + + mgs: InitVar[Sequence[MolGraph]] + """A list of individual :class:`MolGraph`\s to be batched together""" + V: Tensor = field(init=False) + """the atom feature matrix""" + E: Tensor = field(init=False) + """the bond feature matrix""" + edge_index: Tensor = field(init=False) + """an tensor of shape ``2 x E`` containing the edges of the graph in COO format""" + rev_edge_index: Tensor = field(init=False) + """A tensor of shape ``E`` that maps from an edge index to the index of the source of the + reverse edge in the ``edge_index`` attribute.""" + batch: Tensor = field(init=False) + """the index of the parent :class:`MolGraph` in the batched graph""" + + __size: int = field(init=False) + + def __post_init__(self, mgs: Sequence[MolGraph]): + self.__size = len(mgs) + + Vs = [] + Es = [] + edge_indexes = [] + rev_edge_indexes = [] + batch_indexes = [] + + num_nodes = 0 + num_edges = 0 + for i, mg in enumerate(mgs): + Vs.append(mg.V) + Es.append(mg.E) + edge_indexes.append(mg.edge_index + num_nodes) + rev_edge_indexes.append(mg.rev_edge_index + num_edges) + batch_indexes.append([i] * len(mg.V)) + + num_nodes += mg.V.shape[0] + num_edges += mg.edge_index.shape[1] + + self.V = torch.from_numpy(np.concatenate(Vs)).float() + self.E = torch.from_numpy(np.concatenate(Es)).float() + self.edge_index = torch.from_numpy(np.hstack(edge_indexes)).long() + self.rev_edge_index = torch.from_numpy(np.concatenate(rev_edge_indexes)).long() + self.batch = torch.tensor(np.concatenate(batch_indexes)).long() + + def __len__(self) -> int: + """the number of individual :class:`MolGraph`\s in this batch""" + return self.__size + + def to(self, device: str | torch.device): + self.V = self.V.to(device) + self.E = self.E.to(device) + self.edge_index = self.edge_index.to(device) + self.rev_edge_index = self.rev_edge_index.to(device) + self.batch = self.batch.to(device) + + +class TrainingBatch(NamedTuple): + bmg: BatchMolGraph + V_d: Tensor | None + X_d: Tensor | None + Y: Tensor | None + w: Tensor + lt_mask: Tensor | None + gt_mask: Tensor | None + + +def collate_batch(batch: Iterable[Datum]) -> TrainingBatch: + mgs, V_ds, x_ds, ys, weights, lt_masks, gt_masks = zip(*batch) + + return TrainingBatch( + BatchMolGraph(mgs), + None if V_ds[0] is None else torch.from_numpy(np.concatenate(V_ds)).float(), + None if x_ds[0] is None else torch.from_numpy(np.array(x_ds)).float(), + None if ys[0] is None else torch.from_numpy(np.array(ys)).float(), + torch.tensor(weights, dtype=torch.float).unsqueeze(1), 
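        # (descriptive note) each optional field below is stacked across the batch only when
        # the first datum provides it; if the first V_d/x_d/y/mask entry is None, the whole
        # batched field is set to None instead of a partially-filled tensor.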
+ None if lt_masks[0] is None else torch.from_numpy(np.array(lt_masks)), + None if gt_masks[0] is None else torch.from_numpy(np.array(gt_masks)), + ) + + +class MulticomponentTrainingBatch(NamedTuple): + bmgs: list[BatchMolGraph] + V_ds: list[Tensor | None] + X_d: Tensor | None + Y: Tensor | None + w: Tensor + lt_mask: Tensor | None + gt_mask: Tensor | None + + +def collate_multicomponent(batches: Iterable[Iterable[Datum]]) -> MulticomponentTrainingBatch: + tbs = [collate_batch(batch) for batch in zip(*batches)] + + return MulticomponentTrainingBatch( + [tb.bmg for tb in tbs], + [tb.V_d for tb in tbs], + tbs[0].X_d, + tbs[0].Y, + tbs[0].w, + tbs[0].lt_mask, + tbs[0].gt_mask, + ) diff --git a/chemprop/chemprop/data/dataloader.py b/chemprop/chemprop/data/dataloader.py new file mode 100644 index 0000000000000000000000000000000000000000..4fc2b2ddee50c70794cb5b60403eecfa5241049f --- /dev/null +++ b/chemprop/chemprop/data/dataloader.py @@ -0,0 +1,71 @@ +import logging + +from torch.utils.data import DataLoader + +from chemprop.data.collate import collate_batch, collate_multicomponent +from chemprop.data.datasets import MoleculeDataset, MulticomponentDataset, ReactionDataset +from chemprop.data.samplers import ClassBalanceSampler, SeededSampler + +logger = logging.getLogger(__name__) + + +def build_dataloader( + dataset: MoleculeDataset | ReactionDataset | MulticomponentDataset, + batch_size: int = 64, + num_workers: int = 0, + class_balance: bool = False, + seed: int | None = None, + shuffle: bool = True, + **kwargs, +): + """Return a :obj:`~torch.utils.data.DataLoader` for :class:`MolGraphDataset`\s + + Parameters + ---------- + dataset : MoleculeDataset | ReactionDataset | MulticomponentDataset + The dataset containing the molecules or reactions to load. + batch_size : int, default=64 + the batch size to load. + num_workers : int, default=0 + the number of workers used to build batches. + class_balance : bool, default=False + Whether to perform class balancing (i.e., use an equal number of positive and negative + molecules). Class balance is only available for single task classification datasets. Set + shuffle to True in order to get a random subset of the larger class. + seed : int, default=None + the random seed to use for shuffling (only used when `shuffle` is `True`). + shuffle : bool, default=False + whether to shuffle the data during sampling. 
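    Examples
    --------
    An illustrative sketch, assuming ``train_dset`` is a :class:`MoleculeDataset`
    built elsewhere; the returned loader yields :class:`TrainingBatch` namedtuples::

        loader = build_dataloader(train_dset, batch_size=32, seed=0)
        bmg, V_d, X_d, Y, w, lt_mask, gt_mask = next(iter(loader))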
+ """ + + if class_balance: + sampler = ClassBalanceSampler(dataset.Y, seed, shuffle) + elif shuffle and seed is not None: + sampler = SeededSampler(len(dataset), seed) + else: + sampler = None + + if isinstance(dataset, MulticomponentDataset): + collate_fn = collate_multicomponent + else: + collate_fn = collate_batch + + if len(dataset) % batch_size == 1: + logger.warning( + f"Dropping last batch of size 1 to avoid issues with batch normalization \ +(dataset size = {len(dataset)}, batch_size = {batch_size})" + ) + drop_last = True + else: + drop_last = False + + return DataLoader( + dataset, + batch_size, + sampler is None and shuffle, + sampler, + num_workers=num_workers, + collate_fn=collate_fn, + drop_last=drop_last, + **kwargs, + ) diff --git a/chemprop/chemprop/data/datapoints.py b/chemprop/chemprop/data/datapoints.py new file mode 100644 index 0000000000000000000000000000000000000000..8c94a9a78c74b1fbb946706248839ff9d1c00e26 --- /dev/null +++ b/chemprop/chemprop/data/datapoints.py @@ -0,0 +1,150 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np +from rdkit.Chem import AllChem as Chem + +from chemprop.featurizers import Featurizer +from chemprop.utils import make_mol + +MoleculeFeaturizer = Featurizer[Chem.Mol, np.ndarray] + + +@dataclass(slots=True) +class _DatapointMixin: + """A mixin class for both molecule- and reaction- and multicomponent-type data""" + + y: np.ndarray | None = None + """the targets for the molecule with unknown targets indicated by `nan`s""" + weight: float = 1.0 + """the weight of this datapoint for the loss calculation.""" + gt_mask: np.ndarray | None = None + """Indicates whether the targets are an inequality regression target of the form `x`""" + x_d: np.ndarray | None = None + """A vector of length ``d_f`` containing additional features (e.g., Morgan fingerprint) that + will be concatenated to the global representation *after* aggregation""" + x_phase: list[float] = None + """A one-hot vector indicating the phase of the data, as used in spectra data.""" + name: str | None = None + """A string identifier for the datapoint.""" + + def __post_init__(self): + NAN_TOKEN = 0 + if self.x_d is not None: + self.x_d[np.isnan(self.x_d)] = NAN_TOKEN + + @property + def t(self) -> int | None: + return len(self.y) if self.y is not None else None + + +@dataclass +class _MoleculeDatapointMixin: + mol: Chem.Mol + """the molecule associated with this datapoint""" + + @classmethod + def from_smi( + cls, + smi: str, + *args, + keep_h: bool = False, + add_h: bool = False, + ignore_chirality: bool = False, + **kwargs, + ) -> _MoleculeDatapointMixin: + mol = make_mol(smi, keep_h, add_h, ignore_chirality) + + kwargs["name"] = smi if "name" not in kwargs else kwargs["name"] + + return cls(mol, *args, **kwargs) + + +@dataclass +class MoleculeDatapoint(_DatapointMixin, _MoleculeDatapointMixin): + """A :class:`MoleculeDatapoint` contains a single molecule and its associated features and targets.""" + + V_f: np.ndarray | None = None + """a numpy array of shape ``V x d_vf``, where ``V`` is the number of atoms in the molecule, and + ``d_vf`` is the number of additional features that will be concatenated to atom-level features + *before* message passing""" + E_f: np.ndarray | None = None + """A numpy array of shape ``E x d_ef``, where ``E`` is the number of bonds in the molecule, and + ``d_ef`` is the number of additional features containing additional features that will be + concatenated to bond-level features *before* message passing""" + 
V_d: np.ndarray | None = None + """A numpy array of shape ``V x d_vd``, where ``V`` is the number of atoms in the molecule, and + ``d_vd`` is the number of additional descriptors that will be concatenated to atom-level + descriptors *after* message passing""" + + def __post_init__(self): + NAN_TOKEN = 0 + if self.V_f is not None: + self.V_f[np.isnan(self.V_f)] = NAN_TOKEN + if self.E_f is not None: + self.E_f[np.isnan(self.E_f)] = NAN_TOKEN + if self.V_d is not None: + self.V_d[np.isnan(self.V_d)] = NAN_TOKEN + + super().__post_init__() + + def __len__(self) -> int: + return 1 + + +@dataclass +class _ReactionDatapointMixin: + rct: Chem.Mol + """the reactant associated with this datapoint""" + pdt: Chem.Mol + """the product associated with this datapoint""" + + @classmethod + def from_smi( + cls, + rxn_or_smis: str | tuple[str, str], + *args, + keep_h: bool = False, + add_h: bool = False, + ignore_chirality: bool = False, + **kwargs, + ) -> _ReactionDatapointMixin: + match rxn_or_smis: + case str(): + rct_smi, agt_smi, pdt_smi = rxn_or_smis.split(">") + rct_smi = f"{rct_smi}.{agt_smi}" if agt_smi else rct_smi + name = rxn_or_smis + case tuple(): + rct_smi, pdt_smi = rxn_or_smis + name = ">>".join(rxn_or_smis) + case _: + raise TypeError( + "Must provide either a reaction SMARTS string or a tuple of reactant and" + " a product SMILES strings!" + ) + + rct = make_mol(rct_smi, keep_h, add_h, ignore_chirality) + pdt = make_mol(pdt_smi, keep_h, add_h, ignore_chirality) + + kwargs["name"] = name if "name" not in kwargs else kwargs["name"] + + return cls(rct, pdt, *args, **kwargs) + + +@dataclass +class ReactionDatapoint(_DatapointMixin, _ReactionDatapointMixin): + """A :class:`ReactionDatapoint` contains a single reaction and its associated features and targets.""" + + def __post_init__(self): + if self.rct is None: + raise ValueError("Reactant cannot be `None`!") + if self.pdt is None: + raise ValueError("Product cannot be `None`!") + + return super().__post_init__() + + def __len__(self) -> int: + return 2 diff --git a/chemprop/chemprop/data/datasets.py b/chemprop/chemprop/data/datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..d2b13dd47b918ba1e9071b932aa335b702893a23 --- /dev/null +++ b/chemprop/chemprop/data/datasets.py @@ -0,0 +1,459 @@ +from dataclasses import dataclass, field +from functools import cached_property +from typing import NamedTuple, TypeAlias + +import numpy as np +from numpy.typing import ArrayLike +from rdkit import Chem +from rdkit.Chem import Mol +from sklearn.preprocessing import StandardScaler +from torch.utils.data import Dataset + +from chemprop.data.datapoints import MoleculeDatapoint, ReactionDatapoint +from chemprop.data.molgraph import MolGraph +from chemprop.featurizers.base import Featurizer +from chemprop.featurizers.molgraph import CGRFeaturizer, SimpleMoleculeMolGraphFeaturizer +from chemprop.featurizers.molgraph.cache import MolGraphCache, MolGraphCacheOnTheFly +from chemprop.types import Rxn + + +class Datum(NamedTuple): + """a singular training data point""" + + mg: MolGraph + V_d: np.ndarray | None + x_d: np.ndarray | None + y: np.ndarray | None + weight: float + lt_mask: np.ndarray | None + gt_mask: np.ndarray | None + + +MolGraphDataset: TypeAlias = Dataset[Datum] + + +class _MolGraphDatasetMixin: + def __len__(self) -> int: + return len(self.data) + + @cached_property + def _Y(self) -> np.ndarray: + """the raw targets of the dataset""" + return np.array([d.y for d in self.data], float) + + @property + def Y(self) -> 
np.ndarray: + """the (scaled) targets of the dataset""" + return self.__Y + + @Y.setter + def Y(self, Y: ArrayLike): + self._validate_attribute(Y, "targets") + + self.__Y = np.array(Y, float) + + @cached_property + def _X_d(self) -> np.ndarray: + """the raw extra descriptors of the dataset""" + return np.array([d.x_d for d in self.data]) + + @property + def X_d(self) -> np.ndarray: + """the (scaled) extra descriptors of the dataset""" + return self.__X_d + + @X_d.setter + def X_d(self, X_d: ArrayLike): + self._validate_attribute(X_d, "extra descriptors") + + self.__X_d = np.array(X_d) + + @property + def weights(self) -> np.ndarray: + return np.array([d.weight for d in self.data]) + + @property + def gt_mask(self) -> np.ndarray: + return np.array([d.gt_mask for d in self.data]) + + @property + def lt_mask(self) -> np.ndarray: + return np.array([d.lt_mask for d in self.data]) + + @property + def t(self) -> int | None: + return self.data[0].t if len(self.data) > 0 else None + + @property + def d_xd(self) -> int: + """the extra molecule descriptor dimension, if any""" + return 0 if self.X_d[0] is None else self.X_d.shape[1] + + @property + def names(self) -> list[str]: + return [d.name for d in self.data] + + def normalize_targets(self, scaler: StandardScaler | None = None) -> StandardScaler: + """Normalizes the targets of this dataset using a :obj:`StandardScaler` + + The :obj:`StandardScaler` subtracts the mean and divides by the standard deviation for + each task independently. NOTE: This should only be used for regression datasets. + + Returns + ------- + StandardScaler + a scaler fit to the targets. + """ + + if scaler is None: + scaler = StandardScaler().fit(self._Y) + + self.Y = scaler.transform(self._Y) + + return scaler + + def normalize_inputs( + self, key: str = "X_d", scaler: StandardScaler | None = None + ) -> StandardScaler: + VALID_KEYS = {"X_d"} + if key not in VALID_KEYS: + raise ValueError(f"Invalid feature key! got: {key}. expected one of: {VALID_KEYS}") + + X = self.X_d if self.X_d[0] is not None else None + + if X is None: + return scaler + + if scaler is None: + scaler = StandardScaler().fit(X) + + self.X_d = scaler.transform(X) + + return scaler + + def reset(self): + """Reset the atom and bond features; atom and extra descriptors; and targets of each + datapoint to their initial, unnormalized values.""" + self.__Y = self._Y + self.__X_d = self._X_d + + def _validate_attribute(self, X: np.ndarray, label: str): + if not len(self.data) == len(X): + raise ValueError( + f"number of molecules ({len(self.data)}) and {label} ({len(X)}) " + "must have same length!" + ) + + +@dataclass +class MoleculeDataset(_MolGraphDatasetMixin, MolGraphDataset): + """A :class:`MoleculeDataset` composed of :class:`MoleculeDatapoint`\s + + A :class:`MoleculeDataset` produces featurized data for input to a + :class:`MPNN` model. Typically, data featurization is performed on-the-fly + and parallelized across multiple workers via the :class:`~torch.utils.data + DataLoader` class. However, for small datasets, it may be more efficient to + featurize the data in advance and cache the results. This can be done by + setting ``MoleculeDataset.cache=True``. 
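    .. note::
        A minimal illustrative sketch, assuming ``datapoints`` is a list of
        :class:`MoleculeDatapoint`\s constructed elsewhere::

            dset = MoleculeDataset(datapoints)
            dset.cache = True  # precompute and cache the featurized MolGraphs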
+ + Parameters + ---------- + data : Iterable[MoleculeDatapoint] + the data from which to create a dataset + featurizer : MoleculeFeaturizer + the featurizer with which to generate MolGraphs of the molecules + """ + + data: list[MoleculeDatapoint] + featurizer: Featurizer[Mol, MolGraph] = field(default_factory=SimpleMoleculeMolGraphFeaturizer) + + def __post_init__(self): + if self.data is None: + raise ValueError("Data cannot be None!") + + self.reset() + self.cache = False + + def __getitem__(self, idx: int) -> Datum: + d = self.data[idx] + mg = self.mg_cache[idx] + + return Datum(mg, self.V_ds[idx], self.X_d[idx], self.Y[idx], d.weight, d.lt_mask, d.gt_mask) + + @property + def cache(self) -> bool: + return self.__cache + + @cache.setter + def cache(self, cache: bool = False): + self.__cache = cache + self._init_cache() + + def _init_cache(self): + """initialize the cache""" + self.mg_cache = (MolGraphCache if self.cache else MolGraphCacheOnTheFly)( + self.mols, self.V_fs, self.E_fs, self.featurizer + ) + + @property + def smiles(self) -> list[str]: + """the SMILES strings associated with the dataset""" + return [Chem.MolToSmiles(d.mol) for d in self.data] + + @property + def mols(self) -> list[Chem.Mol]: + """the molecules associated with the dataset""" + return [d.mol for d in self.data] + + @property + def _V_fs(self) -> list[np.ndarray]: + """the raw atom features of the dataset""" + return [d.V_f for d in self.data] + + @property + def V_fs(self) -> list[np.ndarray]: + """the (scaled) atom descriptors of the dataset""" + return self.__V_fs + + @V_fs.setter + def V_fs(self, V_fs: list[np.ndarray]): + """the (scaled) atom features of the dataset""" + self._validate_attribute(V_fs, "atom features") + + self.__V_fs = V_fs + self._init_cache() + + @property + def _E_fs(self) -> list[np.ndarray]: + """the raw bond features of the dataset""" + return [d.E_f for d in self.data] + + @property + def E_fs(self) -> list[np.ndarray]: + """the (scaled) bond features of the dataset""" + return self.__E_fs + + @E_fs.setter + def E_fs(self, E_fs: list[np.ndarray]): + self._validate_attribute(E_fs, "bond features") + + self.__E_fs = E_fs + self._init_cache() + + @property + def _V_ds(self) -> list[np.ndarray]: + """the raw atom descriptors of the dataset""" + return [d.V_d for d in self.data] + + @property + def V_ds(self) -> list[np.ndarray]: + """the (scaled) atom descriptors of the dataset""" + return self.__V_ds + + @V_ds.setter + def V_ds(self, V_ds: list[np.ndarray]): + self._validate_attribute(V_ds, "atom descriptors") + + self.__V_ds = V_ds + + @property + def d_vf(self) -> int: + """the extra atom feature dimension, if any""" + return 0 if self.V_fs[0] is None else self.V_fs[0].shape[1] + + @property + def d_ef(self) -> int: + """the extra bond feature dimension, if any""" + return 0 if self.E_fs[0] is None else self.E_fs[0].shape[1] + + @property + def d_vd(self) -> int: + """the extra atom descriptor dimension, if any""" + return 0 if self.V_ds[0] is None else self.V_ds[0].shape[1] + + def normalize_inputs( + self, key: str = "X_d", scaler: StandardScaler | None = None + ) -> StandardScaler: + VALID_KEYS = {"X_d", "V_f", "E_f", "V_d"} + + match key: + case "X_d": + X = None if self.d_xd == 0 else self.X_d + case "V_f": + X = None if self.d_vf == 0 else np.concatenate(self.V_fs, axis=0) + case "E_f": + X = None if self.d_ef == 0 else np.concatenate(self.E_fs, axis=0) + case "V_d": + X = None if self.d_vd == 0 else np.concatenate(self.V_ds, axis=0) + case _: + raise ValueError(f"Invalid 
feature key! got: {key}. expected one of: {VALID_KEYS}") + + if X is None: + return scaler + + if scaler is None: + scaler = StandardScaler().fit(X) + + match key: + case "X_d": + self.X_d = scaler.transform(X) + case "V_f": + self.V_fs = [scaler.transform(V_f) if V_f.size > 0 else V_f for V_f in self.V_fs] + case "E_f": + self.E_fs = [scaler.transform(E_f) if E_f.size > 0 else E_f for E_f in self.E_fs] + case "V_d": + self.V_ds = [scaler.transform(V_d) if V_d.size > 0 else V_d for V_d in self.V_ds] + case _: + raise RuntimeError("unreachable code reached!") + + return scaler + + def reset(self): + """Reset the atom and bond features; atom and extra descriptors; and targets of each + datapoint to their initial, unnormalized values.""" + super().reset() + self.__V_fs = self._V_fs + self.__E_fs = self._E_fs + self.__V_ds = self._V_ds + + +@dataclass +class ReactionDataset(_MolGraphDatasetMixin, MolGraphDataset): + """A :class:`ReactionDataset` composed of :class:`ReactionDatapoint`\s + + .. note:: + The featurized data provided by this class may be cached, simlar to a + :class:`MoleculeDataset`. To enable the cache, set ``ReactionDataset + cache=True``. + """ + + data: list[ReactionDatapoint] + """the dataset from which to load""" + featurizer: Featurizer[Rxn, MolGraph] = field(default_factory=CGRFeaturizer) + """the featurizer with which to generate MolGraphs of the input""" + + def __post_init__(self): + if self.data is None: + raise ValueError("Data cannot be None!") + + self.reset() + self.cache = False + + @property + def cache(self) -> bool: + return self.__cache + + @cache.setter + def cache(self, cache: bool = False): + self.__cache = cache + self.mg_cache = (MolGraphCache if cache else MolGraphCacheOnTheFly)( + self.mols, [None] * len(self), [None] * len(self), self.featurizer + ) + + def __getitem__(self, idx: int) -> Datum: + d = self.data[idx] + mg = self.mg_cache[idx] + + return Datum(mg, None, self.X_d[idx], self.Y[idx], d.weight, d.lt_mask, d.gt_mask) + + @property + def smiles(self) -> list[tuple]: + return [(Chem.MolToSmiles(d.rct), Chem.MolToSmiles(d.pdt)) for d in self.data] + + @property + def mols(self) -> list[Rxn]: + return [(d.rct, d.pdt) for d in self.data] + + @property + def d_vf(self) -> int: + return 0 + + @property + def d_ef(self) -> int: + return 0 + + @property + def d_vd(self) -> int: + return 0 + + +@dataclass(repr=False, eq=False) +class MulticomponentDataset(_MolGraphDatasetMixin, Dataset): + """A :class:`MulticomponentDataset` is a :class:`Dataset` composed of parallel + :class:`MoleculeDatasets` and :class:`ReactionDataset`\s""" + + datasets: list[MoleculeDataset | ReactionDataset] + """the parallel datasets""" + + def __post_init__(self): + sizes = [len(dset) for dset in self.datasets] + if not all(sizes[0] == size for size in sizes[1:]): + raise ValueError(f"Datasets must have all same length! 
got: {sizes}") + + def __len__(self) -> int: + return len(self.datasets[0]) + + @property + def n_components(self) -> int: + return len(self.datasets) + + def __getitem__(self, idx: int) -> list[Datum]: + return [dset[idx] for dset in self.datasets] + + @property + def smiles(self) -> list[list[str]]: + return list(zip(*[dset.smiles for dset in self.datasets])) + + @property + def names(self) -> list[list[str]]: + return list(zip(*[dset.names for dset in self.datasets])) + + @property + def mols(self) -> list[list[Chem.Mol]]: + return list(zip(*[dset.mols for dset in self.datasets])) + + def normalize_targets(self, scaler: StandardScaler | None = None) -> StandardScaler: + return self.datasets[0].normalize_targets(scaler) + + def normalize_inputs( + self, key: str = "X_d", scaler: list[StandardScaler] | None = None + ) -> list[StandardScaler]: + RXN_VALID_KEYS = {"X_d"} + match scaler: + case None: + return [ + dset.normalize_inputs(key) + if isinstance(dset, MoleculeDataset) or key in RXN_VALID_KEYS + else None + for dset in self.datasets + ] + case _: + assert len(scaler) == len( + self.datasets + ), "Number of scalers must match number of datasets!" + + return [ + dset.normalize_inputs(key, s) + if isinstance(dset, MoleculeDataset) or key in RXN_VALID_KEYS + else None + for dset, s in zip(self.datasets, scaler) + ] + + def reset(self): + return [dset.reset() for dset in self.datasets] + + @property + def d_xd(self) -> list[int]: + return self.datasets[0].d_xd + + @property + def d_vf(self) -> list[int]: + return sum(dset.d_vf for dset in self.datasets) + + @property + def d_ef(self) -> list[int]: + return sum(dset.d_ef for dset in self.datasets) + + @property + def d_vd(self) -> list[int]: + return sum(dset.d_vd for dset in self.datasets) diff --git a/chemprop/chemprop/data/molgraph.py b/chemprop/chemprop/data/molgraph.py new file mode 100644 index 0000000000000000000000000000000000000000..45d1e08734756ba56b8f39e6152071a1ac04e6bf --- /dev/null +++ b/chemprop/chemprop/data/molgraph.py @@ -0,0 +1,16 @@ +from typing import NamedTuple + +import numpy as np + + +class MolGraph(NamedTuple): + """A :class:`MolGraph` represents the graph featurization of a molecule.""" + + V: np.ndarray + """an array of shape ``V x d_v`` containing the atom features of the molecule""" + E: np.ndarray + """an array of shape ``E x d_e`` containing the bond features of the molecule""" + edge_index: np.ndarray + """an array of shape ``2 x E`` containing the edges of the graph in COO format""" + rev_edge_index: np.ndarray + """A array of shape ``E`` that maps from an edge index to the index of the source of the reverse edge in :attr:`edge_index` attribute.""" diff --git a/chemprop/chemprop/data/samplers.py b/chemprop/chemprop/data/samplers.py new file mode 100644 index 0000000000000000000000000000000000000000..8a24c9769ce73fa7c6a853f25899d6a95bc212cb --- /dev/null +++ b/chemprop/chemprop/data/samplers.py @@ -0,0 +1,66 @@ +from itertools import chain +from typing import Iterator, Optional + +import numpy as np +from torch.utils.data import Sampler + + +class SeededSampler(Sampler): + """A :class`SeededSampler` is a class for iterating through a dataset in a randomly seeded + fashion""" + + def __init__(self, N: int, seed: int): + if seed is None: + raise ValueError("arg 'seed' was `None`! 
A SeededSampler must be seeded!") + + self.idxs = np.arange(N) + self.rg = np.random.default_rng(seed) + + def __iter__(self) -> Iterator[int]: + """an iterator over indices to sample.""" + self.rg.shuffle(self.idxs) + + return iter(self.idxs) + + def __len__(self) -> int: + """the number of indices that will be sampled.""" + return len(self.idxs) + + +class ClassBalanceSampler(Sampler): + """A :class:`ClassBalanceSampler` samples data from a :class:`MolGraphDataset` such that + positive and negative classes are equally sampled + + Parameters + ---------- + dataset : MolGraphDataset + the dataset from which to sample + seed : int + the random seed to use for shuffling (only used when `shuffle` is `True`) + shuffle : bool, default=False + whether to shuffle the data during sampling + """ + + def __init__(self, Y: np.ndarray, seed: Optional[int] = None, shuffle: bool = False): + self.shuffle = shuffle + self.rg = np.random.default_rng(seed) + + idxs = np.arange(len(Y)) + actives = Y.any(1) + + self.pos_idxs = idxs[actives] + self.neg_idxs = idxs[~actives] + + self.length = 2 * min(len(self.pos_idxs), len(self.neg_idxs)) + + def __iter__(self) -> Iterator[int]: + """an iterator over indices to sample.""" + if self.shuffle: + self.rg.shuffle(self.pos_idxs) + self.rg.shuffle(self.neg_idxs) + + return chain(*zip(self.pos_idxs, self.neg_idxs)) + + def __len__(self) -> int: + """the number of indices that will be sampled.""" + return self.length diff --git a/chemprop/chemprop/data/splitting.py b/chemprop/chemprop/data/splitting.py new file mode 100644 index 0000000000000000000000000000000000000000..f4bb1b6f91667634bb21ad9460d9ee6e87286df3 --- /dev/null +++ b/chemprop/chemprop/data/splitting.py @@ -0,0 +1,225 @@ +from collections.abc import Iterable, Sequence +import copy +from enum import auto +import logging + +from astartes import train_test_split, train_val_test_split +from astartes.molecules import train_test_split_molecules, train_val_test_split_molecules +import numpy as np +from rdkit import Chem + +from chemprop.data.datapoints import MoleculeDatapoint, ReactionDatapoint +from chemprop.utils.utils import EnumMapping + +logger = logging.getLogger(__name__) + +Datapoints = Sequence[MoleculeDatapoint] | Sequence[ReactionDatapoint] +MulticomponentDatapoints = Sequence[Datapoints] + + +class SplitType(EnumMapping): + SCAFFOLD_BALANCED = auto() + RANDOM_WITH_REPEATED_SMILES = auto() + RANDOM = auto() + KENNARD_STONE = auto() + KMEANS = auto() + + +def make_split_indices( + mols: Sequence[Chem.Mol], + split: SplitType | str = "random", + sizes: tuple[float, float, float] = (0.8, 0.1, 0.1), + seed: int = 0, + num_replicates: int = 1, + num_folds: None = None, +) -> tuple[list[list[int]], ...]: + """Splits data into training, validation, and test splits. + + Parameters + ---------- + mols : Sequence[Chem.Mol] + Sequence of RDKit molecules to use for structure based splitting + split : SplitType | str, optional + Split type, one of ~chemprop.data.utils.SplitType, by default "random" + sizes : tuple[float, float, float], optional + 3-tuple with the proportions of data in the train, validation, and test sets, by default + (0.8, 0.1, 0.1). Set the middle value to 0 for a two way split. + seed : int, optional + The random seed passed to astartes, by default 0 + num_replicates : int, optional + Number of replicates, by default 1 + num_folds : None, optional + This argument was removed in v2.1 - use `num_replicates` instead. + + Returns + ------- + tuple[list[list[int]], ...] 
+ 2- or 3-member tuple containing num_replicates length lists of training, validation, and testing indexes. + + .. important:: + Validation may or may not be present + + Raises + ------ + ValueError + Requested split sizes tuple not of length 3 + ValueError + Unsupported split method requested + """ + if num_folds is not None: + raise RuntimeError("This argument was removed in v2.1 - use `num_replicates` instead.") + if num_replicates == 1: + logger.warning( + "The return type of make_split_indices has changed in v2.1 - see help(make_split_indices)" + ) + if (num_splits := len(sizes)) != 3: + raise ValueError( + f"Specify sizes for train, validation, and test (got {num_splits} values)." + ) + # typically include a validation set + include_val = True + split_fun = train_val_test_split + mol_split_fun = train_val_test_split_molecules + # default sampling arguments for astartes sampler + astartes_kwargs = dict( + train_size=sizes[0], test_size=sizes[2], return_indices=True, random_state=seed + ) + # if no validation set, reassign the splitting functions + if sizes[1] == 0.0: + include_val = False + split_fun = train_test_split + mol_split_fun = train_test_split_molecules + else: + astartes_kwargs["val_size"] = sizes[1] + + n_datapoints = len(mols) + train_replicates, val_replicates, test_replicates = [], [], [] + for _ in range(num_replicates): + train, val, test = None, None, None + match SplitType.get(split): + case SplitType.SCAFFOLD_BALANCED: + mols_without_atommaps = [] + for mol in mols: + copied_mol = copy.deepcopy(mol) + for atom in copied_mol.GetAtoms(): + atom.SetAtomMapNum(0) + mols_without_atommaps.append(copied_mol) + result = mol_split_fun( + np.array(mols_without_atommaps), sampler="scaffold", **astartes_kwargs + ) + train, val, test = _unpack_astartes_result(result, include_val) + + # Use to constrain data with the same smiles go in the same split. 
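            # (note) "repeated SMILES" here means duplicate entries of the same molecule:
            # every index sharing a SMILES string is assigned to the same split, so
            # replicate measurements cannot leak between train, validation, and test.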
+ case SplitType.RANDOM_WITH_REPEATED_SMILES: + # get two arrays: one of all the smiles strings, one of just the unique + all_smiles = np.array([Chem.MolToSmiles(mol) for mol in mols]) + unique_smiles = np.unique(all_smiles) + + # save a mapping of smiles -> all the indices that it appeared at + smiles_indices = {} + for smiles in unique_smiles: + smiles_indices[smiles] = np.where(all_smiles == smiles)[0].tolist() + + # randomly split the unique smiles + result = split_fun( + np.arange(len(unique_smiles)), sampler="random", **astartes_kwargs + ) + train_idxs, val_idxs, test_idxs = _unpack_astartes_result(result, include_val) + + # convert these to the 'actual' indices from the original list using the dict we made + train = sum((smiles_indices[unique_smiles[i]] for i in train_idxs), []) + val = sum((smiles_indices[unique_smiles[j]] for j in val_idxs), []) + test = sum((smiles_indices[unique_smiles[k]] for k in test_idxs), []) + + case SplitType.RANDOM: + result = split_fun(np.arange(n_datapoints), sampler="random", **astartes_kwargs) + train, val, test = _unpack_astartes_result(result, include_val) + + case SplitType.KENNARD_STONE: + result = mol_split_fun( + np.array(mols), + sampler="kennard_stone", + hopts=dict(metric="jaccard"), + fingerprint="morgan_fingerprint", + fprints_hopts=dict(n_bits=2048), + **astartes_kwargs, + ) + train, val, test = _unpack_astartes_result(result, include_val) + + case SplitType.KMEANS: + result = mol_split_fun( + np.array(mols), + sampler="kmeans", + hopts=dict(metric="jaccard"), + fingerprint="morgan_fingerprint", + fprints_hopts=dict(n_bits=2048), + **astartes_kwargs, + ) + train, val, test = _unpack_astartes_result(result, include_val) + + case _: + raise RuntimeError("Unreachable code reached!") + train_replicates.append(train) + val_replicates.append(val) + test_replicates.append(test) + astartes_kwargs["random_state"] += 1 + return train_replicates, val_replicates, test_replicates + + +def _unpack_astartes_result( + result: tuple, include_val: bool +) -> tuple[list[int], list[int], list[int]]: + """Helper function to partition input data based on output of astartes sampler + + Parameters + ----------- + result: tuple + Output from call to astartes containing the split indices + include_val: bool + True if a validation set is included, False otherwise. + + Returns + --------- + train: list[int] + val: list[int] + .. 
important:: + validation possibly empty + test: list[int] + """ + train_idxs, val_idxs, test_idxs = [], [], [] + # astartes returns a set of lists containing the data, clusters (if applicable) + # and indices (always last), so we pull out the indices + if include_val: + train_idxs, val_idxs, test_idxs = result[-3], result[-2], result[-1] + else: + train_idxs, test_idxs = result[-2], result[-1] + return list(train_idxs), list(val_idxs), list(test_idxs) + + +def split_data_by_indices( + data: Datapoints | MulticomponentDatapoints, + train_indices: Iterable[Iterable[int]] | None = None, + val_indices: Iterable[Iterable[int]] | None = None, + test_indices: Iterable[Iterable[int]] | None = None, +): + """Splits data into training, validation, and test groups based on split indices given.""" + + train_data = _splitter_helper(data, train_indices) + val_data = _splitter_helper(data, val_indices) + test_data = _splitter_helper(data, test_indices) + + return train_data, val_data, test_data + + +def _splitter_helper(data, indices): + if indices is None: + return None + + if isinstance(data[0], (MoleculeDatapoint, ReactionDatapoint)): + datapoints = data + idxss = indices + return [[datapoints[idx] for idx in idxs] for idxs in idxss] + else: + datapointss = data + idxss = indices + return [[[datapoints[idx] for idx in idxs] for datapoints in datapointss] for idxs in idxss] diff --git a/chemprop/chemprop/exceptions.py b/chemprop/chemprop/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..29229ca41753dbe886312ff91850e75e7d69a556 --- /dev/null +++ b/chemprop/chemprop/exceptions.py @@ -0,0 +1,12 @@ +from typing import Iterable + +from chemprop.utils import pretty_shape + + +class InvalidShapeError(ValueError): + def __init__(self, var_name: str, received: Iterable[int], expected: Iterable[int]): + message = ( + f"arg '{var_name}' has incorrect shape! " + f"got: `{pretty_shape(received)}`. 
expected: `{pretty_shape(expected)}`" + ) + super().__init__(message) diff --git a/chemprop/chemprop/features/__pycache__/__init__.cpython-37.pyc b/chemprop/chemprop/features/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ba2b45b0e6e63a3a213f6d022537d2ad7497dc17 Binary files /dev/null and b/chemprop/chemprop/features/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop/chemprop/features/__pycache__/features_generators.cpython-37.pyc b/chemprop/chemprop/features/__pycache__/features_generators.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8c1bbf65c9224d7bbcf025ea68df7e932d49ece Binary files /dev/null and b/chemprop/chemprop/features/__pycache__/features_generators.cpython-37.pyc differ diff --git a/chemprop/chemprop/features/__pycache__/featurization.cpython-37.pyc b/chemprop/chemprop/features/__pycache__/featurization.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2cb4f7da9e474aae8a68f5e1ef355d2843a3914b Binary files /dev/null and b/chemprop/chemprop/features/__pycache__/featurization.cpython-37.pyc differ diff --git a/chemprop/chemprop/features/__pycache__/utils.cpython-37.pyc b/chemprop/chemprop/features/__pycache__/utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5759d70f4ca7a6203e1ac4eeddc98cc02306952e Binary files /dev/null and b/chemprop/chemprop/features/__pycache__/utils.cpython-37.pyc differ diff --git a/chemprop/chemprop/featurizers/__init__.py b/chemprop/chemprop/featurizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a266fd820ac640d47a22b5a68a6afcb2ab7a2d9c --- /dev/null +++ b/chemprop/chemprop/featurizers/__init__.py @@ -0,0 +1,52 @@ +from .atom import AtomFeatureMode, MultiHotAtomFeaturizer, get_multi_hot_atom_featurizer +from .base import Featurizer, GraphFeaturizer, S, T, VectorFeaturizer +from .bond import MultiHotBondFeaturizer +from .molecule import ( + BinaryFeaturizerMixin, + CountFeaturizerMixin, + MoleculeFeaturizerRegistry, + MorganBinaryFeaturizer, + MorganCountFeaturizer, + MorganFeaturizerMixin, + RDKit2DFeaturizer, + V1RDKit2DFeaturizer, + V1RDKit2DNormalizedFeaturizer, +) +from .molgraph import ( + CGRFeaturizer, + CondensedGraphOfReactionFeaturizer, + MolGraphCache, + MolGraphCacheFacade, + MolGraphCacheOnTheFly, + RxnMode, + SimpleMoleculeMolGraphFeaturizer, +) + +__all__ = [ + "Featurizer", + "S", + "T", + "VectorFeaturizer", + "GraphFeaturizer", + "MultiHotAtomFeaturizer", + "AtomFeatureMode", + "get_multi_hot_atom_featurizer", + "MultiHotBondFeaturizer", + "MolGraphCacheFacade", + "MolGraphCache", + "MolGraphCacheOnTheFly", + "SimpleMoleculeMolGraphFeaturizer", + "CondensedGraphOfReactionFeaturizer", + "CGRFeaturizer", + "RxnMode", + "MoleculeFeaturizer", + "MorganFeaturizerMixin", + "BinaryFeaturizerMixin", + "CountFeaturizerMixin", + "MorganBinaryFeaturizer", + "MorganCountFeaturizer", + "RDKit2DFeaturizer", + "MoleculeFeaturizerRegistry", + "V1RDKit2DFeaturizer", + "V1RDKit2DNormalizedFeaturizer", +] diff --git a/chemprop/chemprop/featurizers/atom.py b/chemprop/chemprop/featurizers/atom.py new file mode 100644 index 0000000000000000000000000000000000000000..c224423f1a4f311bd371f9a2e83666a138f0659d --- /dev/null +++ b/chemprop/chemprop/featurizers/atom.py @@ -0,0 +1,281 @@ +from enum import auto +from typing import Sequence + +import numpy as np +from rdkit.Chem.rdchem import Atom, HybridizationType + +from chemprop.featurizers.base 
import VectorFeaturizer +from chemprop.utils.utils import EnumMapping + + +class MultiHotAtomFeaturizer(VectorFeaturizer[Atom]): + """A :class:`MultiHotAtomFeaturizer` uses a multi-hot encoding to featurize atoms. + + .. seealso:: + The class provides three default parameterization schemes: + + * :meth:`MultiHotAtomFeaturizer.v1` + * :meth:`MultiHotAtomFeaturizer.v2` + * :meth:`MultiHotAtomFeaturizer.organic` + + The generated atom features are ordered as follows: + * atomic number + * degree + * formal charge + * chiral tag + * number of hydrogens + * hybridization + * aromaticity + * mass + + .. important:: + Each feature, except for aromaticity and mass, includes a pad for unknown values. + + Parameters + ---------- + atomic_nums : Sequence[int] + the choices for atom type denoted by atomic number. Ex: ``[4, 5, 6]`` for C, N and O. + degrees : Sequence[int] + the choices for number of bonds an atom is engaged in. + formal_charges : Sequence[int] + the choices for integer electronic charge assigned to an atom. + chiral_tags : Sequence[int] + the choices for an atom's chiral tag. See :class:`rdkit.Chem.rdchem.ChiralType` for possible integer values. + num_Hs : Sequence[int] + the choices for number of bonded hydrogen atoms. + hybridizations : Sequence[int] + the choices for an atom’s hybridization type. See :class:`rdkit.Chem.rdchem.HybridizationType` for possible integer values. + """ + + def __init__( + self, + atomic_nums: Sequence[int], + degrees: Sequence[int], + formal_charges: Sequence[int], + chiral_tags: Sequence[int], + num_Hs: Sequence[int], + hybridizations: Sequence[int], + ): + self.atomic_nums = {j: i for i, j in enumerate(atomic_nums)} + self.degrees = {i: i for i in degrees} + self.formal_charges = {j: i for i, j in enumerate(formal_charges)} + self.chiral_tags = {i: i for i in chiral_tags} + self.num_Hs = {i: i for i in num_Hs} + self.hybridizations = {ht: i for i, ht in enumerate(hybridizations)} + + self._subfeats: list[dict] = [ + self.atomic_nums, + self.degrees, + self.formal_charges, + self.chiral_tags, + self.num_Hs, + self.hybridizations, + ] + subfeat_sizes = [ + 1 + len(self.atomic_nums), + 1 + len(self.degrees), + 1 + len(self.formal_charges), + 1 + len(self.chiral_tags), + 1 + len(self.num_Hs), + 1 + len(self.hybridizations), + 1, + 1, + ] + self.__size = sum(subfeat_sizes) + + def __len__(self) -> int: + return self.__size + + def __call__(self, a: Atom | None) -> np.ndarray: + x = np.zeros(self.__size) + + if a is None: + return x + + feats = [ + a.GetAtomicNum(), + a.GetTotalDegree(), + a.GetFormalCharge(), + int(a.GetChiralTag()), + int(a.GetTotalNumHs()), + a.GetHybridization(), + ] + i = 0 + for feat, choices in zip(feats, self._subfeats): + j = choices.get(feat, len(choices)) + x[i + j] = 1 + i += len(choices) + 1 + x[i] = int(a.GetIsAromatic()) + x[i + 1] = 0.01 * a.GetMass() + + return x + + def num_only(self, a: Atom) -> np.ndarray: + """featurize the atom by setting only the atomic number bit""" + x = np.zeros(len(self)) + + if a is None: + return x + + i = self.atomic_nums.get(a.GetAtomicNum(), len(self.atomic_nums)) + x[i] = 1 + + return x + + @classmethod + def v1(cls, max_atomic_num: int = 100): + """The original implementation used in Chemprop V1 [1]_, [2]_. + + Parameters + ---------- + max_atomic_num : int, default=100 + Include a bit for all atomic numbers in the interval :math:`[1, \mathtt{max\_atomic\_num}]` + + References + ----------- + .. 
[1] Yang, K.; Swanson, K.; Jin, W.; Coley, C.; Eiden, P.; Gao, H.; Guzman-Perez, A.; Hopper, T.; + Kelley, B.; Mathea, M.; Palmer, A. "Analyzing Learned Molecular Representations for Property Prediction." + J. Chem. Inf. Model. 2019, 59 (8), 3370–3388. https://doi.org/10.1021/acs.jcim.9b00237 + .. [2] Heid, E.; Greenman, K.P.; Chung, Y.; Li, S.C.; Graff, D.E.; Vermeire, F.H.; Wu, H.; Green, W.H.; McGill, + C.J. "Chemprop: A machine learning package for chemical property prediction." J. Chem. Inf. Model. 2024, + 64 (1), 9–17. https://doi.org/10.1021/acs.jcim.3c01250 + """ + + return cls( + atomic_nums=list(range(1, max_atomic_num + 1)), + degrees=list(range(6)), + formal_charges=[-1, -2, 1, 2, 0], + chiral_tags=list(range(4)), + num_Hs=list(range(5)), + hybridizations=[ + HybridizationType.SP, + HybridizationType.SP2, + HybridizationType.SP3, + HybridizationType.SP3D, + HybridizationType.SP3D2, + ], + ) + + @classmethod + def v2(cls): + """An implementation that includes an atom type bit for all elements in the first four rows of the periodic table plus iodine.""" + + return cls( + atomic_nums=list(range(1, 37)) + [53], + degrees=list(range(6)), + formal_charges=[-1, -2, 1, 2, 0], + chiral_tags=list(range(4)), + num_Hs=list(range(5)), + hybridizations=[ + HybridizationType.S, + HybridizationType.SP, + HybridizationType.SP2, + HybridizationType.SP2D, + HybridizationType.SP3, + HybridizationType.SP3D, + HybridizationType.SP3D2, + ], + ) + + @classmethod + def organic(cls): + r"""A specific parameterization intended for use with organic or drug-like molecules. + + This parameterization features: + 1. includes an atomic number bit only for H, B, C, N, O, F, Si, P, S, Cl, Br, and I atoms + 2. a hybridization bit for :math:`s, sp, sp^2` and :math:`sp^3` hybridizations. + """ + + return cls( + atomic_nums=[1, 5, 6, 7, 8, 9, 14, 15, 16, 17, 35, 53], + degrees=list(range(6)), + formal_charges=[-1, -2, 1, 2, 0], + chiral_tags=list(range(4)), + num_Hs=list(range(5)), + hybridizations=[ + HybridizationType.S, + HybridizationType.SP, + HybridizationType.SP2, + HybridizationType.SP3, + ], + ) + + +class RIGRAtomFeaturizer(VectorFeaturizer[Atom]): + """A :class:`RIGRAtomFeaturizer` uses a multi-hot encoding to featurize atoms using resonance-invariant features. 
+ + The generated atom features are ordered as follows: + * atomic number + * degree + * number of hydrogens + * mass + """ + + def __init__( + self, + atomic_nums: Sequence[int] | None = None, + degrees: Sequence[int] | None = None, + num_Hs: Sequence[int] | None = None, + ): + self.atomic_nums = {j: i for i, j in enumerate(atomic_nums or list(range(1, 37)) + [53])} + self.degrees = {i: i for i in (degrees or list(range(6)))} + self.num_Hs = {i: i for i in (num_Hs or list(range(5)))} + + self._subfeats: list[dict] = [self.atomic_nums, self.degrees, self.num_Hs] + subfeat_sizes = [1 + len(self.atomic_nums), 1 + len(self.degrees), 1 + len(self.num_Hs), 1] + self.__size = sum(subfeat_sizes) + + def __len__(self) -> int: + return self.__size + + def __call__(self, a: Atom | None) -> np.ndarray: + x = np.zeros(self.__size) + + if a is None: + return x + + feats = [a.GetAtomicNum(), a.GetTotalDegree(), int(a.GetTotalNumHs())] + i = 0 + for feat, choices in zip(feats, self._subfeats): + j = choices.get(feat, len(choices)) + x[i + j] = 1 + i += len(choices) + 1 + x[i] = 0.01 * a.GetMass() # scaled to about the same range as other features + + return x + + def num_only(self, a: Atom) -> np.ndarray: + """featurize the atom by setting only the atomic number bit""" + x = np.zeros(len(self)) + + if a is None: + return x + + i = self.atomic_nums.get(a.GetAtomicNum(), len(self.atomic_nums)) + x[i] = 1 + + return x + + +class AtomFeatureMode(EnumMapping): + """The mode of an atom is used for featurization into a `MolGraph`""" + + V1 = auto() + V2 = auto() + ORGANIC = auto() + RIGR = auto() + + +def get_multi_hot_atom_featurizer(mode: str | AtomFeatureMode) -> MultiHotAtomFeaturizer: + """Build the corresponding multi-hot atom featurizer.""" + match AtomFeatureMode.get(mode): + case AtomFeatureMode.V1: + return MultiHotAtomFeaturizer.v1() + case AtomFeatureMode.V2: + return MultiHotAtomFeaturizer.v2() + case AtomFeatureMode.ORGANIC: + return MultiHotAtomFeaturizer.organic() + case AtomFeatureMode.RIGR: + return RIGRAtomFeaturizer() + case _: + raise RuntimeError("unreachable code reached!") diff --git a/chemprop/chemprop/featurizers/base.py b/chemprop/chemprop/featurizers/base.py new file mode 100644 index 0000000000000000000000000000000000000000..29b876bd8751e13ac151c43f3a7d8b1d42d4a831 --- /dev/null +++ b/chemprop/chemprop/featurizers/base.py @@ -0,0 +1,30 @@ +from abc import abstractmethod +from collections.abc import Sized +from typing import Generic, TypeVar + +import numpy as np + +from chemprop.data.molgraph import MolGraph + +S = TypeVar("S") +T = TypeVar("T") + + +class Featurizer(Generic[S, T]): + """An :class:`Featurizer` featurizes inputs type ``S`` into outputs of + type ``T``.""" + + @abstractmethod + def __call__(self, input: S, *args, **kwargs) -> T: + """featurize an input""" + + +class VectorFeaturizer(Featurizer[S, np.ndarray], Sized): + ... + + +class GraphFeaturizer(Featurizer[S, MolGraph]): + @property + @abstractmethod + def shape(self) -> tuple[int, int]: + ... 
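# --- Illustrative usage sketch (annotation, not part of the diff above) -----------------
# Shows how the atom featurizer modes defined in atom.py are expected to be selected and
# applied. Assumes RDKit and the chemprop modules introduced in this PR are importable;
# the SMILES string is arbitrary.
from rdkit import Chem

from chemprop.featurizers.atom import get_multi_hot_atom_featurizer

featurizer = get_multi_hot_atom_featurizer("V2")  # also accepts "V1", "ORGANIC", "RIGR"
mol = Chem.MolFromSmiles("CCO")
x = featurizer(mol.GetAtomWithIdx(0))  # 1-D multi-hot feature vector for the first carbon
assert x.shape == (len(featurizer),)   # featurizer length == per-atom feature dimension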
diff --git a/chemprop/chemprop/featurizers/bond.py b/chemprop/chemprop/featurizers/bond.py new file mode 100644 index 0000000000000000000000000000000000000000..c604b89d1c7b7d991fac2ebbce9f866cc1b1603c --- /dev/null +++ b/chemprop/chemprop/featurizers/bond.py @@ -0,0 +1,122 @@ +from typing import Sequence + +import numpy as np +from rdkit.Chem.rdchem import Bond, BondType + +from chemprop.featurizers.base import VectorFeaturizer + + +class MultiHotBondFeaturizer(VectorFeaturizer[Bond]): + """A :class:`MultiHotBondFeaturizer` feauturizes bonds based on the following attributes: + + * ``null``-ity (i.e., is the bond ``None``?) + * bond type + * conjugated? + * in ring? + * stereochemistry + + The feature vectors produced by this featurizer have the following (general) signature: + + +---------------------+-----------------+--------------+ + | slice [start, stop) | subfeature | unknown pad? | + +=====================+=================+==============+ + | 0-1 | null? | N | + +---------------------+-----------------+--------------+ + | 1-5 | bond type | N | + +---------------------+-----------------+--------------+ + | 5-6 | conjugated? | N | + +---------------------+-----------------+--------------+ + | 6-8 | in ring? | N | + +---------------------+-----------------+--------------+ + | 7-14 | stereochemistry | Y | + +---------------------+-----------------+--------------+ + + **NOTE**: the above signature only applies for the default arguments, as the bond type and + sterochemistry slices can increase in size depending on the input arguments. + + Parameters + ---------- + bond_types : Sequence[BondType] | None, default=[SINGLE, DOUBLE, TRIPLE, AROMATIC] + the known bond types + stereos : Sequence[int] | None, default=[0, 1, 2, 3, 4, 5] + the known bond stereochemistries. See [1]_ for more details + + References + ---------- + .. [1] https://www.rdkit.org/docs/source/rdkit.Chem.rdchem.html#rdkit.Chem.rdchem.BondStereo.values + """ + + def __init__( + self, bond_types: Sequence[BondType] | None = None, stereos: Sequence[int] | None = None + ): + self.bond_types = bond_types or [ + BondType.SINGLE, + BondType.DOUBLE, + BondType.TRIPLE, + BondType.AROMATIC, + ] + self.stereo = stereos or range(6) + + def __len__(self): + return 1 + len(self.bond_types) + 2 + (len(self.stereo) + 1) + + def __call__(self, b: Bond) -> np.ndarray: + x = np.zeros(len(self), int) + + if b is None: + x[0] = 1 + return x + + i = 1 + bond_type = b.GetBondType() + bt_bit, size = self.one_hot_index(bond_type, self.bond_types) + if bt_bit != size: + x[i + bt_bit] = 1 + i += size - 1 + + x[i] = int(b.GetIsConjugated()) + x[i + 1] = int(b.IsInRing()) + i += 2 + + stereo_bit, _ = self.one_hot_index(int(b.GetStereo()), self.stereo) + x[i + stereo_bit] = 1 + + return x + + @classmethod + def one_hot_index(cls, x, xs: Sequence) -> tuple[int, int]: + """Returns a tuple of the index of ``x`` in ``xs`` and ``len(xs) + 1`` if ``x`` is in ``xs``. + Otherwise, returns a tuple with ``len(xs)`` and ``len(xs) + 1``.""" + n = len(xs) + + return xs.index(x) if x in xs else n, n + 1 + + +class RIGRBondFeaturizer(VectorFeaturizer[Bond]): + """A :class:`RIGRBondFeaturizer` feauturizes bonds based on only the resonance-invariant features: + + * ``null``-ity (i.e., is the bond ``None``?) + * in ring? 
+ """ + + def __len__(self): + return 2 + + def __call__(self, b: Bond) -> np.ndarray: + x = np.zeros(len(self), int) + + if b is None: + x[0] = 1 + return x + + x[1] = int(b.IsInRing()) + + return x + + @classmethod + def one_hot_index(cls, x, xs: Sequence) -> tuple[int, int]: + """Returns a tuple of the index of ``x`` in ``xs`` and ``len(xs) + 1`` if ``x`` is in ``xs``. + Otherwise, returns a tuple with ``len(xs)`` and ``len(xs) + 1``.""" + n = len(xs) + + return xs.index(x) if x in xs else n, n + 1 diff --git a/chemprop/chemprop/featurizers/molecule.py b/chemprop/chemprop/featurizers/molecule.py new file mode 100644 index 0000000000000000000000000000000000000000..df35f066f27b64dc6524c54dbbc0c3e4d7233bdb --- /dev/null +++ b/chemprop/chemprop/featurizers/molecule.py @@ -0,0 +1,104 @@ +import logging + +from descriptastorus.descriptors import rdDescriptors, rdNormalizedDescriptors +import numpy as np +from rdkit import Chem +from rdkit.Chem import Descriptors, Mol +from rdkit.Chem.rdFingerprintGenerator import GetMorganGenerator + +from chemprop.featurizers.base import VectorFeaturizer +from chemprop.utils import ClassRegistry + +logger = logging.getLogger(__name__) + +MoleculeFeaturizerRegistry = ClassRegistry[VectorFeaturizer[Mol]]() + + +class MorganFeaturizerMixin: + def __init__(self, radius: int = 2, length: int = 2048, include_chirality: bool = True): + if radius < 0: + raise ValueError(f"arg 'radius' must be >= 0! got: {radius}") + + self.length = length + self.F = GetMorganGenerator( + radius=radius, fpSize=length, includeChirality=include_chirality + ) + + def __len__(self) -> int: + return self.length + + +class BinaryFeaturizerMixin: + def __call__(self, mol: Chem.Mol) -> np.ndarray: + return self.F.GetFingerprintAsNumPy(mol) + + +class CountFeaturizerMixin: + def __call__(self, mol: Chem.Mol) -> np.ndarray: + return self.F.GetCountFingerprintAsNumPy(mol).astype(np.int32) + + +@MoleculeFeaturizerRegistry("morgan_binary") +class MorganBinaryFeaturizer(MorganFeaturizerMixin, BinaryFeaturizerMixin, VectorFeaturizer[Mol]): + pass + + +@MoleculeFeaturizerRegistry("morgan_count") +class MorganCountFeaturizer(MorganFeaturizerMixin, CountFeaturizerMixin, VectorFeaturizer[Mol]): + pass + + +@MoleculeFeaturizerRegistry("rdkit_2d") +class RDKit2DFeaturizer(VectorFeaturizer[Mol]): + def __init__(self): + logger.warning( + "The RDKit 2D features can deviate signifcantly from a normal distribution. Consider " + "manually scaling them using an appropriate scaler before creating datapoints, rather " + "than using the scikit-learn `StandardScaler` (the default in Chemprop)." 
+ ) + + def __len__(self) -> int: + return len(Descriptors.descList) + + def __call__(self, mol: Chem.Mol) -> np.ndarray: + features = np.array( + [ + 0.0 if name == "SPS" and mol.GetNumHeavyAtoms() == 0 else func(mol) + for name, func in Descriptors.descList + ], + dtype=float, + ) + + return features + + +class V1RDKit2DFeaturizerMixin(VectorFeaturizer[Mol]): + def __len__(self) -> int: + return 200 + + def __call__(self, mol: Mol) -> np.ndarray: + smiles = Chem.MolToSmiles(mol, isomericSmiles=True) + features = self.generator.process(smiles)[1:] + + return np.array(features) + + +@MoleculeFeaturizerRegistry("v1_rdkit_2d") +class V1RDKit2DFeaturizer(V1RDKit2DFeaturizerMixin): + def __init__(self): + self.generator = rdDescriptors.RDKit2D() + + +@MoleculeFeaturizerRegistry("v1_rdkit_2d_normalized") +class V1RDKit2DNormalizedFeaturizer(V1RDKit2DFeaturizerMixin): + def __init__(self): + self.generator = rdNormalizedDescriptors.RDKit2DNormalized() + + +@MoleculeFeaturizerRegistry("charge") +class ChargeFeaturizer(VectorFeaturizer[Mol]): + def __call__(self, mol: Chem.Mol) -> np.ndarray: + return np.array([Chem.GetFormalCharge(mol)]) + + def __len__(self) -> int: + return 1 diff --git a/chemprop/chemprop/featurizers/molgraph/__init__.py b/chemprop/chemprop/featurizers/molgraph/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9fb21580de633d627d3144c55fe809d33466d26e --- /dev/null +++ b/chemprop/chemprop/featurizers/molgraph/__init__.py @@ -0,0 +1,13 @@ +from .cache import MolGraphCache, MolGraphCacheFacade, MolGraphCacheOnTheFly +from .molecule import SimpleMoleculeMolGraphFeaturizer +from .reaction import CGRFeaturizer, CondensedGraphOfReactionFeaturizer, RxnMode + +__all__ = [ + "MolGraphCacheFacade", + "MolGraphCache", + "MolGraphCacheOnTheFly", + "SimpleMoleculeMolGraphFeaturizer", + "CondensedGraphOfReactionFeaturizer", + "CGRFeaturizer", + "RxnMode", +] diff --git a/chemprop/chemprop/featurizers/molgraph/cache.py b/chemprop/chemprop/featurizers/molgraph/cache.py new file mode 100644 index 0000000000000000000000000000000000000000..171d2b26f21c19d42539843d29c765b773651e2c --- /dev/null +++ b/chemprop/chemprop/featurizers/molgraph/cache.py @@ -0,0 +1,89 @@ +from abc import abstractmethod +from collections.abc import Sequence +from typing import Generic, Iterable + +import numpy as np + +from chemprop.data.molgraph import MolGraph +from chemprop.featurizers.base import Featurizer, S + + +class MolGraphCacheFacade(Sequence[MolGraph], Generic[S]): + """ + A :class:`MolGraphCacheFacade` provided an interface for caching + :class:`~chemprop.data.molgraph.MolGraph`\s. + + .. note:: + This class only provides a facade for a cached dataset, but it *does not guarantee* + whether the underlying data is truly cached. + + + Parameters + ---------- + inputs : Iterable[S] + The inputs to be featurized. + V_fs : Iterable[np.ndarray] + The node features for each input. + E_fs : Iterable[np.ndarray] + The edge features for each input. + featurizer : Featurizer[S, MolGraph] + The featurizer with which to generate the + :class:`~chemprop.data.molgraph.MolGraph`\s. + """ + + @abstractmethod + def __init__( + self, + inputs: Iterable[S], + V_fs: Iterable[np.ndarray], + E_fs: Iterable[np.ndarray], + featurizer: Featurizer[S, MolGraph], + ): + pass + + +class MolGraphCache(MolGraphCacheFacade): + """ + A :class:`MolGraphCache` precomputes the corresponding + :class:`~chemprop.data.molgraph.MolGraph`\s and caches them in memory. 
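A hedged usage sketch of the two cache facades: `MolGraphCache` pays the featurization cost once up front, while `MolGraphCacheOnTheFly` pays it on every access. It assumes the package in this diff is importable, uses `SimpleMoleculeMolGraphFeaturizer` (defined later in this diff) as the featurizer, assumes `MolGraph` exposes its node feature matrix as `V` (as used throughout the diff), and uses arbitrary SMILES:

```python
from rdkit import Chem

from chemprop.featurizers.molgraph import (
    MolGraphCache,
    MolGraphCacheOnTheFly,
    SimpleMoleculeMolGraphFeaturizer,
)

mols = [Chem.MolFromSmiles(s) for s in ("CCO", "c1ccccc1", "CC(=O)O")]
featurizer = SimpleMoleculeMolGraphFeaturizer()

# no extra atom/bond features, so pass None placeholders for each input
eager = MolGraphCache(mols, [None] * 3, [None] * 3, featurizer)
lazy = MolGraphCacheOnTheFly(mols, [None] * 3, [None] * 3, featurizer)

assert len(eager) == len(lazy) == 3
print(eager[0].V.shape, lazy[0].V.shape)  # the same MolGraph either way
```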
+ """ + + def __init__( + self, + inputs: Iterable[S], + V_fs: Iterable[np.ndarray | None], + E_fs: Iterable[np.ndarray | None], + featurizer: Featurizer[S, MolGraph], + ): + self._mgs = [featurizer(input, V_f, E_f) for input, V_f, E_f in zip(inputs, V_fs, E_fs)] + + def __len__(self) -> int: + return len(self._mgs) + + def __getitem__(self, index: int) -> MolGraph: + return self._mgs[index] + + +class MolGraphCacheOnTheFly(MolGraphCacheFacade): + """ + A :class:`MolGraphCacheOnTheFly` computes the corresponding + :class:`~chemprop.data.molgraph.MolGraph`\s as they are requested. + """ + + def __init__( + self, + inputs: Iterable[S], + V_fs: Iterable[np.ndarray | None], + E_fs: Iterable[np.ndarray | None], + featurizer: Featurizer[S, MolGraph], + ): + self._inputs = list(inputs) + self._V_fs = list(V_fs) + self._E_fs = list(E_fs) + self._featurizer = featurizer + + def __len__(self) -> int: + return len(self._inputs) + + def __getitem__(self, index: int) -> MolGraph: + return self._featurizer(self._inputs[index], self._V_fs[index], self._E_fs[index]) diff --git a/chemprop/chemprop/featurizers/molgraph/mixins.py b/chemprop/chemprop/featurizers/molgraph/mixins.py new file mode 100644 index 0000000000000000000000000000000000000000..afa461d481388d51f6e8434a21a5f5f99199616a --- /dev/null +++ b/chemprop/chemprop/featurizers/molgraph/mixins.py @@ -0,0 +1,23 @@ +from dataclasses import dataclass, field + +from rdkit.Chem.rdchem import Atom, Bond + +from chemprop.featurizers.atom import MultiHotAtomFeaturizer +from chemprop.featurizers.base import VectorFeaturizer +from chemprop.featurizers.bond import MultiHotBondFeaturizer + + +@dataclass +class _MolGraphFeaturizerMixin: + atom_featurizer: VectorFeaturizer[Atom] = field(default_factory=MultiHotAtomFeaturizer.v2) + bond_featurizer: VectorFeaturizer[Bond] = field(default_factory=MultiHotBondFeaturizer) + + def __post_init__(self): + self.atom_fdim = len(self.atom_featurizer) + self.bond_fdim = len(self.bond_featurizer) + + @property + def shape(self) -> tuple[int, int]: + """the feature dimension of the atoms and bonds, respectively, of `MolGraph`s generated by + this featurizer""" + return self.atom_fdim, self.bond_fdim diff --git a/chemprop/chemprop/featurizers/molgraph/molecule.py b/chemprop/chemprop/featurizers/molgraph/molecule.py new file mode 100644 index 0000000000000000000000000000000000000000..7ac7fafd4e15c57e1823ff0904e0888126c8352c --- /dev/null +++ b/chemprop/chemprop/featurizers/molgraph/molecule.py @@ -0,0 +1,91 @@ +from dataclasses import InitVar, dataclass + +import numpy as np +from rdkit import Chem +from rdkit.Chem import Mol + +from chemprop.data.molgraph import MolGraph +from chemprop.featurizers.base import GraphFeaturizer +from chemprop.featurizers.molgraph.mixins import _MolGraphFeaturizerMixin + + +@dataclass +class SimpleMoleculeMolGraphFeaturizer(_MolGraphFeaturizerMixin, GraphFeaturizer[Mol]): + """A :class:`SimpleMoleculeMolGraphFeaturizer` is the default implementation of a + :class:`MoleculeMolGraphFeaturizer` + + Parameters + ---------- + atom_featurizer : AtomFeaturizer, default=MultiHotAtomFeaturizer() + the featurizer with which to calculate feature representations of the atoms in a given + molecule + bond_featurizer : BondFeaturizer, default=MultiHotBondFeaturizer() + the featurizer with which to calculate feature representations of the bonds in a given + molecule + extra_atom_fdim : int, default=0 + the dimension of the additional features that will be concatenated onto the calculated + features of each atom + 
extra_bond_fdim : int, default=0 + the dimension of the additional features that will be concatenated onto the calculated + features of each bond + """ + + extra_atom_fdim: InitVar[int] = 0 + extra_bond_fdim: InitVar[int] = 0 + + def __post_init__(self, extra_atom_fdim: int = 0, extra_bond_fdim: int = 0): + super().__post_init__() + + self.extra_atom_fdim = extra_atom_fdim + self.extra_bond_fdim = extra_bond_fdim + self.atom_fdim += self.extra_atom_fdim + self.bond_fdim += self.extra_bond_fdim + + def __call__( + self, + mol: Chem.Mol, + atom_features_extra: np.ndarray | None = None, + bond_features_extra: np.ndarray | None = None, + ) -> MolGraph: + n_atoms = mol.GetNumAtoms() + n_bonds = mol.GetNumBonds() + + if atom_features_extra is not None and len(atom_features_extra) != n_atoms: + raise ValueError( + "Input molecule must have same number of atoms as `len(atom_features_extra)`!" + f"got: {n_atoms} and {len(atom_features_extra)}, respectively" + ) + if bond_features_extra is not None and len(bond_features_extra) != n_bonds: + raise ValueError( + "Input molecule must have same number of bonds as `len(bond_features_extra)`!" + f"got: {n_bonds} and {len(bond_features_extra)}, respectively" + ) + + if n_atoms == 0: + V = np.zeros((1, self.atom_fdim), dtype=np.single) + else: + V = np.array([self.atom_featurizer(a) for a in mol.GetAtoms()], dtype=np.single) + E = np.empty((2 * n_bonds, self.bond_fdim)) + edge_index = [[], []] + + if atom_features_extra is not None: + V = np.hstack((V, atom_features_extra)) + + i = 0 + for bond in mol.GetBonds(): + x_e = self.bond_featurizer(bond) + if bond_features_extra is not None: + x_e = np.concatenate((x_e, bond_features_extra[bond.GetIdx()]), dtype=np.single) + + E[i : i + 2] = x_e + + u, v = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx() + edge_index[0].extend([u, v]) + edge_index[1].extend([v, u]) + + i += 2 + + rev_edge_index = np.arange(len(E)).reshape(-1, 2)[:, ::-1].ravel() + edge_index = np.array(edge_index, int) + + return MolGraph(V, E, edge_index, rev_edge_index) diff --git a/chemprop/chemprop/featurizers/molgraph/reaction.py b/chemprop/chemprop/featurizers/molgraph/reaction.py new file mode 100644 index 0000000000000000000000000000000000000000..f35b03e037b45553743c0af53363a5f9d68585e9 --- /dev/null +++ b/chemprop/chemprop/featurizers/molgraph/reaction.py @@ -0,0 +1,332 @@ +from dataclasses import InitVar, dataclass +from enum import auto +import logging +from typing import Iterable, Sequence, TypeAlias + +import numpy as np +from rdkit import Chem +from rdkit.Chem.rdchem import Bond, Mol + +from chemprop.data.molgraph import MolGraph +from chemprop.featurizers.base import GraphFeaturizer +from chemprop.featurizers.molgraph.mixins import _MolGraphFeaturizerMixin +from chemprop.types import Rxn +from chemprop.utils.utils import EnumMapping + +logger = logging.getLogger(__name__) + + +class RxnMode(EnumMapping): + """The mode by which a reaction should be featurized into a `MolGraph`""" + + REAC_PROD = auto() + """concatenate the reactant features with the product features.""" + REAC_PROD_BALANCE = auto() + """concatenate the reactant features with the products feature and balances imbalanced + reactions""" + REAC_DIFF = auto() + """concatenates the reactant features with the difference in features between reactants and + products""" + REAC_DIFF_BALANCE = auto() + """concatenates the reactant features with the difference in features between reactants and + product and balances imbalanced reactions""" + PROD_DIFF = auto() + """concatenates the 
product features with the difference in features between reactants and + products""" + PROD_DIFF_BALANCE = auto() + """concatenates the product features with the difference in features between reactants and + products and balances imbalanced reactions""" + + +@dataclass +class CondensedGraphOfReactionFeaturizer(_MolGraphFeaturizerMixin, GraphFeaturizer[Rxn]): + """A :class:`CondensedGraphOfReactionFeaturizer` featurizes reactions using the condensed + reaction graph method utilized in [1]_ + + **NOTE**: This class *does not* accept a :class:`AtomFeaturizer` instance. This is because + it requries the :meth:`num_only()` method, which is only implemented in the concrete + :class:`AtomFeaturizer` class + + Parameters + ---------- + atom_featurizer : AtomFeaturizer, default=AtomFeaturizer() + the featurizer with which to calculate feature representations of the atoms in a given + molecule + bond_featurizer : BondFeaturizerBase, default=BondFeaturizer() + the featurizer with which to calculate feature representations of the bonds in a given + molecule + mode_ : Union[str, ReactionMode], default=ReactionMode.REAC_DIFF + the mode by which to featurize the reaction as either the string code or enum value + + References + ---------- + .. [1] Heid, E.; Green, W.H. "Machine Learning of Reaction Properties via Learned + Representations of the Condensed Graph of Reaction." J. Chem. Inf. Model. 2022, 62, + 2101-2110. https://doi.org/10.1021/acs.jcim.1c00975 + """ + + mode_: InitVar[str | RxnMode] = RxnMode.REAC_DIFF + + def __post_init__(self, mode_: str | RxnMode): + super().__post_init__() + + self.mode = mode_ + self.atom_fdim += len(self.atom_featurizer) - len(self.atom_featurizer.atomic_nums) - 1 + self.bond_fdim *= 2 + + @property + def mode(self) -> RxnMode: + return self.__mode + + @mode.setter + def mode(self, m: str | RxnMode): + self.__mode = RxnMode.get(m) + + def __call__( + self, + rxn: tuple[Chem.Mol, Chem.Mol], + atom_features_extra: np.ndarray | None = None, + bond_features_extra: np.ndarray | None = None, + ) -> MolGraph: + """Featurize the input reaction into a molecular graph + + Parameters + ---------- + rxn : Rxn + a 2-tuple of atom-mapped rdkit molecules, where the 0th element is the reactant and the + 1st element is the product + atom_features_extra : np.ndarray | None, default=None + *UNSUPPORTED* maintained only to maintain parity with the method signature of the + `MoleculeFeaturizer` + bond_features_extra : np.ndarray | None, default=None + *UNSUPPORTED* maintained only to maintain parity with the method signature of the + `MoleculeFeaturizer` + + Returns + ------- + MolGraph + the molecular graph of the reaction + """ + + if atom_features_extra is not None: + logger.warning("'atom_features_extra' is currently unsupported for reactions") + if bond_features_extra is not None: + logger.warning("'bond_features_extra' is currently unsupported for reactions") + + reac, pdt = rxn + r2p_idx_map, pdt_idxs, reac_idxs = self.map_reac_to_prod(reac, pdt) + + V = self._calc_node_feature_matrix(reac, pdt, r2p_idx_map, pdt_idxs, reac_idxs) + E = [] + edge_index = [[], []] + + n_atoms_tot = len(V) + n_atoms_reac = reac.GetNumAtoms() + + for u in range(n_atoms_tot): + for v in range(u + 1, n_atoms_tot): + b_reac, b_prod = self._get_bonds( + reac, pdt, r2p_idx_map, pdt_idxs, n_atoms_reac, u, v + ) + if b_reac is None and b_prod is None: + continue + + x_e = self._calc_edge_feature(b_reac, b_prod) + E.extend([x_e, x_e]) + edge_index[0].extend([u, v]) + edge_index[1].extend([v, u]) + + E = 
np.array(E) if len(E) > 0 else np.empty((0, self.bond_fdim)) + rev_edge_index = np.arange(len(E)).reshape(-1, 2)[:, ::-1].ravel() + edge_index = np.array(edge_index, int) + + return MolGraph(V, E, edge_index, rev_edge_index) + + def _calc_node_feature_matrix( + self, + rct: Mol, + pdt: Mol, + r2p_idx_map: dict[int, int], + pdt_idxs: Iterable[int], + reac_idxs: Iterable[int], + ) -> np.ndarray: + """Calculate the node feature matrix for the reaction""" + X_v_r1 = np.array([self.atom_featurizer(a) for a in rct.GetAtoms()]) + X_v_p2 = np.array([self.atom_featurizer(pdt.GetAtomWithIdx(i)) for i in pdt_idxs]) + X_v_p2 = X_v_p2.reshape(-1, X_v_r1.shape[1]) + + if self.mode in [RxnMode.REAC_DIFF, RxnMode.PROD_DIFF, RxnMode.REAC_PROD]: + # Reactant: + # (1) regular features for each atom in the reactants + # (2) zero features for each atom that's only in the products + X_v_r2 = [self.atom_featurizer.num_only(pdt.GetAtomWithIdx(i)) for i in pdt_idxs] + X_v_r2 = np.array(X_v_r2).reshape(-1, X_v_r1.shape[1]) + + # Product: + # (1) either (a) product-side features for each atom in both + # or (b) zero features for each atom only in the reatants + # (2) regular features for each atom only in the products + X_v_p1 = np.array( + [ + ( + self.atom_featurizer(pdt.GetAtomWithIdx(r2p_idx_map[a.GetIdx()])) + if a.GetIdx() not in reac_idxs + else self.atom_featurizer.num_only(a) + ) + for a in rct.GetAtoms() + ] + ) + else: + # Reactant: + # (1) regular features for each atom in the reactants + # (2) regular features for each atom only in the products + X_v_r2 = [self.atom_featurizer(pdt.GetAtomWithIdx(i)) for i in pdt_idxs] + X_v_r2 = np.array(X_v_r2).reshape(-1, X_v_r1.shape[1]) + + # Product: + # (1) either (a) product-side features for each atom in both + # or (b) reactant-side features for each atom only in the reatants + # (2) regular features for each atom only in the products + X_v_p1 = np.array( + [ + ( + self.atom_featurizer(pdt.GetAtomWithIdx(r2p_idx_map[a.GetIdx()])) + if a.GetIdx() not in reac_idxs + else self.atom_featurizer(a) + ) + for a in rct.GetAtoms() + ] + ) + + X_v_r = np.concatenate((X_v_r1, X_v_r2)) + X_v_p = np.concatenate((X_v_p1, X_v_p2)) + + m = min(len(X_v_r), len(X_v_p)) + + if self.mode in [RxnMode.REAC_PROD, RxnMode.REAC_PROD_BALANCE]: + X_v = np.hstack((X_v_r[:m], X_v_p[:m, len(self.atom_featurizer.atomic_nums) + 1 :])) + else: + X_v_d = X_v_p[:m] - X_v_r[:m] + if self.mode in [RxnMode.REAC_DIFF, RxnMode.REAC_DIFF_BALANCE]: + X_v = np.hstack((X_v_r[:m], X_v_d[:m, len(self.atom_featurizer.atomic_nums) + 1 :])) + else: + X_v = np.hstack((X_v_p[:m], X_v_d[:m, len(self.atom_featurizer.atomic_nums) + 1 :])) + + return X_v + + def _get_bonds( + self, + rct: Bond, + pdt: Bond, + ri2pj: dict[int, int], + pids: Sequence[int], + n_atoms_r: int, + u: int, + v: int, + ) -> tuple[Bond, Bond]: + """get the corresponding reactant- and product-side bond, respectively, betweeen atoms `u` and `v`""" + if u >= n_atoms_r and v >= n_atoms_r: + b_prod = pdt.GetBondBetweenAtoms(pids[u - n_atoms_r], pids[v - n_atoms_r]) + + if self.mode in [ + RxnMode.REAC_PROD_BALANCE, + RxnMode.REAC_DIFF_BALANCE, + RxnMode.PROD_DIFF_BALANCE, + ]: + b_reac = b_prod + else: + b_reac = None + elif u < n_atoms_r and v >= n_atoms_r: # One atom only in product + b_reac = None + + if u in ri2pj: + b_prod = pdt.GetBondBetweenAtoms(ri2pj[u], pids[v - n_atoms_r]) + else: # Atom atom only in reactant, the other only in product + b_prod = None + else: + b_reac = rct.GetBondBetweenAtoms(u, v) + + if u in ri2pj and v in ri2pj: # 
Both atoms in both reactant and product + b_prod = pdt.GetBondBetweenAtoms(ri2pj[u], ri2pj[v]) + elif self.mode in [ + RxnMode.REAC_PROD_BALANCE, + RxnMode.REAC_DIFF_BALANCE, + RxnMode.PROD_DIFF_BALANCE, + ]: + b_prod = None if (u in ri2pj or v in ri2pj) else b_reac + else: # One or both atoms only in reactant + b_prod = None + + return b_reac, b_prod + + def _calc_edge_feature(self, b_reac: Bond, b_pdt: Bond): + """Calculate the global features of the two bonds""" + x_e_r = self.bond_featurizer(b_reac) + x_e_p = self.bond_featurizer(b_pdt) + x_e_d = x_e_p - x_e_r + + if self.mode in [RxnMode.REAC_PROD, RxnMode.REAC_PROD_BALANCE]: + x_e = np.hstack((x_e_r, x_e_p)) + elif self.mode in [RxnMode.REAC_DIFF, RxnMode.REAC_DIFF_BALANCE]: + x_e = np.hstack((x_e_r, x_e_d)) + else: + x_e = np.hstack((x_e_p, x_e_d)) + + return x_e + + @classmethod + def map_reac_to_prod( + cls, reacs: Chem.Mol, pdts: Chem.Mol + ) -> tuple[dict[int, int], list[int], list[int]]: + """Map atom indices between corresponding atoms in the reactant and product molecules + + Parameters + ---------- + reacs : Chem.Mol + An RDKit molecule of the reactants + pdts : Chem.Mol + An RDKit molecule of the products + + Returns + ------- + ri2pi : dict[int, int] + A dictionary of corresponding atom indices from reactant atoms to product atoms + pdt_idxs : list[int] + atom indices of poduct atoms + rct_idxs : list[int] + atom indices of reactant atoms + """ + pdt_idxs = [] + mapno2pj = {} + reac_atommap_nums = {a.GetAtomMapNum() for a in reacs.GetAtoms()} + + for a in pdts.GetAtoms(): + map_num = a.GetAtomMapNum() + j = a.GetIdx() + + if map_num > 0: + mapno2pj[map_num] = j + if map_num not in reac_atommap_nums: + pdt_idxs.append(j) + else: + pdt_idxs.append(j) + + rct_idxs = [] + r2p_idx_map = {} + + for a in reacs.GetAtoms(): + map_num = a.GetAtomMapNum() + i = a.GetIdx() + + if map_num > 0: + try: + r2p_idx_map[i] = mapno2pj[map_num] + except KeyError: + rct_idxs.append(i) + else: + rct_idxs.append(i) + + return r2p_idx_map, pdt_idxs, rct_idxs + + +CGRFeaturizer: TypeAlias = CondensedGraphOfReactionFeaturizer diff --git a/chemprop/chemprop/models/__init__.py b/chemprop/chemprop/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..76946d73b599668b8cdd7adc2a1c48b38b8d1108 --- /dev/null +++ b/chemprop/chemprop/models/__init__.py @@ -0,0 +1,5 @@ +from .model import MPNN +from .multi import MulticomponentMPNN +from .utils import load_model, save_model + +__all__ = ["MPNN", "MulticomponentMPNN", "load_model", "save_model"] diff --git a/chemprop/chemprop/models/__pycache__/__init__.cpython-37.pyc b/chemprop/chemprop/models/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6503ed123f3d65114e035321f0cbf9882c566608 Binary files /dev/null and b/chemprop/chemprop/models/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop/chemprop/models/__pycache__/ffn.cpython-37.pyc b/chemprop/chemprop/models/__pycache__/ffn.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2b4f0b097a79f0ccabf82aae5f2e77c672a9eb5 Binary files /dev/null and b/chemprop/chemprop/models/__pycache__/ffn.cpython-37.pyc differ diff --git a/chemprop/chemprop/models/__pycache__/model.cpython-37.pyc b/chemprop/chemprop/models/__pycache__/model.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab53bf808264e8ab0121ba648c645b3852d114cd Binary files /dev/null and b/chemprop/chemprop/models/__pycache__/model.cpython-37.pyc 
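A usage sketch for the condensed-graph-of-reaction featurizer defined above (a hedged example, assuming the package in this diff is importable; the atom-mapped SMILES pair is an arbitrary toy reaction):

```python
from rdkit import Chem

from chemprop.featurizers.molgraph import CondensedGraphOfReactionFeaturizer, RxnMode

# atom-mapped reactant/product pair (heavy atoms mapped 1:1)
rct = Chem.MolFromSmiles("[CH3:1][CH2:2][OH:3]")
pdt = Chem.MolFromSmiles("[CH3:1][CH:2]=[O:3]")

featurizer = CondensedGraphOfReactionFeaturizer(mode_=RxnMode.REAC_DIFF)
mg = featurizer((rct, pdt))

print(featurizer.shape)       # (atom_fdim, bond_fdim) of the condensed graph
print(mg.V.shape, mg.E.shape)
```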
differ diff --git a/chemprop/chemprop/models/__pycache__/mpn.cpython-37.pyc b/chemprop/chemprop/models/__pycache__/mpn.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9e647541f4fb9aa7804d2c7393f5f55c7bff876a Binary files /dev/null and b/chemprop/chemprop/models/__pycache__/mpn.cpython-37.pyc differ diff --git a/chemprop/chemprop/models/model.py b/chemprop/chemprop/models/model.py new file mode 100644 index 0000000000000000000000000000000000000000..f8a142fd4dba38a50ff76c799d50af6cbe660b9c --- /dev/null +++ b/chemprop/chemprop/models/model.py @@ -0,0 +1,334 @@ +from __future__ import annotations + +import io +import logging +from typing import Iterable, TypeAlias + +from lightning import pytorch as pl +import torch +from torch import Tensor, nn, optim + +from chemprop.data import BatchMolGraph, MulticomponentTrainingBatch, TrainingBatch +from chemprop.nn import Aggregation, ChempropMetric, MessagePassing, Predictor +from chemprop.nn.transforms import ScaleTransform +from chemprop.schedulers import build_NoamLike_LRSched +from chemprop.utils.registry import Factory + +logger = logging.getLogger(__name__) + +BatchType: TypeAlias = TrainingBatch | MulticomponentTrainingBatch + + +class MPNN(pl.LightningModule): + r"""An :class:`MPNN` is a sequence of message passing layers, an aggregation routine, and a + predictor routine. + + The first two modules calculate learned fingerprints from an input molecule + reaction graph, and the final module takes these learned fingerprints as input to calculate a + final prediction. I.e., the following operation: + + .. math:: + \mathtt{MPNN}(\mathcal{G}) = + \mathtt{predictor}(\mathtt{agg}(\mathtt{message\_passing}(\mathcal{G}))) + + The full model is trained end-to-end. + + Parameters + ---------- + message_passing : MessagePassing + the message passing block to use to calculate learned fingerprints + agg : Aggregation + the aggregation operation to use during molecule-level predictor + predictor : Predictor + the function to use to calculate the final prediction + batch_norm : bool, default=False + if `True`, apply batch normalization to the output of the aggregation operation + metrics : Iterable[Metric] | None, default=None + the metrics to use to evaluate the model during training and evaluation + warmup_epochs : int, default=2 + the number of epochs to use for the learning rate warmup + init_lr : int, default=1e-4 + the initial learning rate + max_lr : float, default=1e-3 + the maximum learning rate + final_lr : float, default=1e-4 + the final learning rate + + Raises + ------ + ValueError + if the output dimension of the message passing block does not match the input dimension of + the predictor function + """ + + def __init__( + self, + message_passing: MessagePassing, + agg: Aggregation, + predictor: Predictor, + batch_norm: bool = False, + metrics: Iterable[ChempropMetric] | None = None, + warmup_epochs: int = 2, + init_lr: float = 1e-4, + max_lr: float = 1e-3, + final_lr: float = 1e-4, + X_d_transform: ScaleTransform | None = None, + ): + super().__init__() + # manually add X_d_transform to hparams to suppress lightning's warning about double saving + # its state_dict values. 
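A hedged sketch of assembling the `MPNN` described above from stock blocks. It assumes the `chemprop.nn` modules defined elsewhere in this diff (`BondMessagePassing`, `MeanAggregation`, `RegressionFFN`) and that their default hidden dimensions are mutually compatible, as they are for the stock blocks:

```python
from chemprop import nn
from chemprop.models import MPNN

mp = nn.BondMessagePassing()   # learned fingerprints from the molecular graph
agg = nn.MeanAggregation()     # atom -> molecule pooling
ffn = nn.RegressionFFN()       # single-task regression head

model = MPNN(mp, agg, ffn, batch_norm=True, warmup_epochs=2, max_lr=1e-3)
print(model.output_dim, model.n_tasks)
```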
+ self.save_hyperparameters(ignore=["X_d_transform", "message_passing", "agg", "predictor"]) + self.hparams["X_d_transform"] = X_d_transform + self.hparams.update( + { + "message_passing": message_passing.hparams, + "agg": agg.hparams, + "predictor": predictor.hparams, + } + ) + + self.message_passing = message_passing + self.agg = agg + self.bn = nn.BatchNorm1d(self.message_passing.output_dim) if batch_norm else nn.Identity() + self.predictor = predictor + + self.X_d_transform = X_d_transform if X_d_transform is not None else nn.Identity() + + self.metrics = ( + nn.ModuleList([*metrics, self.criterion.clone()]) + if metrics + else nn.ModuleList([self.predictor._T_default_metric(), self.criterion.clone()]) + ) + + self.warmup_epochs = warmup_epochs + self.init_lr = init_lr + self.max_lr = max_lr + self.final_lr = final_lr + + @property + def output_dim(self) -> int: + return self.predictor.output_dim + + @property + def n_tasks(self) -> int: + return self.predictor.n_tasks + + @property + def n_targets(self) -> int: + return self.predictor.n_targets + + @property + def criterion(self) -> ChempropMetric: + return self.predictor.criterion + + def fingerprint( + self, bmg: BatchMolGraph, V_d: Tensor | None = None, X_d: Tensor | None = None + ) -> Tensor: + """the learned fingerprints for the input molecules""" + H_v = self.message_passing(bmg, V_d) + H = self.agg(H_v, bmg.batch) + H = self.bn(H) + + return H if X_d is None else torch.cat((H, self.X_d_transform(X_d)), 1) + + def encoding( + self, bmg: BatchMolGraph, V_d: Tensor | None = None, X_d: Tensor | None = None, i: int = -1 + ) -> Tensor: + """Calculate the :attr:`i`-th hidden representation""" + return self.predictor.encode(self.fingerprint(bmg, V_d, X_d), i) + + def forward( + self, bmg: BatchMolGraph, V_d: Tensor | None = None, X_d: Tensor | None = None + ) -> Tensor: + """Generate predictions for the input molecules/reactions""" + return self.predictor(self.fingerprint(bmg, V_d, X_d)) + + def training_step(self, batch: BatchType, batch_idx): + batch_size = self.get_batch_size(batch) + bmg, V_d, X_d, targets, weights, lt_mask, gt_mask = batch + + mask = targets.isfinite() + targets = targets.nan_to_num(nan=0.0) + + Z = self.fingerprint(bmg, V_d, X_d) + preds = self.predictor.train_step(Z) + l = self.criterion(preds, targets, mask, weights, lt_mask, gt_mask) + + self.log("train_loss", self.criterion, batch_size=batch_size, prog_bar=True, on_epoch=True) + + return l + + def on_validation_model_eval(self) -> None: + self.eval() + self.message_passing.V_d_transform.train() + self.message_passing.graph_transform.train() + self.X_d_transform.train() + self.predictor.output_transform.train() + + def validation_step(self, batch: BatchType, batch_idx: int = 0): + self._evaluate_batch(batch, "val") + + batch_size = self.get_batch_size(batch) + bmg, V_d, X_d, targets, weights, lt_mask, gt_mask = batch + + mask = targets.isfinite() + targets = targets.nan_to_num(nan=0.0) + + Z = self.fingerprint(bmg, V_d, X_d) + preds = self.predictor.train_step(Z) + self.metrics[-1](preds, targets, mask, weights, lt_mask, gt_mask) + self.log("val_loss", self.metrics[-1], batch_size=batch_size, prog_bar=True) + + def test_step(self, batch: BatchType, batch_idx: int = 0): + self._evaluate_batch(batch, "test") + + def _evaluate_batch(self, batch: BatchType, label: str) -> None: + batch_size = self.get_batch_size(batch) + bmg, V_d, X_d, targets, weights, lt_mask, gt_mask = batch + + mask = targets.isfinite() + targets = targets.nan_to_num(nan=0.0) + preds = 
self(bmg, V_d, X_d) + weights = torch.ones_like(weights) + + if self.predictor.n_targets > 1: + preds = preds[..., 0] + + for m in self.metrics[:-1]: + m.update(preds, targets, mask, weights, lt_mask, gt_mask) + self.log(f"{label}/{m.alias}", m, batch_size=batch_size) + + def predict_step(self, batch: BatchType, batch_idx: int, dataloader_idx: int = 0) -> Tensor: + """Return the predictions of the input batch + + Parameters + ---------- + batch : TrainingBatch + the input batch + + Returns + ------- + Tensor + a tensor of varying shape depending on the task type: + + * regression/binary classification: ``n x (t * s)``, where ``n`` is the number of input + molecules/reactions, ``t`` is the number of tasks, and ``s`` is the number of targets + per task. The final dimension is flattened, so that the targets for each task are + grouped. I.e., the first ``t`` elements are the first target for each task, the second + ``t`` elements the second target, etc. + + * multiclass classification: ``n x t x c``, where ``c`` is the number of classes + """ + bmg, X_vd, X_d, *_ = batch + + return self(bmg, X_vd, X_d) + + def configure_optimizers(self): + opt = optim.Adam(self.parameters(), self.init_lr) + if self.trainer.train_dataloader is None: + # Loading `train_dataloader` to estimate number of training batches. + # Using this line of code can pypass the issue of using `num_training_batches` as described [here](https://github.com/Lightning-AI/pytorch-lightning/issues/16060). + self.trainer.estimated_stepping_batches + steps_per_epoch = self.trainer.num_training_batches + warmup_steps = self.warmup_epochs * steps_per_epoch + if self.trainer.max_epochs == -1: + logger.warning( + "For infinite training, the number of cooldown epochs in learning rate scheduler is set to 100 times the number of warmup epochs." 
+ ) + cooldown_steps = 100 * warmup_steps + else: + cooldown_epochs = self.trainer.max_epochs - self.warmup_epochs + cooldown_steps = cooldown_epochs * steps_per_epoch + + lr_sched = build_NoamLike_LRSched( + opt, warmup_steps, cooldown_steps, self.init_lr, self.max_lr, self.final_lr + ) + + lr_sched_config = {"scheduler": lr_sched, "interval": "step"} + + return {"optimizer": opt, "lr_scheduler": lr_sched_config} + + def get_batch_size(self, batch: TrainingBatch) -> int: + return len(batch[0]) + + @classmethod + def _load(cls, path, map_location, **submodules): + d = torch.load(path, map_location, weights_only=False) + + try: + hparams = d["hyper_parameters"] + state_dict = d["state_dict"] + except KeyError: + raise KeyError(f"Could not find hyper parameters and/or state dict in {path}.") + + if hparams["metrics"] is not None: + hparams["metrics"] = [ + cls._rebuild_metric(metric) + if not hasattr(metric, "_defaults") + or (not torch.cuda.is_available() and metric.device.type != "cpu") + else metric + for metric in hparams["metrics"] + ] + + if hparams["predictor"]["criterion"] is not None: + metric = hparams["predictor"]["criterion"] + if not hasattr(metric, "_defaults") or ( + not torch.cuda.is_available() and metric.device.type != "cpu" + ): + hparams["predictor"]["criterion"] = cls._rebuild_metric(metric) + + submodules |= { + key: hparams[key].pop("cls")(**hparams[key]) + for key in ("message_passing", "agg", "predictor") + if key not in submodules + } + + return submodules, state_dict, hparams + + @classmethod + def _add_metric_task_weights_to_state_dict(cls, state_dict, hparams): + if "metrics.0.task_weights" not in state_dict: + metrics = hparams["metrics"] + n_metrics = len(metrics) if metrics is not None else 1 + for i_metric in range(n_metrics): + state_dict[f"metrics.{i_metric}.task_weights"] = torch.tensor([[1.0]]) + state_dict[f"metrics.{i_metric + 1}.task_weights"] = state_dict[ + "predictor.criterion.task_weights" + ] + return state_dict + + @classmethod + def _rebuild_metric(cls, metric): + return Factory.build(metric.__class__, task_weights=metric.task_weights, **metric.__dict__) + + @classmethod + def load_from_checkpoint( + cls, checkpoint_path, map_location=None, hparams_file=None, strict=True, **kwargs + ) -> MPNN: + submodules = { + k: v for k, v in kwargs.items() if k in ["message_passing", "agg", "predictor"] + } + submodules, state_dict, hparams = cls._load(checkpoint_path, map_location, **submodules) + kwargs.update(submodules) + + state_dict = cls._add_metric_task_weights_to_state_dict(state_dict, hparams) + d = torch.load(checkpoint_path, map_location, weights_only=False) + d["state_dict"] = state_dict + d["hyper_parameters"] = hparams + buffer = io.BytesIO() + torch.save(d, buffer) + buffer.seek(0) + + return super().load_from_checkpoint(buffer, map_location, hparams_file, strict, **kwargs) + + @classmethod + def load_from_file(cls, model_path, map_location=None, strict=True, **submodules) -> MPNN: + submodules, state_dict, hparams = cls._load(model_path, map_location, **submodules) + hparams.update(submodules) + + state_dict = cls._add_metric_task_weights_to_state_dict(state_dict, hparams) + + model = cls(**hparams) + model.load_state_dict(state_dict, strict=strict) + + return model diff --git a/chemprop/chemprop/models/multi.py b/chemprop/chemprop/models/multi.py new file mode 100644 index 0000000000000000000000000000000000000000..930b815b1e8f8688101ab8ce14697f54c41b3e0e --- /dev/null +++ b/chemprop/chemprop/models/multi.py @@ -0,0 +1,101 @@ +from typing 
import Iterable + +import torch +from torch import Tensor + +from chemprop.data import BatchMolGraph, MulticomponentTrainingBatch +from chemprop.models.model import MPNN +from chemprop.nn import Aggregation, MulticomponentMessagePassing, Predictor +from chemprop.nn.metrics import ChempropMetric +from chemprop.nn.transforms import ScaleTransform + + +class MulticomponentMPNN(MPNN): + def __init__( + self, + message_passing: MulticomponentMessagePassing, + agg: Aggregation, + predictor: Predictor, + batch_norm: bool = False, + metrics: Iterable[ChempropMetric] | None = None, + warmup_epochs: int = 2, + init_lr: float = 1e-4, + max_lr: float = 1e-3, + final_lr: float = 1e-4, + X_d_transform: ScaleTransform | None = None, + ): + super().__init__( + message_passing, + agg, + predictor, + batch_norm, + metrics, + warmup_epochs, + init_lr, + max_lr, + final_lr, + X_d_transform, + ) + self.message_passing: MulticomponentMessagePassing + + def fingerprint( + self, + bmgs: Iterable[BatchMolGraph], + V_ds: Iterable[Tensor | None], + X_d: Tensor | None = None, + ) -> Tensor: + H_vs: list[Tensor] = self.message_passing(bmgs, V_ds) + Hs = [self.agg(H_v, bmg.batch) for H_v, bmg in zip(H_vs, bmgs)] + H = torch.cat(Hs, 1) + H = self.bn(H) + + return H if X_d is None else torch.cat((H, self.X_d_transform(X_d)), 1) + + def on_validation_model_eval(self) -> None: + self.eval() + for block in self.message_passing.blocks: + block.V_d_transform.train() + block.graph_transform.train() + self.X_d_transform.train() + self.predictor.output_transform.train() + + def get_batch_size(self, batch: MulticomponentTrainingBatch) -> int: + return len(batch[0][0]) + + @classmethod + def _load(cls, path, map_location, **submodules): + d = torch.load(path, map_location, weights_only=False) + + try: + hparams = d["hyper_parameters"] + state_dict = d["state_dict"] + except KeyError: + raise KeyError(f"Could not find hyper parameters and/or state dict in {path}.") + + if hparams["metrics"] is not None: + hparams["metrics"] = [ + cls._rebuild_metric(metric) + if not hasattr(metric, "_defaults") + or (not torch.cuda.is_available() and metric.device.type != "cpu") + else metric + for metric in hparams["metrics"] + ] + + if hparams["predictor"]["criterion"] is not None: + metric = hparams["predictor"]["criterion"] + if not hasattr(metric, "_defaults") or ( + not torch.cuda.is_available() and metric.device.type != "cpu" + ): + hparams["predictor"]["criterion"] = cls._rebuild_metric(metric) + + hparams["message_passing"]["blocks"] = [ + block_hparams.pop("cls")(**block_hparams) + for block_hparams in hparams["message_passing"]["blocks"] + ] + submodules |= { + key: hparams[key].pop("cls")(**hparams[key]) + for key in ("message_passing", "agg", "predictor") + if key not in submodules + } + + return submodules, state_dict, hparams diff --git a/chemprop/chemprop/models/utils.py b/chemprop/chemprop/models/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cf0d06b5fb8e7841856f3a143e10e16701d62783 --- /dev/null +++ b/chemprop/chemprop/models/utils.py @@ -0,0 +1,32 @@ +from os import PathLike + +import torch + +from chemprop.models.model import MPNN +from chemprop.models.multi import MulticomponentMPNN + + +def save_model(path: PathLike, model: MPNN, output_columns: list[str] = None) -> None: + torch.save( + { + "hyper_parameters": model.hparams, + "state_dict": model.state_dict(), + "output_columns": output_columns, + }, + path, + ) + + +def load_model(path: PathLike, multicomponent: bool) -> MPNN: + if 
multicomponent: + model = MulticomponentMPNN.load_from_file(path, map_location=torch.device("cpu")) + else: + model = MPNN.load_from_file(path, map_location=torch.device("cpu")) + + return model + + +def load_output_columns(path: PathLike) -> list[str] | None: + model_file = torch.load(path, map_location=torch.device("cpu"), weights_only=False) + + return model_file.get("output_columns") diff --git a/chemprop/chemprop/nn/__init__.py b/chemprop/chemprop/nn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3e8680ede568cfe936fefe16626f3bbdf39a5725 --- /dev/null +++ b/chemprop/chemprop/nn/__init__.py @@ -0,0 +1,127 @@ +from .agg import ( + Aggregation, + AggregationRegistry, + AttentiveAggregation, + MeanAggregation, + NormAggregation, + SumAggregation, +) +from .message_passing import ( + AtomMessagePassing, + BondMessagePassing, + MessagePassing, + MulticomponentMessagePassing, +) +from .metrics import ( + MAE, + MSE, + RMSE, + SID, + BCELoss, + BinaryAccuracy, + BinaryAUPRC, + BinaryAUROC, + BinaryF1Score, + BinaryMCCLoss, + BinaryMCCMetric, + BoundedMAE, + BoundedMixin, + BoundedMSE, + BoundedRMSE, + ChempropMetric, + ClassificationMixin, + CrossEntropyLoss, + DirichletLoss, + EvidentialLoss, + LossFunctionRegistry, + MetricRegistry, + MulticlassMCCLoss, + MulticlassMCCMetric, + MVELoss, + QuantileLoss, + R2Score, + Wasserstein, +) +from .predictors import ( + BinaryClassificationFFN, + BinaryClassificationFFNBase, + BinaryDirichletFFN, + EvidentialFFN, + MulticlassClassificationFFN, + MulticlassDirichletFFN, + MveFFN, + Predictor, + PredictorRegistry, + QuantileFFN, + RegressionFFN, + SpectralFFN, +) +from .transforms import GraphTransform, ScaleTransform, UnscaleTransform +from .utils import Activation + +__all__ = [ + "Aggregation", + "AggregationRegistry", + "MeanAggregation", + "SumAggregation", + "NormAggregation", + "AttentiveAggregation", + "ChempropMetric", + "ClassificationMixin", + "LossFunctionRegistry", + "MetricRegistry", + "MSE", + "MAE", + "RMSE", + "BoundedMixin", + "BoundedMSE", + "BoundedMAE", + "BoundedRMSE", + "BinaryAccuracy", + "BinaryAUPRC", + "BinaryAUROC", + "BinaryF1Score", + "BinaryMCCMetric", + "BoundedMAE", + "BoundedMSE", + "BoundedRMSE", + "MetricRegistry", + "MulticlassMCCMetric", + "R2Score", + "MVELoss", + "EvidentialLoss", + "BCELoss", + "CrossEntropyLoss", + "BinaryMCCLoss", + "BinaryMCCMetric", + "MulticlassMCCLoss", + "MulticlassMCCMetric", + "BinaryAUROC", + "BinaryAUPRC", + "BinaryAccuracy", + "BinaryF1Score", + "MulticlassDirichletLoss", + "SID", + "Wasserstein", + "QuantileLoss", + "MessagePassing", + "AtomMessagePassing", + "BondMessagePassing", + "MulticomponentMessagePassing", + "Predictor", + "PredictorRegistry", + "QuantileFFN", + "RegressionFFN", + "MveFFN", + "DirichletLoss", + "EvidentialFFN", + "BinaryClassificationFFNBase", + "BinaryClassificationFFN", + "BinaryDirichletFFN", + "MulticlassClassificationFFN", + "SpectralFFN", + "Activation", + "GraphTransform", + "ScaleTransform", + "UnscaleTransform", +] diff --git a/chemprop/chemprop/nn/agg.py b/chemprop/chemprop/nn/agg.py new file mode 100644 index 0000000000000000000000000000000000000000..ed921b41d41f68534931a93c56552f31bd792d34 --- /dev/null +++ b/chemprop/chemprop/nn/agg.py @@ -0,0 +1,133 @@ +from abc import abstractmethod + +import torch +from torch import Tensor, nn + +from chemprop.nn.hparams import HasHParams +from chemprop.utils import ClassRegistry + +__all__ = [ + "Aggregation", + "AggregationRegistry", + "MeanAggregation", + "SumAggregation", + 
"NormAggregation", + "AttentiveAggregation", +] + + +class Aggregation(nn.Module, HasHParams): + """An :class:`Aggregation` aggregates the node-level representations of a batch of graphs into + a batch of graph-level representations + + .. note:: + this class is abstract and cannot be instantiated. + + See also + -------- + :class:`~chemprop.v2.models.modules.agg.MeanAggregation` + :class:`~chemprop.v2.models.modules.agg.SumAggregation` + :class:`~chemprop.v2.models.modules.agg.NormAggregation` + """ + + def __init__(self, dim: int = 0, *args, **kwargs): + super().__init__() + + self.dim = dim + self.hparams = {"dim": dim, "cls": self.__class__} + + @abstractmethod + def forward(self, H: Tensor, batch: Tensor) -> Tensor: + """Aggregate the graph-level representations of a batch of graphs into their respective + global representations + + NOTE: it is possible for a graph to have 0 nodes. In this case, the representation will be + a zero vector of length `d` in the final output. + + Parameters + ---------- + H : Tensor + a tensor of shape ``V x d`` containing the batched node-level representations of ``b`` + graphs + batch : Tensor + a tensor of shape ``V`` containing the index of the graph a given vertex corresponds to + + Returns + ------- + Tensor + a tensor of shape ``b x d`` containing the graph-level representations + """ + + +AggregationRegistry = ClassRegistry[Aggregation]() + + +@AggregationRegistry.register("mean") +class MeanAggregation(Aggregation): + r"""Average the graph-level representation: + + .. math:: + \mathbf h = \frac{1}{|V|} \sum_{v \in V} \mathbf h_v + """ + + def forward(self, H: Tensor, batch: Tensor) -> Tensor: + index_torch = batch.unsqueeze(1).repeat(1, H.shape[1]) + dim_size = batch.max().int() + 1 + return torch.zeros(dim_size, H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_( + self.dim, index_torch, H, reduce="mean", include_self=False + ) + + +@AggregationRegistry.register("sum") +class SumAggregation(Aggregation): + r"""Sum the graph-level representation: + + .. math:: + \mathbf h = \sum_{v \in V} \mathbf h_v + + """ + + def forward(self, H: Tensor, batch: Tensor) -> Tensor: + index_torch = batch.unsqueeze(1).repeat(1, H.shape[1]) + dim_size = batch.max().int() + 1 + return torch.zeros(dim_size, H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_( + self.dim, index_torch, H, reduce="sum", include_self=False + ) + + +@AggregationRegistry.register("norm") +class NormAggregation(SumAggregation): + r"""Sum the graph-level representation and divide by a normalization constant: + + .. 
math:: + \mathbf h = \frac{1}{c} \sum_{v \in V} \mathbf h_v + """ + + def __init__(self, dim: int = 0, *args, norm: float = 100.0, **kwargs): + super().__init__(dim, **kwargs) + + self.norm = norm + self.hparams["norm"] = norm + + def forward(self, H: Tensor, batch: Tensor) -> Tensor: + return super().forward(H, batch) / self.norm + + +class AttentiveAggregation(Aggregation): + def __init__(self, dim: int = 0, *args, output_size: int, **kwargs): + super().__init__(dim, *args, **kwargs) + + self.hparams["output_size"] = output_size + self.W = nn.Linear(output_size, 1) + + def forward(self, H: Tensor, batch: Tensor) -> Tensor: + dim_size = batch.max().int() + 1 + attention_logits = self.W(H).exp() + Z = torch.zeros(dim_size, 1, dtype=H.dtype, device=H.device).scatter_reduce_( + self.dim, batch.unsqueeze(1), attention_logits, reduce="sum", include_self=False + ) + alphas = attention_logits / Z[batch] + index_torch = batch.unsqueeze(1).repeat(1, H.shape[1]) + return torch.zeros(dim_size, H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_( + self.dim, index_torch, alphas * H, reduce="sum", include_self=False + ) diff --git a/chemprop/chemprop/nn/ffn.py b/chemprop/chemprop/nn/ffn.py new file mode 100644 index 0000000000000000000000000000000000000000..f5a02fe92391adeca9c88ec371296951a0132928 --- /dev/null +++ b/chemprop/chemprop/nn/ffn.py @@ -0,0 +1,63 @@ +from abc import abstractmethod + +from torch import Tensor, nn + +from chemprop.nn.utils import get_activation_function + + +class FFN(nn.Module): + r"""A :class:`FFN` is a differentiable function + :math:`f_\theta : \mathbb R^i \mapsto \mathbb R^o`""" + + input_dim: int + output_dim: int + + @abstractmethod + def forward(self, X: Tensor) -> Tensor: + pass + + +class MLP(nn.Sequential, FFN): + r"""An :class:`MLP` is an FFN that implements the following function: + + .. math:: + \mathbf h_0 &= \mathbf W_0 \mathbf x \,+ \mathbf b_{0} \\ + \mathbf h_l &= \mathbf W_l \left( \mathtt{dropout} \left( \sigma ( \,\mathbf h_{l-1}\, ) \right) \right) + \mathbf b_l\\ + + where :math:`\mathbf x` is the input tensor, :math:`\mathbf W_l` and :math:`\mathbf b_l` + are the learned weight matrix and bias, respectively, of the :math:`l`-th layer, + :math:`\mathbf h_l` is the hidden representation after layer :math:`l`, and :math:`\sigma` + is the activation function. 
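Returning to the aggregation blocks defined above: they all reduce node-level rows into per-graph rows with `Tensor.scatter_reduce_`, indexed by the `batch` vector. A minimal, self-contained illustration of the mean-pooling case (plain PyTorch, toy numbers):

```python
import torch

# node features for two graphs: graph 0 has 3 nodes, graph 1 has 2 nodes
H = torch.arange(10, dtype=torch.float).reshape(5, 2)
batch = torch.tensor([0, 0, 0, 1, 1])

index = batch.unsqueeze(1).repeat(1, H.shape[1])
dim_size = int(batch.max()) + 1
pooled = torch.zeros(dim_size, H.shape[1]).scatter_reduce_(
    0, index, H, reduce="mean", include_self=False
)
print(pooled)  # row 0 = mean of nodes 0-2, row 1 = mean of nodes 3-4
```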
+ """ + + @classmethod + def build( + cls, + input_dim: int, + output_dim: int, + hidden_dim: int = 300, + n_layers: int = 1, + dropout: float = 0.0, + activation: str = "relu", + ): + dropout = nn.Dropout(dropout) + act = get_activation_function(activation) + dims = [input_dim] + [hidden_dim] * n_layers + [output_dim] + blocks = [nn.Sequential(nn.Linear(dims[0], dims[1]))] + if len(dims) > 2: + blocks.extend( + [ + nn.Sequential(act, dropout, nn.Linear(d1, d2)) + for d1, d2 in zip(dims[1:-1], dims[2:]) + ] + ) + + return cls(*blocks) + + @property + def input_dim(self) -> int: + return self[0][-1].in_features + + @property + def output_dim(self) -> int: + return self[-1][-1].out_features diff --git a/chemprop/chemprop/nn/hparams.py b/chemprop/chemprop/nn/hparams.py new file mode 100644 index 0000000000000000000000000000000000000000..ffa17ab80c16bbae47b35a18d8fe9f3eb66ee590 --- /dev/null +++ b/chemprop/chemprop/nn/hparams.py @@ -0,0 +1,38 @@ +from typing import Protocol, Type, TypedDict + + +class HParamsDict(TypedDict): + """A dictionary containing a module's class and it's hyperparameters + + Using this type should essentially allow for initializing a module via:: + + module = hparams.pop('cls')(**hparams) + """ + + cls: Type + + +class HasHParams(Protocol): + """:class:`HasHParams` is a protocol for clases which possess an :attr:`hparams` attribute which is a dictionary containing the object's class and arguments required to initialize it. + + That is, any object which implements :class:`HasHParams` should be able to be initialized via:: + + class Foo(HasHParams): + def __init__(self, *args, **kwargs): + ... + + foo1 = Foo(...) + foo1_cls = foo1.hparams['cls'] + foo1_kwargs = {k: v for k, v in foo1.hparams.items() if k != "cls"} + foo2 = foo1_cls(**foo1_kwargs) + # code to compare foo1 and foo2 goes here and they should be equal + """ + + hparams: HParamsDict + + +def from_hparams(hparams: HParamsDict): + cls = hparams["cls"] + kwargs = {k: v for k, v in hparams.items() if k != "cls"} + + return cls(**kwargs) diff --git a/chemprop/chemprop/nn/message_passing/__init__.py b/chemprop/chemprop/nn/message_passing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..97078653c6524d5645d4862a8ef683a2d8eb457e --- /dev/null +++ b/chemprop/chemprop/nn/message_passing/__init__.py @@ -0,0 +1,10 @@ +from .base import AtomMessagePassing, BondMessagePassing +from .multi import MulticomponentMessagePassing +from .proto import MessagePassing + +__all__ = [ + "MessagePassing", + "AtomMessagePassing", + "BondMessagePassing", + "MulticomponentMessagePassing", +] diff --git a/chemprop/chemprop/nn/message_passing/base.py b/chemprop/chemprop/nn/message_passing/base.py new file mode 100644 index 0000000000000000000000000000000000000000..8bb14b0f51c97ba083e26b402db7a0c77023a8db --- /dev/null +++ b/chemprop/chemprop/nn/message_passing/base.py @@ -0,0 +1,319 @@ +from abc import abstractmethod + +from lightning.pytorch.core.mixins import HyperparametersMixin +import torch +from torch import Tensor, nn + +from chemprop.conf import DEFAULT_ATOM_FDIM, DEFAULT_BOND_FDIM, DEFAULT_HIDDEN_DIM +from chemprop.data import BatchMolGraph +from chemprop.exceptions import InvalidShapeError +from chemprop.nn.message_passing.proto import MessagePassing +from chemprop.nn.transforms import GraphTransform, ScaleTransform +from chemprop.nn.utils import Activation, get_activation_function + + +class _MessagePassingBase(MessagePassing, HyperparametersMixin): + """The base message-passing block for atom- and 
bond-based message-passing schemes + + NOTE: this class is an abstract base class and cannot be instantiated + + Parameters + ---------- + d_v : int, default=DEFAULT_ATOM_FDIM + the feature dimension of the vertices + d_e : int, default=DEFAULT_BOND_FDIM + the feature dimension of the edges + d_h : int, default=DEFAULT_HIDDEN_DIM + the hidden dimension during message passing + bias : bool, defuault=False + if `True`, add a bias term to the learned weight matrices + depth : int, default=3 + the number of message passing iterations + undirected : bool, default=False + if `True`, pass messages on undirected edges + dropout : float, default=0.0 + the dropout probability + activation : str, default="relu" + the activation function to use + d_vd : int | None, default=None + the dimension of additional vertex descriptors that will be concatenated to the hidden features before readout + + See also + -------- + * :class:`AtomMessagePassing` + + * :class:`BondMessagePassing` + """ + + def __init__( + self, + d_v: int = DEFAULT_ATOM_FDIM, + d_e: int = DEFAULT_BOND_FDIM, + d_h: int = DEFAULT_HIDDEN_DIM, + bias: bool = False, + depth: int = 3, + dropout: float = 0.0, + activation: str | Activation = Activation.RELU, + undirected: bool = False, + d_vd: int | None = None, + V_d_transform: ScaleTransform | None = None, + graph_transform: GraphTransform | None = None, + # layers_per_message: int = 1, + ): + super().__init__() + # manually add V_d_transform and graph_transform to hparams to suppress lightning's warning + # about double saving their state_dict values. + self.save_hyperparameters(ignore=["V_d_transform", "graph_transform"]) + self.hparams["V_d_transform"] = V_d_transform + self.hparams["graph_transform"] = graph_transform + self.hparams["cls"] = self.__class__ + + self.W_i, self.W_h, self.W_o, self.W_d = self.setup(d_v, d_e, d_h, d_vd, bias) + self.depth = depth + self.undirected = undirected + self.dropout = nn.Dropout(dropout) + self.tau = get_activation_function(activation) + self.V_d_transform = V_d_transform if V_d_transform is not None else nn.Identity() + self.graph_transform = graph_transform if graph_transform is not None else nn.Identity() + + @property + def output_dim(self) -> int: + return self.W_d.out_features if self.W_d is not None else self.W_o.out_features + + @abstractmethod + def setup( + self, + d_v: int = DEFAULT_ATOM_FDIM, + d_e: int = DEFAULT_BOND_FDIM, + d_h: int = DEFAULT_HIDDEN_DIM, + d_vd: int | None = None, + bias: bool = False, + ) -> tuple[nn.Module, nn.Module, nn.Module, nn.Module | None]: + """setup the weight matrices used in the message passing update functions + + Parameters + ---------- + d_v : int + the vertex feature dimension + d_e : int + the edge feature dimension + d_h : int, default=300 + the hidden dimension during message passing + d_vd : int | None, default=None + the dimension of additional vertex descriptors that will be concatenated to the hidden + features before readout, if any + bias: bool, default=False + whether to add a learned bias to the matrices + + Returns + ------- + W_i, W_h, W_o, W_d : tuple[nn.Module, nn.Module, nn.Module, nn.Module | None] + the input, hidden, output, and descriptor weight matrices, respectively, used in the + message passing update functions. 
The descriptor weight matrix is `None` if no vertex + dimension is supplied + """ + + @abstractmethod + def initialize(self, bmg: BatchMolGraph) -> Tensor: + """initialize the message passing scheme by calculating initial matrix of hidden features""" + + @abstractmethod + def message(self, H_t: Tensor, bmg: BatchMolGraph): + """Calculate the message matrix""" + + def update(self, M_t, H_0): + """Calcualte the updated hidden for each edge""" + H_t = self.W_h(M_t) + H_t = self.tau(H_0 + H_t) + H_t = self.dropout(H_t) + + return H_t + + def finalize(self, M: Tensor, V: Tensor, V_d: Tensor | None) -> Tensor: + r"""Finalize message passing by (1) concatenating the final message ``M`` and the original + vertex features ``V`` and (2) if provided, further concatenating additional vertex + descriptors ``V_d``. + + This function implements the following operation: + + .. math:: + H &= \mathtt{dropout} \left( \tau(\mathbf{W}_o(V \mathbin\Vert M)) \right) \\ + H &= \mathtt{dropout} \left( \tau(\mathbf{W}_d(H \mathbin\Vert V_d)) \right), + + where :math:`\tau` is the activation function, :math:`\Vert` is the concatenation operator, + :math:`\mathbf{W}_o` and :math:`\mathbf{W}_d` are learned weight matrices, :math:`M` is + the message matrix, :math:`V` is the original vertex feature matrix, and :math:`V_d` is an + optional vertex descriptor matrix. + + Parameters + ---------- + M : Tensor + a tensor of shape ``V x d_h`` containing the message vector of each vertex + V : Tensor + a tensor of shape ``V x d_v`` containing the original vertex features + V_d : Tensor | None + an optional tensor of shape ``V x d_vd`` containing additional vertex descriptors + + Returns + ------- + Tensor + a tensor of shape ``V x (d_h + d_v [+ d_vd])`` containing the final hidden + representations + + Raises + ------ + InvalidShapeError + if ``V_d`` is not of shape ``b x d_vd``, where ``b`` is the batch size and ``d_vd`` is + the vertex descriptor dimension + """ + H = self.W_o(torch.cat((V, M), dim=1)) # V x d_o + H = self.tau(H) + H = self.dropout(H) + + if V_d is not None: + V_d = self.V_d_transform(V_d) + try: + H = self.W_d(torch.cat((H, V_d), dim=1)) # V x (d_o + d_vd) + H = self.dropout(H) + except RuntimeError: + raise InvalidShapeError( + "V_d", V_d.shape, [len(H), self.W_d.in_features - self.W_o.out_features] + ) + + return H + + def forward(self, bmg: BatchMolGraph, V_d: Tensor | None = None) -> Tensor: + """Encode a batch of molecular graphs. + + Parameters + ---------- + bmg: BatchMolGraph + a batch of :class:`BatchMolGraph`s to encode + V_d : Tensor | None, default=None + an optional tensor of shape ``V x d_vd`` containing additional descriptors for each atom + in the batch. These will be concatenated to the learned atomic descriptors and + transformed before the readout phase. 
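The `finalize` step above is two concatenate-then-project operations; here is a toy shape check of the first one (plain PyTorch, with illustrative dimensions rather than the package defaults):

```python
import torch
from torch import nn

d_v, d_h, n_atoms = 72, 300, 7   # illustrative sizes only
V = torch.randn(n_atoms, d_v)    # original atom features
M = torch.randn(n_atoms, d_h)    # per-atom sum of incoming messages
W_o = nn.Linear(d_v + d_h, d_h)

H = torch.relu(W_o(torch.cat((V, M), dim=1)))
print(H.shape)  # torch.Size([7, 300]) -- one hidden vector per atom
```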
+ + Returns + ------- + Tensor + a tensor of shape ``V x d_h`` or ``V x (d_h + d_vd)`` containing the encoding of each + molecule in the batch, depending on whether additional atom descriptors were provided + """ + bmg = self.graph_transform(bmg) + H_0 = self.initialize(bmg) + + H = self.tau(H_0) + for _ in range(1, self.depth): + if self.undirected: + H = (H + H[bmg.rev_edge_index]) / 2 + + M = self.message(H, bmg) + H = self.update(M, H_0) + + index_torch = bmg.edge_index[1].unsqueeze(1).repeat(1, H.shape[1]) + M = torch.zeros(len(bmg.V), H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_( + 0, index_torch, H, reduce="sum", include_self=False + ) + return self.finalize(M, bmg.V, V_d) + + +class BondMessagePassing(_MessagePassingBase): + r"""A :class:`BondMessagePassing` encodes a batch of molecular graphs by passing messages along + directed bonds. + + It implements the following operation: + + .. math:: + + h_{vw}^{(0)} &= \tau \left( \mathbf W_i(e_{vw}) \right) \\ + m_{vw}^{(t)} &= \sum_{u \in \mathcal N(v)\setminus w} h_{uv}^{(t-1)} \\ + h_{vw}^{(t)} &= \tau \left(h_v^{(0)} + \mathbf W_h m_{vw}^{(t-1)} \right) \\ + m_v^{(T)} &= \sum_{w \in \mathcal N(v)} h_w^{(T-1)} \\ + h_v^{(T)} &= \tau \left (\mathbf W_o \left( x_v \mathbin\Vert m_{v}^{(T)} \right) \right), + + where :math:`\tau` is the activation function; :math:`\mathbf W_i`, :math:`\mathbf W_h`, and + :math:`\mathbf W_o` are learned weight matrices; :math:`e_{vw}` is the feature vector of the + bond between atoms :math:`v` and :math:`w`; :math:`x_v` is the feature vector of atom :math:`v`; + :math:`h_{vw}^{(t)}` is the hidden representation of the bond :math:`v \rightarrow w` at + iteration :math:`t`; :math:`m_{vw}^{(t)}` is the message received by the bond :math:`v + \to w` at iteration :math:`t`; and :math:`t \in \{1, \dots, T-1\}` is the number of + message passing iterations. + """ + + def setup( + self, + d_v: int = DEFAULT_ATOM_FDIM, + d_e: int = DEFAULT_BOND_FDIM, + d_h: int = DEFAULT_HIDDEN_DIM, + d_vd: int | None = None, + bias: bool = False, + ): + W_i = nn.Linear(d_v + d_e, d_h, bias) + W_h = nn.Linear(d_h, d_h, bias) + W_o = nn.Linear(d_v + d_h, d_h) + # initialize W_d only when d_vd is neither 0 nor None + W_d = nn.Linear(d_h + d_vd, d_h + d_vd) if d_vd else None + + return W_i, W_h, W_o, W_d + + def initialize(self, bmg: BatchMolGraph) -> Tensor: + return self.W_i(torch.cat([bmg.V[bmg.edge_index[0]], bmg.E], dim=1)) + + def message(self, H: Tensor, bmg: BatchMolGraph) -> Tensor: + index_torch = bmg.edge_index[1].unsqueeze(1).repeat(1, H.shape[1]) + M_all = torch.zeros(len(bmg.V), H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_( + 0, index_torch, H, reduce="sum", include_self=False + )[bmg.edge_index[0]] + M_rev = H[bmg.rev_edge_index] + + return M_all - M_rev + + +class AtomMessagePassing(_MessagePassingBase): + r"""A :class:`AtomMessagePassing` encodes a batch of molecular graphs by passing messages along + atoms. + + It implements the following operation: + + .. 
math:: + + h_v^{(0)} &= \tau \left( \mathbf{W}_i(x_v) \right) \\ + m_v^{(t)} &= \sum_{u \in \mathcal{N}(v)} h_u^{(t-1)} \mathbin\Vert e_{uv} \\ + h_v^{(t)} &= \tau\left(h_v^{(0)} + \mathbf{W}_h m_v^{(t-1)}\right) \\ + m_v^{(T)} &= \sum_{w \in \mathcal{N}(v)} h_w^{(T-1)} \\ + h_v^{(T)} &= \tau \left (\mathbf{W}_o \left( x_v \mathbin\Vert m_{v}^{(T)} \right) \right), + + where :math:`\tau` is the activation function; :math:`\mathbf{W}_i`, :math:`\mathbf{W}_h`, and + :math:`\mathbf{W}_o` are learned weight matrices; :math:`e_{vw}` is the feature vector of the + bond between atoms :math:`v` and :math:`w`; :math:`x_v` is the feature vector of atom :math:`v`; + :math:`h_v^{(t)}` is the hidden representation of atom :math:`v` at iteration :math:`t`; + :math:`m_v^{(t)}` is the message received by atom :math:`v` at iteration :math:`t`; and + :math:`t \in \{1, \dots, T\}` is the number of message passing iterations. + """ + + def setup( + self, + d_v: int = DEFAULT_ATOM_FDIM, + d_e: int = DEFAULT_BOND_FDIM, + d_h: int = DEFAULT_HIDDEN_DIM, + d_vd: int | None = None, + bias: bool = False, + ): + W_i = nn.Linear(d_v, d_h, bias) + W_h = nn.Linear(d_e + d_h, d_h, bias) + W_o = nn.Linear(d_v + d_h, d_h) + # initialize W_d only when d_vd is neither 0 nor None + W_d = nn.Linear(d_h + d_vd, d_h + d_vd) if d_vd else None + + return W_i, W_h, W_o, W_d + + def initialize(self, bmg: BatchMolGraph) -> Tensor: + return self.W_i(bmg.V[bmg.edge_index[0]]) + + def message(self, H: Tensor, bmg: BatchMolGraph): + H = torch.cat((H, bmg.E), dim=1) + index_torch = bmg.edge_index[1].unsqueeze(1).repeat(1, H.shape[1]) + return torch.zeros(len(bmg.V), H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_( + 0, index_torch, H, reduce="sum", include_self=False + )[bmg.edge_index[0]] diff --git a/chemprop/chemprop/nn/message_passing/multi.py b/chemprop/chemprop/nn/message_passing/multi.py new file mode 100644 index 0000000000000000000000000000000000000000..98a9cb84c55dbba5c8fa50ac56cac0c30a2171b8 --- /dev/null +++ b/chemprop/chemprop/nn/message_passing/multi.py @@ -0,0 +1,80 @@ +import logging +from typing import Iterable, Sequence + +from torch import Tensor, nn + +from chemprop.data import BatchMolGraph +from chemprop.nn.hparams import HasHParams +from chemprop.nn.message_passing.proto import MessagePassing + +logger = logging.getLogger(__name__) + + +class MulticomponentMessagePassing(nn.Module, HasHParams): + """A `MulticomponentMessagePassing` performs message-passing on each individual input in a + multicomponent input then concatenates the representation of each input to construct a + global representation + + Parameters + ---------- + blocks : Sequence[MessagePassing] + the invidual message-passing blocks for each input + n_components : int + the number of components in each input + shared : bool, default=False + whether one block will be shared among all components in an input. If not, a separate + block will be learned for each component. + """ + + def __init__(self, blocks: Sequence[MessagePassing], n_components: int, shared: bool = False): + super().__init__() + self.hparams = { + "cls": self.__class__, + "blocks": [block.hparams for block in blocks], + "n_components": n_components, + "shared": shared, + } + + if len(blocks) == 0: + raise ValueError("arg 'blocks' was empty!") + if shared and len(blocks) > 1: + logger.warning( + "More than 1 block was supplied but 'shared' was True! Using only the 0th block..." 
+ ) + elif not shared and len(blocks) != n_components: + raise ValueError( + "arg 'n_components' must be equal to `len(blocks)` if 'shared' is False! " + f"got: {n_components} and {len(blocks)}, respectively." + ) + + self.n_components = n_components + self.shared = shared + self.blocks = nn.ModuleList([blocks[0]] * self.n_components if shared else blocks) + + def __len__(self) -> int: + return len(self.blocks) + + @property + def output_dim(self) -> int: + d_o = sum(block.output_dim for block in self.blocks) + + return d_o + + def forward(self, bmgs: Iterable[BatchMolGraph], V_ds: Iterable[Tensor | None]) -> list[Tensor]: + """Encode the multicomponent inputs + + Parameters + ---------- + bmgs : Iterable[BatchMolGraph] + V_ds : Iterable[Tensor | None] + + Returns + ------- + list[Tensor] + a list of tensors of shape `V x d_i` containing the respective encodings of the `i`\th + component, where `d_i` is the output dimension of the `i`\th encoder + """ + if V_ds is None: + return [block(bmg) for block, bmg in zip(self.blocks, bmgs)] + else: + return [block(bmg, V_d) for block, bmg, V_d in zip(self.blocks, bmgs, V_ds)] diff --git a/chemprop/chemprop/nn/message_passing/proto.py b/chemprop/chemprop/nn/message_passing/proto.py new file mode 100644 index 0000000000000000000000000000000000000000..f00c8a36002c36485da6fbdb08d6137c6d954765 --- /dev/null +++ b/chemprop/chemprop/nn/message_passing/proto.py @@ -0,0 +1,35 @@ +from abc import abstractmethod + +from torch import Tensor, nn + +from chemprop.data import BatchMolGraph +from chemprop.nn.hparams import HasHParams + + +class MessagePassing(nn.Module, HasHParams): + """A :class:`MessagePassing` module encodes a batch of molecular graphs + using message passing to learn vertex-level hidden representations.""" + + input_dim: int + output_dim: int + + @abstractmethod + def forward(self, bmg: BatchMolGraph, V_d: Tensor | None = None) -> Tensor: + """Encode a batch of molecular graphs. + + Parameters + ---------- + bmg: BatchMolGraph + the batch of :class:`~chemprop.featurizers.molgraph.MolGraph`\s to encode + V_d : Tensor | None, default=None + an optional tensor of shape `V x d_vd` containing additional descriptors for each atom + in the batch. These will be concatenated to the learned atomic descriptors and + transformed before the readout phase. + + Returns + ------- + Tensor + a tensor of shape `V x d_h` or `V x (d_h + d_vd)` containing the hidden representation + of each vertex in the batch of graphs. 
The feature dimension depends on whether + additional atom descriptors were provided + """ diff --git a/chemprop/chemprop/nn/metrics.py b/chemprop/chemprop/nn/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..cbbb346f4f9e61a11d97e857aee8c75de28ed44f --- /dev/null +++ b/chemprop/chemprop/nn/metrics.py @@ -0,0 +1,567 @@ +from abc import abstractmethod + +from numpy.typing import ArrayLike +import torch +from torch import Tensor +from torch.nn import functional as F +import torchmetrics +from torchmetrics.utilities.compute import auc +from torchmetrics.utilities.data import dim_zero_cat + +from chemprop.utils.registry import ClassRegistry + +__all__ = [ + "ChempropMetric", + "LossFunctionRegistry", + "MetricRegistry", + "MSE", + "MAE", + "RMSE", + "BoundedMixin", + "BoundedMSE", + "BoundedMAE", + "BoundedRMSE", + "BinaryAccuracy", + "BinaryAUPRC", + "BinaryAUROC", + "BinaryF1Score", + "BinaryMCCMetric", + "BoundedMAE", + "BoundedMSE", + "BoundedRMSE", + "MetricRegistry", + "MulticlassMCCMetric", + "R2Score", + "MVELoss", + "EvidentialLoss", + "BCELoss", + "CrossEntropyLoss", + "BinaryMCCLoss", + "BinaryMCCMetric", + "MulticlassMCCLoss", + "MulticlassMCCMetric", + "ClassificationMixin", + "BinaryAUROC", + "BinaryAUPRC", + "BinaryAccuracy", + "BinaryF1Score", + "DirichletLoss", + "SID", + "Wasserstein", + "QuantileLoss", +] + + +class ChempropMetric(torchmetrics.Metric): + is_differentiable = True + higher_is_better = False + full_state_update = False + + def __init__(self, task_weights: ArrayLike = 1.0): + """ + Parameters + ---------- + task_weights : ArrayLike, default=1.0 + the per-task weights of shape `t` or `1 x t`. Defaults to all tasks having a weight of 1. + """ + super().__init__() + task_weights = torch.as_tensor(task_weights, dtype=torch.float).view(1, -1) + self.register_buffer("task_weights", task_weights) + + self.add_state("total_loss", default=torch.tensor(0.0), dist_reduce_fx="sum") + self.add_state("num_samples", default=torch.tensor(0), dist_reduce_fx="sum") + + def update( + self, + preds: Tensor, + targets: Tensor, + mask: Tensor | None = None, + weights: Tensor | None = None, + lt_mask: Tensor | None = None, + gt_mask: Tensor | None = None, + ) -> None: + """Calculate the mean loss function value given predicted and target values + + Parameters + ---------- + preds : Tensor + a tensor of shape `b x t x u` (regression), `b x t` (binary classification), or + `b x t x c` (multiclass classification) containing the predictions, where `b` is the + batch size, `t` is the number of tasks to predict, `u` is the number of + targets to predict for each task, and `c` is the number of classes. 
+ targets : Tensor + a float tensor of shape `b x t` containing the target values + mask : Tensor + a boolean tensor of shape `b x t` indicating whether the given prediction should be + included in the loss calculation + weights : Tensor + a tensor of shape `b` or `b x 1` containing the per-sample weight + lt_mask: Tensor + gt_mask: Tensor + """ + mask = torch.ones_like(targets, dtype=torch.bool) if mask is None else mask + weights = torch.ones_like(targets, dtype=torch.float) if weights is None else weights + lt_mask = torch.zeros_like(targets, dtype=torch.bool) if lt_mask is None else lt_mask + gt_mask = torch.zeros_like(targets, dtype=torch.bool) if gt_mask is None else gt_mask + + L = self._calc_unreduced_loss(preds, targets, mask, weights, lt_mask, gt_mask) + L = L * weights.view(-1, 1) * self.task_weights * mask + + self.total_loss += L.sum() + self.num_samples += mask.sum() + + def compute(self): + return self.total_loss / self.num_samples + + @abstractmethod + def _calc_unreduced_loss(self, preds, targets, mask, weights, lt_mask, gt_mask) -> Tensor: + """Calculate a tensor of shape `b x t` containing the unreduced loss values.""" + + def extra_repr(self) -> str: + return f"task_weights={self.task_weights.tolist()}" + + +LossFunctionRegistry = ClassRegistry[ChempropMetric]() +MetricRegistry = ClassRegistry[ChempropMetric]() + + +@LossFunctionRegistry.register("mse") +@MetricRegistry.register("mse") +class MSE(ChempropMetric): + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, *args) -> Tensor: + return F.mse_loss(preds, targets, reduction="none") + + +@MetricRegistry.register("mae") +@LossFunctionRegistry.register("mae") +class MAE(ChempropMetric): + def _calc_unreduced_loss(self, preds, targets, *args) -> Tensor: + return (preds - targets).abs() + + +@LossFunctionRegistry.register("rmse") +@MetricRegistry.register("rmse") +class RMSE(MSE): + def compute(self): + return (self.total_loss / self.num_samples).sqrt() + + +class BoundedMixin: + def _calc_unreduced_loss(self, preds, targets, mask, weights, lt_mask, gt_mask) -> Tensor: + preds = torch.where((preds < targets) & lt_mask, targets, preds) + preds = torch.where((preds > targets) & gt_mask, targets, preds) + + return super()._calc_unreduced_loss(preds, targets, mask, weights) + + +@LossFunctionRegistry.register("bounded-mse") +@MetricRegistry.register("bounded-mse") +class BoundedMSE(BoundedMixin, MSE): + pass + + +@LossFunctionRegistry.register("bounded-mae") +@MetricRegistry.register("bounded-mae") +class BoundedMAE(BoundedMixin, MAE): + pass + + +@LossFunctionRegistry.register("bounded-rmse") +@MetricRegistry.register("bounded-rmse") +class BoundedRMSE(BoundedMixin, RMSE): + pass + + +@MetricRegistry.register("r2") +class R2Score(torchmetrics.R2Score): + def __init__(self, task_weights: ArrayLike = 1.0, **kwargs): + """ + Parameters + ---------- + task_weights : ArrayLike = 1.0 + .. important:: + Ignored. Maintained for compatibility with :class:`ChempropMetric` + """ + super().__init__() + task_weights = torch.as_tensor(task_weights, dtype=torch.float).view(1, -1) + self.register_buffer("task_weights", task_weights) + + def update(self, preds: Tensor, targets: Tensor, mask: Tensor, *args, **kwargs): + super().update(preds[mask], targets[mask]) + + +@LossFunctionRegistry.register("mve") +class MVELoss(ChempropMetric): + """Calculate the loss using Eq. 9 from [nix1994]_ + + References + ---------- + .. [nix1994] Nix, D. A.; Weigend, A. S. 
"Estimating the mean and variance of the target + probability distribution." Proceedings of 1994 IEEE International Conference on Neural + Networks, 1994 https://doi.org/10.1109/icnn.1994.374138 + """ + + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, *args) -> Tensor: + mean, var = torch.unbind(preds, dim=-1) + + L_sos = (mean - targets) ** 2 / (2 * var) + L_kl = (2 * torch.pi * var).log() / 2 + + return L_sos + L_kl + + +@LossFunctionRegistry.register("evidential") +class EvidentialLoss(ChempropMetric): + """Calculate the loss using Eqs. 8, 9, and 10 from [amini2020]_. See also [soleimany2021]_. + + References + ---------- + .. [amini2020] Amini, A; Schwarting, W.; Soleimany, A.; Rus, D.; + "Deep Evidential Regression" Advances in Neural Information Processing Systems; 2020; Vol.33. + https://proceedings.neurips.cc/paper_files/paper/2020/file/aab085461de182608ee9f607f3f7d18f-Paper.pdf + .. [soleimany2021] Soleimany, A.P.; Amini, A.; Goldman, S.; Rus, D.; Bhatia, S.N.; Coley, C.W.; + "Evidential Deep Learning for Guided Molecular Property Prediction and Discovery." ACS + Cent. Sci. 2021, 7, 8, 1356-1367. https://doi.org/10.1021/acscentsci.1c00546 + """ + + def __init__(self, task_weights: ArrayLike = 1.0, v_kl: float = 0.2, eps: float = 1e-8): + super().__init__(task_weights) + self.v_kl = v_kl + self.eps = eps + + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, *args) -> Tensor: + mean, v, alpha, beta = torch.unbind(preds, dim=-1) + + residuals = targets - mean + twoBlambda = 2 * beta * (1 + v) + + L_nll = ( + 0.5 * (torch.pi / v).log() + - alpha * twoBlambda.log() + + (alpha + 0.5) * torch.log(v * residuals**2 + twoBlambda) + + torch.lgamma(alpha) + - torch.lgamma(alpha + 0.5) + ) + + L_reg = (2 * v + alpha) * residuals.abs() + + return L_nll + self.v_kl * (L_reg - self.eps) + + def extra_repr(self) -> str: + parent_repr = super().extra_repr() + return parent_repr + f", v_kl={self.v_kl}, eps={self.eps}" + + +@LossFunctionRegistry.register("bce") +class BCELoss(ChempropMetric): + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, *args) -> Tensor: + return F.binary_cross_entropy_with_logits(preds, targets, reduction="none") + + +@LossFunctionRegistry.register("ce") +class CrossEntropyLoss(ChempropMetric): + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, *args) -> Tensor: + preds = preds.transpose(1, 2) + targets = targets.long() + + return F.cross_entropy(preds, targets, reduction="none") + + +@LossFunctionRegistry.register("binary-mcc") +class BinaryMCCLoss(ChempropMetric): + def __init__(self, task_weights: ArrayLike = 1.0): + """ + Parameters + ---------- + task_weights : ArrayLike, default=1.0 + the per-task weights of shape `t` or `1 x t`. Defaults to all tasks having a weight of 1. 
+ """ + super().__init__(task_weights) + + self.add_state("TP", default=[], dist_reduce_fx="cat") + self.add_state("FP", default=[], dist_reduce_fx="cat") + self.add_state("TN", default=[], dist_reduce_fx="cat") + self.add_state("FN", default=[], dist_reduce_fx="cat") + + def update( + self, + preds: Tensor, + targets: Tensor, + mask: Tensor | None = None, + weights: Tensor | None = None, + *args, + ): + mask = torch.ones_like(targets, dtype=torch.bool) if mask is None else mask + weights = torch.ones_like(targets, dtype=torch.float) if weights is None else weights + + if not (0 <= preds.min() and preds.max() <= 1): # assume logits + preds = preds.sigmoid() + + TP, FP, TN, FN = self._calc_unreduced_loss(preds, targets.long(), mask, weights, *args) + + self.TP += [TP] + self.FP += [FP] + self.TN += [TN] + self.FN += [FN] + + def _calc_unreduced_loss(self, preds, targets, mask, weights, *args) -> Tensor: + TP = (targets * preds * weights * mask).sum(0, keepdim=True) + FP = ((1 - targets) * preds * weights * mask).sum(0, keepdim=True) + TN = ((1 - targets) * (1 - preds) * weights * mask).sum(0, keepdim=True) + FN = (targets * (1 - preds) * weights * mask).sum(0, keepdim=True) + + return TP, FP, TN, FN + + def compute(self): + TP = dim_zero_cat(self.TP).sum(0) + FP = dim_zero_cat(self.FP).sum(0) + TN = dim_zero_cat(self.TN).sum(0) + FN = dim_zero_cat(self.FN).sum(0) + + MCC = (TP * TN - FP * FN) / ((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN) + 1e-8).sqrt() + MCC = MCC * self.task_weights + return 1 - MCC.mean() + + +@MetricRegistry.register("binary-mcc") +class BinaryMCCMetric(BinaryMCCLoss): + def compute(self): + return 1 - super().compute() + + +@LossFunctionRegistry.register("multiclass-mcc") +class MulticlassMCCLoss(ChempropMetric): + """Calculate a soft Matthews correlation coefficient ([mccWiki]_) loss for multiclass + classification based on the implementataion of [mccSklearn]_ + References + ---------- + .. [mccWiki] https://en.wikipedia.org/wiki/Phi_coefficient#Multiclass_case + .. [mccSklearn] https://scikit-learn.org/stable/modules/generated/sklearn.metrics.matthews_corrcoef.html + """ + + def __init__(self, task_weights: ArrayLike = 1.0): + """ + Parameters + ---------- + task_weights : ArrayLike, default=1.0 + the per-task weights of shape `t` or `1 x t`. Defaults to all tasks having a weight of 1. 
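+
+        Example
+        -------
+        A minimal usage sketch (the tensors are assumed; ``preds`` holds class probabilities or logits)::
+
+            loss_fn = MulticlassMCCLoss()
+            loss_fn.update(preds, targets)  # preds: `b x t x c`, targets: `b x t` class indices
+            loss = loss_fn.compute()  # scalar: 1 - mean soft multiclass MCC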
+ """ + super().__init__(task_weights) + + self.add_state("p", default=[], dist_reduce_fx="cat") + self.add_state("t", default=[], dist_reduce_fx="cat") + self.add_state("c", default=[], dist_reduce_fx="cat") + self.add_state("s", default=[], dist_reduce_fx="cat") + + def update( + self, + preds: Tensor, + targets: Tensor, + mask: Tensor | None = None, + weights: Tensor | None = None, + *args, + ): + mask = torch.ones_like(targets, dtype=torch.bool) if mask is None else mask + weights = ( + torch.ones_like(targets, dtype=torch.float) if weights is None else weights.view(-1, 1) + ) + + if not (0 <= preds.min() and preds.max() <= 1): # assume logits + preds = preds.softmax(2) + + p, t, c, s = self._calc_unreduced_loss(preds, targets.long(), mask, weights, *args) + + self.p += [p] + self.t += [t] + self.c += [c] + self.s += [s] + + def _calc_unreduced_loss(self, preds, targets, mask, weights, *args) -> Tensor: + device = preds.device + C = preds.shape[2] + bin_targets = torch.eye(C, device=device)[targets] + bin_preds = torch.eye(C, device=device)[preds.argmax(-1)] + masked_data_weights = weights.unsqueeze(2) * mask.unsqueeze(2) + p = (bin_preds * masked_data_weights).sum(0, keepdims=True) + t = (bin_targets * masked_data_weights).sum(0, keepdims=True) + c = (bin_preds * bin_targets * masked_data_weights).sum(2).sum(0, keepdims=True) + s = (preds * masked_data_weights).sum(2).sum(0, keepdims=True) + + return p, t, c, s + + def compute(self): + p = dim_zero_cat(self.p).sum(0) + t = dim_zero_cat(self.t).sum(0) + c = dim_zero_cat(self.c).sum(0) + s = dim_zero_cat(self.s).sum(0) + s2 = s.square() + + # the `einsum` calls amount to calculating the batched dot product + cov_ytyp = c * s - torch.einsum("ij,ij->i", p, t) + cov_ypyp = s2 - torch.einsum("ij,ij->i", p, p) + cov_ytyt = s2 - torch.einsum("ij,ij->i", t, t) + + x = cov_ypyp * cov_ytyt + MCC = torch.where(x == 0, torch.tensor(0.0), cov_ytyp / x.sqrt()) + MCC = MCC * self.task_weights + + return 1 - MCC.mean() + + +@MetricRegistry.register("multiclass-mcc") +class MulticlassMCCMetric(MulticlassMCCLoss): + def compute(self): + return 1 - super().compute() + + +class ClassificationMixin: + def __init__(self, task_weights: ArrayLike = 1.0, **kwargs): + """ + Parameters + ---------- + task_weights : ArrayLike = 1.0 + .. important:: + Ignored. Maintained for compatibility with :class:`ChempropMetric` + """ + super().__init__() + task_weights = torch.as_tensor(task_weights, dtype=torch.float).view(1, -1) + self.register_buffer("task_weights", task_weights) + + def update(self, preds: Tensor, targets: Tensor, mask: Tensor, *args, **kwargs): + super().update(preds[mask], targets[mask].long()) + + +@MetricRegistry.register("roc") +class BinaryAUROC(ClassificationMixin, torchmetrics.classification.BinaryAUROC): + pass + + +@MetricRegistry.register("prc") +class BinaryAUPRC(ClassificationMixin, torchmetrics.classification.BinaryPrecisionRecallCurve): + def compute(self) -> Tensor: + p, r, _ = super().compute() + return auc(r, p) + + +@MetricRegistry.register("accuracy") +class BinaryAccuracy(ClassificationMixin, torchmetrics.classification.BinaryAccuracy): + pass + + +@MetricRegistry.register("f1") +class BinaryF1Score(ClassificationMixin, torchmetrics.classification.BinaryF1Score): + pass + + +@LossFunctionRegistry.register("dirichlet") +class DirichletLoss(ChempropMetric): + """Uses the loss function from [sensoy2018]_ based on the implementation at [sensoyGithub]_ + + References + ---------- + .. [sensoy2018] Sensoy, M.; Kaplan, L.; Kandemir, M. 
"Evidential deep learning to quantify + classification uncertainty." NeurIPS, 2018, 31. https://doi.org/10.48550/arXiv.1806.01768 + .. [sensoyGithub] https://muratsensoy.github.io/uncertainty.html#Define-the-loss-function + """ + + def __init__(self, task_weights: ArrayLike = 1.0, v_kl: float = 0.2): + super().__init__(task_weights) + self.v_kl = v_kl + + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, *args) -> Tensor: + targets = torch.eye(preds.shape[2], device=preds.device)[targets.long()] + + S = preds.sum(-1, keepdim=True) + p = preds / S + + A = (targets - p).square().sum(-1, keepdim=True) + B = ((p * (1 - p)) / (S + 1)).sum(-1, keepdim=True) + + L_mse = A + B + + alpha = targets + (1 - targets) * preds + beta = torch.ones_like(alpha) + S_alpha = alpha.sum(-1, keepdim=True) + S_beta = beta.sum(-1, keepdim=True) + + ln_alpha = S_alpha.lgamma() - alpha.lgamma().sum(-1, keepdim=True) + ln_beta = beta.lgamma().sum(-1, keepdim=True) - S_beta.lgamma() + + dg0 = torch.digamma(alpha) + dg1 = torch.digamma(S_alpha) + + L_kl = ln_alpha + ln_beta + torch.sum((alpha - beta) * (dg0 - dg1), -1, keepdim=True) + + return (L_mse + self.v_kl * L_kl).mean(-1) + + def extra_repr(self) -> str: + return f"v_kl={self.v_kl}" + + +@LossFunctionRegistry.register("sid") +class SID(ChempropMetric): + def __init__(self, task_weights: ArrayLike = 1.0, threshold: float | None = None, **kwargs): + super().__init__(task_weights, **kwargs) + + self.threshold = threshold + + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, mask: Tensor, *args) -> Tensor: + if self.threshold is not None: + preds = preds.clamp(min=self.threshold) + + preds_norm = preds / (preds * mask).sum(1, keepdim=True) + + targets = targets.masked_fill(~mask, 1) + preds_norm = preds_norm.masked_fill(~mask, 1) + + return (preds_norm / targets).log() * preds_norm + (targets / preds_norm).log() * targets + + def extra_repr(self) -> str: + return f"threshold={self.threshold}" + + +@LossFunctionRegistry.register(["earthmovers", "wasserstein"]) +class Wasserstein(ChempropMetric): + def __init__(self, task_weights: ArrayLike = 1.0, threshold: float | None = None): + super().__init__(task_weights) + + self.threshold = threshold + + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, mask: Tensor, *args) -> Tensor: + if self.threshold is not None: + preds = preds.clamp(min=self.threshold) + + preds_norm = preds / (preds * mask).sum(1, keepdim=True) + + return (targets.cumsum(1) - preds_norm.cumsum(1)).abs() + + def extra_repr(self) -> str: + return f"threshold={self.threshold}" + + +@LossFunctionRegistry.register(["quantile", "pinball"]) +class QuantileLoss(ChempropMetric): + def __init__(self, task_weights: ArrayLike = 1.0, alpha: float = 0.1): + super().__init__(task_weights) + self.alpha = alpha + + bounds = torch.tensor([-1 / 2, 1 / 2]).view(-1, 1, 1) + tau = torch.tensor([[alpha / 2, 1 - alpha / 2], [alpha / 2 - 1, -alpha / 2]]).view( + 2, 2, 1, 1 + ) + + self.register_buffer("bounds", bounds) + self.register_buffer("tau", tau) + + def _calc_unreduced_loss(self, preds: Tensor, targets: Tensor, mask: Tensor, *args) -> Tensor: + mean, interval = torch.unbind(preds, dim=-1) + + interval_bounds = self.bounds * interval + pred_bounds = mean + interval_bounds + error_bounds = targets - pred_bounds + loss_bounds = (self.tau * error_bounds).amax(0) + + return loss_bounds.sum(0) + + def extra_repr(self) -> str: + return f"alpha={self.alpha}" diff --git a/chemprop/chemprop/nn/predictors.py 
b/chemprop/chemprop/nn/predictors.py new file mode 100644 index 0000000000000000000000000000000000000000..45d6ed415a7f8b599d1d213f768590c8ee3a8112 --- /dev/null +++ b/chemprop/chemprop/nn/predictors.py @@ -0,0 +1,369 @@ +from abc import abstractmethod + +from lightning.pytorch.core.mixins import HyperparametersMixin +import torch +from torch import Tensor, nn +from torch.nn import functional as F + +from chemprop.conf import DEFAULT_HIDDEN_DIM +from chemprop.nn.ffn import MLP +from chemprop.nn.hparams import HasHParams +from chemprop.nn.metrics import ( + MSE, + SID, + BCELoss, + BinaryAUROC, + ChempropMetric, + CrossEntropyLoss, + DirichletLoss, + EvidentialLoss, + MulticlassMCCMetric, + MVELoss, + QuantileLoss, +) +from chemprop.nn.transforms import UnscaleTransform +from chemprop.utils import ClassRegistry, Factory + +__all__ = [ + "Predictor", + "PredictorRegistry", + "RegressionFFN", + "MveFFN", + "EvidentialFFN", + "BinaryClassificationFFNBase", + "BinaryClassificationFFN", + "BinaryDirichletFFN", + "MulticlassClassificationFFN", + "MulticlassDirichletFFN", + "SpectralFFN", +] + + +class Predictor(nn.Module, HasHParams): + r"""A :class:`Predictor` is a protocol that defines a differentiable function + :math:`f` : \mathbb R^d \mapsto \mathbb R^o""" + + input_dim: int + """the input dimension""" + output_dim: int + """the output dimension""" + n_tasks: int + """the number of tasks `t` to predict for each input""" + n_targets: int + """the number of targets `s` to predict for each task `t`""" + criterion: ChempropMetric + """the loss function to use for training""" + task_weights: Tensor + """the weights to apply to each task when calculating the loss""" + output_transform: UnscaleTransform + """the transform to apply to the output of the predictor""" + + @abstractmethod + def forward(self, Z: Tensor) -> Tensor: + pass + + @abstractmethod + def train_step(self, Z: Tensor) -> Tensor: + pass + + @abstractmethod + def encode(self, Z: Tensor, i: int) -> Tensor: + """Calculate the :attr:`i`-th hidden representation + + Parameters + ---------- + Z : Tensor + a tensor of shape ``n x d`` containing the input data to encode, where ``d`` is the + input dimensionality. + i : int + The stop index of slice of the MLP used to encode the input. That is, use all + layers in the MLP *up to* :attr:`i` (i.e., ``MLP[:i]``). This can be any integer + value, and the behavior of this function is dependent on the underlying list + slicing behavior. For example: + + * ``i=0``: use a 0-layer MLP (i.e., a no-op) + * ``i=1``: use only the first block + * ``i=-1``: use *up to* the final block + + Returns + ------- + Tensor + a tensor of shape ``n x h`` containing the :attr:`i`-th hidden representation, where + ``h`` is the number of neurons in the :attr:`i`-th hidden layer. + """ + pass + + +PredictorRegistry = ClassRegistry[Predictor]() + + +class _FFNPredictorBase(Predictor, HyperparametersMixin): + """A :class:`_FFNPredictorBase` is the base class for all :class:`Predictor`\s that use an + underlying :class:`SimpleFFN` to map the learned fingerprint to the desired output. 
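+
+    Example
+    -------
+    A usage sketch with the concrete :class:`RegressionFFN` subclass (shapes assumed)::
+
+        ffn = RegressionFFN(n_tasks=2, input_dim=300)
+        Z = torch.randn(8, 300)  # a batch of 8 learned fingerprints
+        Y = ffn(Z)  # shape `8 x 2`: one prediction per task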
+ """ + + _T_default_criterion: ChempropMetric + _T_default_metric: ChempropMetric + + def __init__( + self, + n_tasks: int = 1, + input_dim: int = DEFAULT_HIDDEN_DIM, + hidden_dim: int = 300, + n_layers: int = 1, + dropout: float = 0.0, + activation: str = "relu", + criterion: ChempropMetric | None = None, + task_weights: Tensor | None = None, + threshold: float | None = None, + output_transform: UnscaleTransform | None = None, + ): + super().__init__() + # manually add criterion and output_transform to hparams to suppress lightning's warning + # about double saving their state_dict values. + self.save_hyperparameters(ignore=["criterion", "output_transform"]) + self.hparams["criterion"] = criterion + self.hparams["output_transform"] = output_transform + self.hparams["cls"] = self.__class__ + + self.ffn = MLP.build( + input_dim, n_tasks * self.n_targets, hidden_dim, n_layers, dropout, activation + ) + task_weights = torch.ones(n_tasks) if task_weights is None else task_weights + self.criterion = criterion or Factory.build( + self._T_default_criterion, task_weights=task_weights, threshold=threshold + ) + self.output_transform = output_transform if output_transform is not None else nn.Identity() + + @property + def input_dim(self) -> int: + return self.ffn.input_dim + + @property + def output_dim(self) -> int: + return self.ffn.output_dim + + @property + def n_tasks(self) -> int: + return self.output_dim // self.n_targets + + def forward(self, Z: Tensor) -> Tensor: + return self.ffn(Z) + + def encode(self, Z: Tensor, i: int) -> Tensor: + return self.ffn[:i](Z) + + +@PredictorRegistry.register("regression") +class RegressionFFN(_FFNPredictorBase): + n_targets = 1 + _T_default_criterion = MSE + _T_default_metric = MSE + + def forward(self, Z: Tensor) -> Tensor: + return self.output_transform(self.ffn(Z)) + + train_step = forward + + +@PredictorRegistry.register("regression-mve") +class MveFFN(RegressionFFN): + n_targets = 2 + _T_default_criterion = MVELoss + + def forward(self, Z: Tensor) -> Tensor: + Y = self.ffn(Z) + mean, var = torch.chunk(Y, self.n_targets, 1) + var = F.softplus(var) + + mean = self.output_transform(mean) + if not isinstance(self.output_transform, nn.Identity): + var = self.output_transform.transform_variance(var) + + return torch.stack((mean, var), dim=2) + + train_step = forward + + +@PredictorRegistry.register("regression-evidential") +class EvidentialFFN(RegressionFFN): + n_targets = 4 + _T_default_criterion = EvidentialLoss + + def forward(self, Z: Tensor) -> Tensor: + Y = self.ffn(Z) + mean, v, alpha, beta = torch.chunk(Y, self.n_targets, 1) + v = F.softplus(v) + alpha = F.softplus(alpha) + 1 + beta = F.softplus(beta) + + mean = self.output_transform(mean) + if not isinstance(self.output_transform, nn.Identity): + beta = self.output_transform.transform_variance(beta) + + return torch.stack((mean, v, alpha, beta), dim=2) + + train_step = forward + + +@PredictorRegistry.register("regression-quantile") +class QuantileFFN(RegressionFFN): + n_targets = 2 + _T_default_criterion = QuantileLoss + + def forward(self, Z: Tensor) -> Tensor: + Y = super().forward(Z) + lower_bound, upper_bound = torch.chunk(Y, self.n_targets, 1) + + lower_bound = self.output_transform(lower_bound) + upper_bound = self.output_transform(upper_bound) + + mean = (lower_bound + upper_bound) / 2 + interval = upper_bound - lower_bound + + return torch.stack((mean, interval), dim=2) + + train_step = forward + + +class BinaryClassificationFFNBase(_FFNPredictorBase): + pass + + 
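+
+# NOTE (illustrative): the binary classification predictors below separate `train_step`,
+# which returns raw logits consumed by the default `BCELoss`
+# (`binary_cross_entropy_with_logits`), from `forward`, which applies a sigmoid so that
+# inference yields probabilities in [0, 1]. A minimal sketch (names and shapes assumed):
+#
+#     ffn = BinaryClassificationFFN(n_tasks=1, input_dim=300)
+#     logits = ffn.train_step(Z)  # used with BCELoss during training
+#     probs = ffn(Z)  # sigmoid(logits), used at inference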
+@PredictorRegistry.register("classification") +class BinaryClassificationFFN(BinaryClassificationFFNBase): + n_targets = 1 + _T_default_criterion = BCELoss + _T_default_metric = BinaryAUROC + + def forward(self, Z: Tensor) -> Tensor: + Y = super().forward(Z) + + return Y.sigmoid() + + def train_step(self, Z: Tensor) -> Tensor: + return super().forward(Z) + + +@PredictorRegistry.register("classification-dirichlet") +class BinaryDirichletFFN(BinaryClassificationFFNBase): + n_targets = 2 + _T_default_criterion = DirichletLoss + _T_default_metric = BinaryAUROC + + def forward(self, Z: Tensor) -> Tensor: + Y = super().forward(Z).reshape(len(Z), -1, 2) + + alpha = F.softplus(Y) + 1 + + u = 2 / alpha.sum(-1) + Y = alpha / alpha.sum(-1, keepdim=True) + + return torch.stack((Y[..., 1], u), dim=2) + + def train_step(self, Z: Tensor) -> Tensor: + Y = super().forward(Z).reshape(len(Z), -1, 2) + + return F.softplus(Y) + 1 + + +@PredictorRegistry.register("multiclass") +class MulticlassClassificationFFN(_FFNPredictorBase): + n_targets = 1 + _T_default_criterion = CrossEntropyLoss + _T_default_metric = MulticlassMCCMetric + + def __init__( + self, + n_classes: int, + n_tasks: int = 1, + input_dim: int = DEFAULT_HIDDEN_DIM, + hidden_dim: int = 300, + n_layers: int = 1, + dropout: float = 0.0, + activation: str = "relu", + criterion: ChempropMetric | None = None, + task_weights: Tensor | None = None, + threshold: float | None = None, + output_transform: UnscaleTransform | None = None, + ): + task_weights = torch.ones(n_tasks) if task_weights is None else task_weights + super().__init__( + n_tasks * n_classes, + input_dim, + hidden_dim, + n_layers, + dropout, + activation, + criterion, + task_weights, + threshold, + output_transform, + ) + + self.n_classes = n_classes + + @property + def n_tasks(self) -> int: + return self.output_dim // (self.n_targets * self.n_classes) + + def forward(self, Z: Tensor) -> Tensor: + return self.train_step(Z).softmax(-1) + + def train_step(self, Z: Tensor) -> Tensor: + return super().forward(Z).reshape(Z.shape[0], -1, self.n_classes) + + +@PredictorRegistry.register("multiclass-dirichlet") +class MulticlassDirichletFFN(MulticlassClassificationFFN): + _T_default_criterion = DirichletLoss + _T_default_metric = MulticlassMCCMetric + + def forward(self, Z: Tensor) -> Tensor: + Y = super().train_step(Z) + + alpha = F.softplus(Y) + 1 + + Y = alpha / alpha.sum(-1, keepdim=True) + + return Y + + def train_step(self, Z: Tensor) -> Tensor: + Y = super().train_step(Z) + + return F.softplus(Y) + 1 + + +class _Exp(nn.Module): + def forward(self, X: Tensor): + return X.exp() + + +@PredictorRegistry.register("spectral") +class SpectralFFN(_FFNPredictorBase): + n_targets = 1 + _T_default_criterion = SID + _T_default_metric = SID + + def __init__(self, *args, spectral_activation: str | None = "softplus", **kwargs): + super().__init__(*args, **kwargs) + + match spectral_activation: + case "exp": + spectral_activation = _Exp() + case "softplus" | None: + spectral_activation = nn.Softplus() + case _: + raise ValueError( + f"Unknown spectral activation: {spectral_activation}. " + "Expected one of 'exp', 'softplus' or None." 
+ ) + + self.ffn.add_module("spectral_activation", spectral_activation) + + def forward(self, Z: Tensor) -> Tensor: + Y = super().forward(Z) + Y = self.ffn.spectral_activation(Y) + return Y / Y.sum(1, keepdim=True) + + train_step = forward diff --git a/chemprop/chemprop/nn/transforms.py b/chemprop/chemprop/nn/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..2af42099aab6409b138316342babd9c209b1b060 --- /dev/null +++ b/chemprop/chemprop/nn/transforms.py @@ -0,0 +1,70 @@ +from numpy.typing import ArrayLike +from sklearn.preprocessing import StandardScaler +import torch +from torch import Tensor, nn + +from chemprop.data.collate import BatchMolGraph + + +class _ScaleTransformMixin(nn.Module): + def __init__(self, mean: ArrayLike, scale: ArrayLike, pad: int = 0): + super().__init__() + + mean = torch.cat([torch.zeros(pad), torch.tensor(mean, dtype=torch.float)]) + scale = torch.cat([torch.ones(pad), torch.tensor(scale, dtype=torch.float)]) + + if mean.shape != scale.shape: + raise ValueError( + f"uneven shapes for 'mean' and 'scale'! got: mean={mean.shape}, scale={scale.shape}" + ) + + self.register_buffer("mean", mean.unsqueeze(0)) + self.register_buffer("scale", scale.unsqueeze(0)) + + @classmethod + def from_standard_scaler(cls, scaler: StandardScaler, pad: int = 0): + return cls(scaler.mean_, scaler.scale_, pad=pad) + + def to_standard_scaler(self, anti_pad: int = 0) -> StandardScaler: + scaler = StandardScaler() + scaler.mean_ = self.mean[anti_pad:].numpy() + scaler.scale_ = self.scale[anti_pad:].numpy() + return scaler + + +class ScaleTransform(_ScaleTransformMixin): + def forward(self, X: Tensor) -> Tensor: + if self.training: + return X + + return (X - self.mean) / self.scale + + +class UnscaleTransform(_ScaleTransformMixin): + def forward(self, X: Tensor) -> Tensor: + if self.training: + return X + + return X * self.scale + self.mean + + def transform_variance(self, var: Tensor) -> Tensor: + if self.training: + return var + return var * (self.scale**2) + + +class GraphTransform(nn.Module): + def __init__(self, V_transform: ScaleTransform, E_transform: ScaleTransform): + super().__init__() + + self.V_transform = V_transform + self.E_transform = E_transform + + def forward(self, bmg: BatchMolGraph) -> BatchMolGraph: + if self.training: + return bmg + + bmg.V = self.V_transform(bmg.V) + bmg.E = self.E_transform(bmg.E) + + return bmg diff --git a/chemprop/chemprop/nn/utils.py b/chemprop/chemprop/nn/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..19913bd0164f385944deb01a70c58fbdb7cd8587 --- /dev/null +++ b/chemprop/chemprop/nn/utils.py @@ -0,0 +1,46 @@ +from enum import auto + +from torch import nn + +from chemprop.utils.utils import EnumMapping + + +class Activation(EnumMapping): + RELU = auto() + LEAKYRELU = auto() + PRELU = auto() + TANH = auto() + SELU = auto() + ELU = auto() + + +def get_activation_function(activation: str | Activation) -> nn.Module: + """Gets an activation function module given the name of the activation. + + See :class:`~chemprop.v2.models.utils.Activation` for available activations. + + Parameters + ---------- + activation : str | Activation + The name of the activation function. + + Returns + ------- + nn.Module + The activation function module. 
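+
+    Example
+    -------
+    Illustrative calls (string inputs are assumed to be resolved by ``Activation.get``)::
+
+        tau = get_activation_function("relu")  # nn.ReLU()
+        tau = get_activation_function(Activation.LEAKYRELU)  # nn.LeakyReLU(0.1)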
+ """ + match Activation.get(activation): + case Activation.RELU: + return nn.ReLU() + case Activation.LEAKYRELU: + return nn.LeakyReLU(0.1) + case Activation.PRELU: + return nn.PReLU() + case Activation.TANH: + return nn.Tanh() + case Activation.SELU: + return nn.SELU() + case Activation.ELU: + return nn.ELU() + case _: + raise RuntimeError("unreachable code reached!") diff --git a/chemprop/chemprop/schedulers.py b/chemprop/chemprop/schedulers.py new file mode 100644 index 0000000000000000000000000000000000000000..843df0f8a75585ea6a309cc1792f93ff15096218 --- /dev/null +++ b/chemprop/chemprop/schedulers.py @@ -0,0 +1,65 @@ +from torch.optim import Optimizer +from torch.optim.lr_scheduler import LambdaLR + + +def build_NoamLike_LRSched( + optimizer: Optimizer, + warmup_steps: int, + cooldown_steps: int, + init_lr: float, + max_lr: float, + final_lr: float, +): + r"""Build a Noam-like learning rate scheduler which schedules the learning rate with a piecewise linear followed + by an exponential decay. + + The learning rate increases linearly from ``init_lr`` to ``max_lr`` over the course of + the first warmup_steps then decreases exponentially to ``final_lr`` over the course of the + remaining ``total_steps - warmup_steps`` (where ``total_steps = total_epochs * steps_per_epoch``). This is roughly based on the learning rate schedule from [1]_, section 5.3. + + Formally, the learning rate schedule is defined as: + + .. math:: + \mathtt{lr}(i) &= + \begin{cases} + \mathtt{init\_lr} + \delta \cdot i &\text{if } i < \mathtt{warmup\_steps} \\ + \mathtt{max\_lr} \cdot \left( \frac{\mathtt{final\_lr}}{\mathtt{max\_lr}} \right)^{\gamma(i)} &\text{otherwise} \\ + \end{cases} + \\ + \delta &\mathrel{:=} + \frac{\mathtt{max\_lr} - \mathtt{init\_lr}}{\mathtt{warmup\_steps}} \\ + \gamma(i) &\mathrel{:=} + \frac{i - \mathtt{warmup\_steps}}{\mathtt{total\_steps} - \mathtt{warmup\_steps}} + + + Parameters + ----------- + optimizer : Optimizer + A PyTorch optimizer. + warmup_steps : int + The number of steps during which to linearly increase the learning rate. + cooldown_steps : int + The number of steps during which to exponential decay the learning rate. + init_lr : float + The initial learning rate. + max_lr : float + The maximum learning rate (achieved after ``warmup_steps``). + final_lr : float + The final learning rate (achieved after ``cooldown_steps``). + + References + ---------- + .. [1] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I. "Attention is all you need." Advances in neural information processing systems, 2017, 30. 
https://arxiv.org/abs/1706.03762 + """ + + def lr_lambda(step: int): + if step < warmup_steps: + warmup_factor = (max_lr - init_lr) / warmup_steps + return step * warmup_factor / init_lr + 1 + elif warmup_steps <= step < warmup_steps + cooldown_steps: + cooldown_factor = (final_lr / max_lr) ** (1 / cooldown_steps) + return (max_lr * (cooldown_factor ** (step - warmup_steps))) / init_lr + else: + return final_lr / init_lr + + return LambdaLR(optimizer, lr_lambda) diff --git a/chemprop/chemprop/train/__pycache__/__init__.cpython-37.pyc b/chemprop/chemprop/train/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44d67a6fc3904d2cdb0108af55f86ed8c7d27c11 Binary files /dev/null and b/chemprop/chemprop/train/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop/chemprop/train/__pycache__/cross_validate.cpython-37.pyc b/chemprop/chemprop/train/__pycache__/cross_validate.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e23c8c9cea5c9ab57acbeb920dd00b62b851e1b Binary files /dev/null and b/chemprop/chemprop/train/__pycache__/cross_validate.cpython-37.pyc differ diff --git a/chemprop/chemprop/train/__pycache__/evaluate.cpython-37.pyc b/chemprop/chemprop/train/__pycache__/evaluate.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44c1858923c6b770124edc0bb26cf82fb4bcc080 Binary files /dev/null and b/chemprop/chemprop/train/__pycache__/evaluate.cpython-37.pyc differ diff --git a/chemprop/chemprop/train/__pycache__/loss_functions.cpython-37.pyc b/chemprop/chemprop/train/__pycache__/loss_functions.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1fab53738a6550cb4998803b7b83e59bfa8f0dce Binary files /dev/null and b/chemprop/chemprop/train/__pycache__/loss_functions.cpython-37.pyc differ diff --git a/chemprop/chemprop/train/__pycache__/make_predictions.cpython-37.pyc b/chemprop/chemprop/train/__pycache__/make_predictions.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92cfc083cdacf41747c5695c45200be25e7778c3 Binary files /dev/null and b/chemprop/chemprop/train/__pycache__/make_predictions.cpython-37.pyc differ diff --git a/chemprop/chemprop/train/__pycache__/metrics.cpython-37.pyc b/chemprop/chemprop/train/__pycache__/metrics.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c04dcf0fa814e32cd868bc918606271acc6eb81 Binary files /dev/null and b/chemprop/chemprop/train/__pycache__/metrics.cpython-37.pyc differ diff --git a/chemprop/chemprop/train/__pycache__/molecule_fingerprint.cpython-37.pyc b/chemprop/chemprop/train/__pycache__/molecule_fingerprint.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..881b8725854d2168acc4645b07ff5c2dbc4bd27c Binary files /dev/null and b/chemprop/chemprop/train/__pycache__/molecule_fingerprint.cpython-37.pyc differ diff --git a/chemprop/chemprop/train/__pycache__/predict.cpython-37.pyc b/chemprop/chemprop/train/__pycache__/predict.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1749473999e3adbeecc35ca7bc9169595d7ab61e Binary files /dev/null and b/chemprop/chemprop/train/__pycache__/predict.cpython-37.pyc differ diff --git a/chemprop/chemprop/train/__pycache__/run_training.cpython-37.pyc b/chemprop/chemprop/train/__pycache__/run_training.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..db47bbf8b1fc85943c5e820bf67488da35d73665 Binary files 
/dev/null and b/chemprop/chemprop/train/__pycache__/run_training.cpython-37.pyc differ diff --git a/chemprop/chemprop/train/__pycache__/train.cpython-37.pyc b/chemprop/chemprop/train/__pycache__/train.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa495ba3f298f0c511b601efc272ee055a9a128b Binary files /dev/null and b/chemprop/chemprop/train/__pycache__/train.cpython-37.pyc differ diff --git a/chemprop/chemprop/types.py b/chemprop/chemprop/types.py new file mode 100644 index 0000000000000000000000000000000000000000..71ef27b18cfde9504644f7e627668d5ce62aa431 --- /dev/null +++ b/chemprop/chemprop/types.py @@ -0,0 +1,3 @@ +from rdkit.Chem import Mol + +Rxn = tuple[Mol, Mol] diff --git a/chemprop/chemprop/uncertainty/__init__.py b/chemprop/chemprop/uncertainty/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d81fe53b0575837d843eaca6ae52e4462149c526 --- /dev/null +++ b/chemprop/chemprop/uncertainty/__init__.py @@ -0,0 +1,94 @@ +from .calibrator import ( + AdaptiveMulticlassConformalCalibrator, + BinaryClassificationCalibrator, + CalibratorBase, + IsotonicCalibrator, + IsotonicMulticlassCalibrator, + MulticlassClassificationCalibrator, + MulticlassConformalCalibrator, + MultilabelConformalCalibrator, + MVEWeightingCalibrator, + PlattCalibrator, + RegressionCalibrator, + RegressionConformalCalibrator, + UncertaintyCalibratorRegistry, + ZelikmanCalibrator, + ZScalingCalibrator, +) +from .estimator import ( # RoundRobinSpectraEstimator, + ClassEstimator, + ClassificationDirichletEstimator, + DropoutEstimator, + EnsembleEstimator, + EvidentialAleatoricEstimator, + EvidentialEpistemicEstimator, + EvidentialTotalEstimator, + MulticlassDirichletEstimator, + MVEEstimator, + NoUncertaintyEstimator, + QuantileRegressionEstimator, + UncertaintyEstimator, + UncertaintyEstimatorRegistry, +) +from .evaluator import ( + BinaryClassificationEvaluator, + CalibrationAreaEvaluator, + ExpectedNormalizedErrorEvaluator, + MulticlassClassificationEvaluator, + MulticlassConformalEvaluator, + MultilabelConformalEvaluator, + NLLClassEvaluator, + NLLMulticlassEvaluator, + NLLRegressionEvaluator, + RegressionConformalEvaluator, + RegressionEvaluator, + SpearmanEvaluator, + UncertaintyEvaluatorRegistry, +) + +__all__ = [ + "AdaptiveMulticlassConformalCalibrator", + "BinaryClassificationCalibrator", + "CalibratorBase", + "IsotonicCalibrator", + "IsotonicMulticlassCalibrator", + "MulticlassClassificationCalibrator", + "MulticlassConformalCalibrator", + "MultilabelConformalCalibrator", + "MVEWeightingCalibrator", + "PlattCalibrator", + "RegressionCalibrator", + "RegressionConformalCalibrator", + "UncertaintyCalibratorRegistry", + "ZelikmanCalibrator", + "ZScalingCalibrator", + "BinaryClassificationEvaluator", + "CalibrationAreaEvaluator", + "ExpectedNormalizedErrorEvaluator", + "MulticlassClassificationEvaluator", + "MetricEvaluator", + "MulticlassConformalEvaluator", + "MultilabelConformalEvaluator", + "NLLClassEvaluator", + "NLLMulticlassEvaluator", + "NLLRegressionEvaluator", + "RegressionConformalEvaluator", + "RegressionEvaluator", + "SpearmanEvaluator", + "UncertaintyEvaluator", + "UncertaintyEvaluatorRegistry", + "ClassificationDirichletEstimator", + "ClassEstimator", + "MulticlassDirichletEstimator", + "DropoutEstimator", + "EnsembleEstimator", + "EvidentialAleatoricEstimator", + "EvidentialEpistemicEstimator", + "EvidentialTotalEstimator", + "MVEEstimator", + "NoUncertaintyEstimator", + "QuantileRegressionEstimator", + # "RoundRobinSpectraEstimator", + 
"UncertaintyEstimator", + "UncertaintyEstimatorRegistry", +] diff --git a/chemprop/chemprop/uncertainty/__pycache__/__init__.cpython-37.pyc b/chemprop/chemprop/uncertainty/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..43c37cc34ec737d54c1d2e08e665efd9c3520b24 Binary files /dev/null and b/chemprop/chemprop/uncertainty/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop/chemprop/uncertainty/__pycache__/uncertainty_calibrator.cpython-37.pyc b/chemprop/chemprop/uncertainty/__pycache__/uncertainty_calibrator.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d522582d59e6606267b4e8ba0eff021c66ffb258 Binary files /dev/null and b/chemprop/chemprop/uncertainty/__pycache__/uncertainty_calibrator.cpython-37.pyc differ diff --git a/chemprop/chemprop/uncertainty/__pycache__/uncertainty_estimator.cpython-37.pyc b/chemprop/chemprop/uncertainty/__pycache__/uncertainty_estimator.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..046c33150f6a21736d768d14293ef97fd8c23a17 Binary files /dev/null and b/chemprop/chemprop/uncertainty/__pycache__/uncertainty_estimator.cpython-37.pyc differ diff --git a/chemprop/chemprop/uncertainty/__pycache__/uncertainty_evaluator.cpython-37.pyc b/chemprop/chemprop/uncertainty/__pycache__/uncertainty_evaluator.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc1951758f631ce5e1331991ec1ade98b9244cff Binary files /dev/null and b/chemprop/chemprop/uncertainty/__pycache__/uncertainty_evaluator.cpython-37.pyc differ diff --git a/chemprop/chemprop/uncertainty/__pycache__/uncertainty_predictor.cpython-37.pyc b/chemprop/chemprop/uncertainty/__pycache__/uncertainty_predictor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..131fadea910f50ac7b82fa340cf6cbcf39d0c981 Binary files /dev/null and b/chemprop/chemprop/uncertainty/__pycache__/uncertainty_predictor.cpython-37.pyc differ diff --git a/chemprop/chemprop/uncertainty/calibrator.py b/chemprop/chemprop/uncertainty/calibrator.py new file mode 100644 index 0000000000000000000000000000000000000000..6c9769e7aed4bfaf1e54030932be11fb54304171 --- /dev/null +++ b/chemprop/chemprop/uncertainty/calibrator.py @@ -0,0 +1,715 @@ +from abc import ABC, abstractmethod +import logging +import math +from typing import Self + +import numpy as np +from scipy.optimize import fmin +from scipy.special import expit, logit, softmax +from sklearn.isotonic import IsotonicRegression +import torch +from torch import Tensor + +from chemprop.utils.registry import ClassRegistry + +logger = logging.getLogger(__name__) + + +class CalibratorBase(ABC): + """ + A base class for calibrating the predicted uncertainties. + """ + + @abstractmethod + def fit(self, *args, **kwargs) -> Self: + """ + Fit calibration method for the calibration data. + """ + + @abstractmethod + def apply(self, uncs: Tensor) -> Tensor: + """ + Apply this calibrator to the input uncertainties. + + Parameters + ---------- + uncs: Tensor + a tensor containinig uncalibrated uncertainties + + Returns + ------- + Tensor + the calibrated uncertainties + """ + + +UncertaintyCalibratorRegistry = ClassRegistry[CalibratorBase]() + + +class RegressionCalibrator(CalibratorBase): + """ + A class for calibrating the predicted uncertainties in regressions tasks. 
+ """ + + @abstractmethod + def fit(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + """ + Fit calibration method for the calibration data. + + Parameters + ---------- + preds: Tensor + the predictions for regression tasks. It is a tensor of the shape of ``n x t``, where ``n`` is + the number of input molecules/reactions, and ``t`` is the number of tasks. + uncs: Tensor + the predicted uncertainties of the shape of ``n x t`` + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the fitting + + Returns + ------- + self : RegressionCalibrator + the fitted calibrator + """ + + +@UncertaintyCalibratorRegistry.register("zscaling") +class ZScalingCalibrator(RegressionCalibrator): + """Calibrate regression datasets by applying a scaling value to the uncalibrated standard deviation, + fitted by minimizing the negative-log-likelihood of a normal distribution around each prediction. [levi2022]_ + + References + ---------- + .. [levi2022] Levi, D.; Gispan, L.; Giladi, N.; Fetaya, E. "Evaluating and Calibrating Uncertainty Prediction in + Regression Tasks." Sensors, 2022, 22(15), 5540. https://www.mdpi.com/1424-8220/22/15/5540 + """ + + def fit(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + scalings = np.zeros(uncs.shape[1]) + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + preds_j = preds[:, j][mask_j].numpy() + uncs_j = uncs[:, j][mask_j].numpy() + targets_j = targets[:, j][mask_j].numpy() + errors = preds_j - targets_j + + def objective(scaler_value: float): + scaled_vars = uncs_j * scaler_value**2 + nll = np.log(2 * np.pi * scaled_vars) / 2 + errors**2 / (2 * scaled_vars) + return nll.sum() + + zscore = errors / np.sqrt(uncs_j) + initial_guess = np.std(zscore) + scalings[j] = fmin(objective, x0=initial_guess, disp=False) + + self.scalings = torch.tensor(scalings) + return self + + def apply(self, uncs: Tensor) -> Tensor: + return uncs * self.scalings**2 + + +@UncertaintyCalibratorRegistry.register("zelikman-interval") +class ZelikmanCalibrator(RegressionCalibrator): + """Calibrate regression datasets using a method that does not depend on a particular probability function form. + + It uses the "CRUDE" method as described in [zelikman2020]_. We implemented this method to be used with variance as the uncertainty. + + Parameters + ---------- + p: float + The target qunatile, :math:`p \in [0, 1]` + + References + ---------- + .. [zelikman2020] Zelikman, E.; Healy, C.; Zhou, S.; Avati, A. "CRUDE: calibrating regression uncertainty distributions + empirically." arXiv preprint arXiv:2005.12496. https://doi.org/10.48550/arXiv.2005.12496 + """ + + def __init__(self, p: float): + super().__init__() + self.p = p + if not 0 <= self.p <= 1: + raise ValueError(f"arg `p` must be between 0 and 1. 
got: {p}.") + + def fit(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + scalings = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + preds_j = preds[:, j][mask_j] + uncs_j = uncs[:, j][mask_j] + targets_j = targets[:, j][mask_j] + z = (preds_j - targets_j).abs() / (uncs_j).sqrt() + scaling = torch.quantile(z, self.p, interpolation="lower") + scalings.append(scaling) + + self.scalings = torch.tensor(scalings) + return self + + def apply(self, uncs: Tensor) -> Tensor: + return uncs * self.scalings**2 + + +@UncertaintyCalibratorRegistry.register("mve-weighting") +class MVEWeightingCalibrator(RegressionCalibrator): + """Calibrate regression datasets that have ensembles of individual models that make variance predictions. + + This method minimizes the negative log likelihood for the predictions versus the targets by applying + a weighted average across the variance predictions of the ensemble. [wang2021]_ + + References + ---------- + .. [wang2021] Wang, D.; Yu, J.; Chen, L.; Li, X.; Jiang, H.; Chen, K.; Zheng, M.; Luo, X. "A hybrid framework + for improving uncertainty quantification in deep learning-based QSAR regression modeling." J. Cheminform., + 2021, 13, 1-17. https://doi.org/10.1186/s13321-021-00551-x + """ + + def fit(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + """ + Fit calibration method for the calibration data. + + Parameters + ---------- + preds: Tensor + the predictions for regression tasks. It is a tensor of the shape of ``n x t``, where ``n`` is + the number of input molecules/reactions, and ``t`` is the number of tasks. + uncs: Tensor + the predicted uncertainties of the shape of ``m x n x t`` + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the fitting + + Returns + ------- + self : MVEWeightingCalibrator + the fitted calibrator + """ + scalings = [] + for j in range(uncs.shape[2]): + mask_j = mask[:, j] + preds_j = preds[:, j][mask_j].numpy() + uncs_j = uncs[:, mask_j, j].numpy() + targets_j = targets[:, j][mask_j].numpy() + errors = preds_j - targets_j + + def objective(scaler_values: np.ndarray): + scaler_values = np.reshape(softmax(scaler_values), [-1, 1]) # (m, 1) + scaled_vars = np.sum(uncs_j * scaler_values, axis=0, keepdims=False) + nll = np.log(2 * np.pi * scaled_vars) / 2 + errors**2 / (2 * scaled_vars) + return np.sum(nll) + + initial_guess = np.ones(uncs_j.shape[0]) + sol = fmin(objective, x0=initial_guess, disp=False) + scalings.append(torch.tensor(softmax(sol))) + + self.scalings = torch.stack(scalings).t().unsqueeze(1) + return self + + def apply(self, uncs: Tensor) -> Tensor: + """ + Apply this calibrator to the input uncertainties. + + Parameters + ---------- + uncs: Tensor + a tensor containinig uncalibrated uncertainties of the shape of ``m x n x t`` + + Returns + ------- + Tensor + the calibrated uncertainties of the shape of ``n x t`` + """ + return (uncs * self.scalings).sum(0) + + +@UncertaintyCalibratorRegistry.register("conformal-regression") +class RegressionConformalCalibrator(RegressionCalibrator): + r"""Conformalize quantiles to make the interval :math:`[\hat{t}_{\alpha/2}(x),\hat{t}_{1-\alpha/2}(x)]` to have + approximately :math:`1-\alpha` coverage. [angelopoulos2021]_ + + .. 
math:: + s(x, y) &= \max \left\{ \hat{t}_{\alpha/2}(x) - y, y - \hat{t}_{1-\alpha/2}(x) \right\} + + \hat{q} &= Q(s_1, \ldots, s_n; \left\lceil \frac{(n+1)(1-\alpha)}{n} \right\rceil) + + C(x) &= \left[ \hat{t}_{\alpha/2}(x) - \hat{q}, \hat{t}_{1-\alpha/2}(x) + \hat{q} \right] + + where :math:`s` is the nonconformity score as the difference between :math:`y` and its nearest quantile. + :math:`\hat{t}_{\alpha/2}(x)` and :math:`\hat{t}_{1-\alpha/2}(x)` are the predicted quantiles from a quantile + regression model. + + .. note:: + The algorithm is specifically designed for quantile regression model. Intuitively, the set :math:`C(x)` just + grows or shrinks the distance between the quantiles by :math:`\hat{q}` to achieve coverage. However, this + function can also be applied to regression model without quantiles being provided. In this case, both + :math:`\hat{t}_{\alpha/2}(x)` and :math:`\hat{t}_{1-\alpha/2}(x)` are the same as :math:`\hat{y}`. Then, the + interval would be the same for every data point (i.e., :math:`\left[-\hat{q}, \hat{q} \right]`). + + Parameters + ---------- + alpha: float + The error rate, :math:`\alpha \in [0, 1]` + + References + ---------- + .. [angelopoulos2021] Angelopoulos, A.N.; Bates, S.; "A Gentle Introduction to Conformal Prediction and Distribution-Free + Uncertainty Quantification." arXiv Preprint 2021, https://arxiv.org/abs/2107.07511 + """ + + def __init__(self, alpha: float): + super().__init__() + self.alpha = alpha + self.bounds = torch.tensor([-1 / 2, 1 / 2]).view(-1, 1) + if not 0 <= self.alpha <= 1: + raise ValueError(f"arg `alpha` must be between 0 and 1. got: {alpha}.") + + def fit(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + self.qhats = [] + for j in range(preds.shape[1]): + mask_j = mask[:, j] + targets_j = targets[:, j][mask_j] + preds_j = preds[:, j][mask_j] + interval_j = uncs[:, j][mask_j] + + interval_bounds = self.bounds * interval_j.unsqueeze(0) + pred_bounds = preds_j.unsqueeze(0) + interval_bounds + + calibration_scores = torch.max(pred_bounds[0] - targets_j, targets_j - pred_bounds[1]) + + num_data = targets_j.shape[0] + if self.alpha >= 1 / (num_data + 1): + q_level = math.ceil((num_data + 1) * (1 - self.alpha)) / num_data + else: + q_level = 1 + logger.warning( + "The error rate (i.e., `alpha`) is smaller than `1 / (number of data + 1)`, so the `1 - alpha` quantile is set to 1, " + "but this only ensures that the coverage is trivially satisfied." + ) + qhat = torch.quantile(calibration_scores, q_level, interpolation="higher") + self.qhats.append(qhat) + + self.qhats = torch.tensor(self.qhats) + return self + + def apply(self, uncs: Tensor) -> tuple[Tensor, Tensor]: + """ + Apply this calibrator to the input uncertainties. + + Parameters + ---------- + uncs: Tensor + a tensor containinig uncalibrated uncertainties + + Returns + ------- + Tensor + the calibrated intervals + """ + cal_intervals = uncs + 2 * self.qhats + + return cal_intervals + + +class BinaryClassificationCalibrator(CalibratorBase): + """ + A class for calibrating the predicted uncertainties in binary classification tasks. + """ + + @abstractmethod + def fit(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + """ + Fit calibration method for the calibration data. + + Parameters + ---------- + uncs: Tensor + the predicted uncertainties (i.e., the predicted probability of class 1) of the shape of ``n x t``, where ``n`` is the number of input + molecules/reactions, and ``t`` is the number of tasks. 
+ targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the fitting + + Returns + ------- + self : BinaryClassificationCalibrator + the fitted calibrator + """ + + +@UncertaintyCalibratorRegistry.register("platt") +class PlattCalibrator(BinaryClassificationCalibrator): + """Calibrate classification datasets using the Platt scaling algorithm [guo2017]_, [platt1999]_. + + In [platt1999]_, Platt suggests using the number of positive and negative training examples to + adjust the value of target probabilities used to fit the parameters. + + References + ---------- + .. [guo2017] Guo, C.; Pleiss, G.; Sun, Y.; Weinberger, K. Q. "On calibration of modern neural + networks". ICML, 2017. https://arxiv.org/abs/1706.04599 + .. [platt1999] Platt, J. "Probabilistic Outputs for Support Vector Machines and Comparisons to + Regularized Likelihood Methods." Adv. Large Margin Classif. 1999, 10 (3), 61–74. + """ + + def fit( + self, uncs: Tensor, targets: Tensor, mask: Tensor, training_targets: Tensor | None = None + ) -> Self: + if torch.any((targets[mask] != 0) & (targets[mask] != 1)): + raise ValueError( + "Platt scaling is only implemented for binary classification tasks! Input tensor " + "must contain only 0's and 1's." + ) + + if training_targets is not None: + logger.info( + "Training targets were provided. Platt scaling for calibration uses a Bayesian " + "correction to avoid training set overfitting. Now replacing calibration targets " + "[0, 1] with adjusted values." + ) + + n_negative_examples = (training_targets == 0).sum(dim=0) + n_positive_examples = (training_targets == 1).sum(dim=0) + + negative_target_bayes_MAP = (1 / (n_negative_examples + 2)).expand_as(targets) + positive_target_bayes_MAP = ( + (n_positive_examples + 1) / (n_positive_examples + 2) + ).expand_as(targets) + + targets = targets.float() + targets[targets == 0] = negative_target_bayes_MAP[targets == 0] + targets[targets == 1] = positive_target_bayes_MAP[targets == 1] + else: + logger.info("No training targets were provided. No Bayesian correction is applied.") + + xs = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + uncs_j = uncs[:, j][mask_j].numpy() + targets_j = targets[:, j][mask_j].numpy() + + def objective(parameters): + a, b = parameters + scaled_uncs = expit(a * logit(uncs_j) + b) + nll = -1 * np.sum( + targets_j * np.log(scaled_uncs) + (1 - targets_j) * np.log(1 - scaled_uncs) + ) + return nll + + xs.append(fmin(objective, x0=[1, 0], disp=False)) + + xs = np.vstack(xs) + self.a, self.b = torch.tensor(xs).T.unbind(dim=0) + + return self + + def apply(self, uncs: Tensor) -> Tensor: + return torch.sigmoid(self.a * torch.logit(uncs) + self.b) + + +@UncertaintyCalibratorRegistry.register("isotonic") +class IsotonicCalibrator(BinaryClassificationCalibrator): + """Calibrate binary classification datasets using isotonic regression as discussed in [guo2017]_. + In effect, the method transforms incoming uncalibrated confidences using a histogram-like + function where the range of each transforming bin and its magnitude is learned. + + References + ---------- + .. [guo2017] Guo, C.; Pleiss, G.; Sun, Y.; Weinberger, K. Q. "On calibration of modern neural + networks". ICML, 2017. 
https://arxiv.org/abs/1706.04599 + """ + + def fit(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + if torch.any((targets[mask] != 0) & (targets[mask] != 1)): + raise ValueError( + "Isotonic calibration is only implemented for binary classification tasks! Input " + "tensor must contain only 0's and 1's." + ) + + isotonic_models = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + uncs_j = uncs[:, j][mask_j].numpy() + targets_j = targets[:, j][mask_j].numpy() + + isotonic_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip") + isotonic_model.fit(uncs_j, targets_j) + isotonic_models.append(isotonic_model) + + self.isotonic_models = isotonic_models + + return self + + def apply(self, uncs: Tensor) -> Tensor: + cal_uncs = [] + for j, isotonic_model in enumerate(self.isotonic_models): + cal_uncs.append(isotonic_model.predict(uncs[:, j].numpy())) + return torch.tensor(np.array(cal_uncs)).t() + + +@UncertaintyCalibratorRegistry.register("conformal-multilabel") +class MultilabelConformalCalibrator(BinaryClassificationCalibrator): + r"""Creates conformal in-set and conformal out-set such that, for :math:`1-\alpha` proportion of datapoints, + the set of labels is bounded by the in- and out-sets [1]_: + + .. math:: + \Pr \left( + \hat{\mathcal C}_{\text{in}}(X) \subseteq \mathcal Y \subseteq \hat{\mathcal C}_{\text{out}}(X) + \right) \geq 1 - \alpha, + + where the in-set :math:`\hat{\mathcal C}_\text{in}` is contained by the set of true labels :math:`\mathcal Y` and + :math:`\mathcal Y` is contained within the out-set :math:`\hat{\mathcal C}_\text{out}`. + + Parameters + ---------- + alpha: float + The error rate, :math:`\alpha \in [0, 1]` + + References + ---------- + .. [1] Cauchois, M.; Gupta, S.; Duchi, J.; "Knowing What You Know: Valid and Validated Confidence Sets + in Multiclass and Multilabel Prediction." arXiv Preprint 2020, https://arxiv.org/abs/2004.10181 + """ + + def __init__(self, alpha: float): + super().__init__() + self.alpha = alpha + if not 0 <= self.alpha <= 1: + raise ValueError(f"arg `alpha` must be between 0 and 1. got: {alpha}.") + + @staticmethod + def nonconformity_scores(preds: Tensor): + r""" + Compute nonconformity score as the negative of the predicted probability. + + .. math:: + s_i = -\hat{f}(X_i)_{Y_i} + """ + return -preds + + def fit(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + if targets.shape[1] < 2: + raise ValueError( + f"the number of tasks should be larger than 1! got: {targets.shape[1]}." 
+ ) + + has_zeros = torch.any(targets == 0, dim=1) + index_zeros = targets[has_zeros] == 0 + scores_in = self.nonconformity_scores(uncs[has_zeros]) + masked_scores_in = scores_in * index_zeros.float() + torch.where( + index_zeros, torch.zeros_like(scores_in), torch.tensor(float("inf")) + ) + calibration_scores_in = torch.min( + masked_scores_in.masked_fill(~mask, float("inf")), dim=1 + ).values + + has_ones = torch.any(targets == 1, dim=1) + index_ones = targets[has_ones] == 1 + scores_out = self.nonconformity_scores(uncs[has_ones]) + masked_scores_out = scores_out * index_ones.float() + torch.where( + index_ones, torch.zeros_like(scores_out), torch.tensor(float("-inf")) + ) + calibration_scores_out = torch.max( + masked_scores_out.masked_fill(~mask, float("-inf")), dim=1 + ).values + + self.tout = torch.quantile( + calibration_scores_out, 1 - self.alpha / 2, interpolation="higher" + ) + self.tin = torch.quantile(calibration_scores_in, self.alpha / 2, interpolation="higher") + return self + + def apply(self, uncs: Tensor) -> Tensor: + """ + Apply this calibrator to the input uncertainties. + + Parameters + ---------- + uncs: Tensor + a tensor containinig uncalibrated uncertainties + + Returns + ------- + Tensor + the calibrated uncertainties of the shape of ``n x t x 2``, where ``n`` is the number of input + molecules/reactions, ``t`` is the number of tasks, and the first element in the last dimension + corresponds to the in-set :math:`\hat{\mathcal C}_\text{in}`, while the second corresponds to + the out-set :math:`\hat{\mathcal C}_\text{out}`. + """ + scores = self.nonconformity_scores(uncs) + + cal_preds_in = (scores <= self.tin).int() + cal_preds_out = (scores <= self.tout).int() + cal_preds_in_out = torch.stack((cal_preds_in, cal_preds_out), dim=2) + + return cal_preds_in_out + + +class MulticlassClassificationCalibrator(CalibratorBase): + """ + A class for calibrating the predicted uncertainties in multiclass classification tasks. + """ + + @abstractmethod + def fit(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + """ + Fit calibration method for the calibration data. + + Parameters + ---------- + uncs: Tensor + the predicted uncertainties (i.e., the predicted probabilities for each class) of the + shape of ``n x t x c``, where ``n`` is the number of input molecules/reactions, ``t`` is + the number of tasks, and ``c`` is the number of classes. + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in + the fitting + + Returns + ------- + self : MulticlassClassificationCalibrator + the fitted calibrator + """ + + +@UncertaintyCalibratorRegistry.register("conformal-multiclass") +class MulticlassConformalCalibrator(MulticlassClassificationCalibrator): + r"""Create a prediction sets of possible labels :math:`C(X_{\text{test}}) \subset \{1 \mathrel{.\,.} K\}` that follows: + + .. math:: + 1 - \alpha \leq \Pr (Y_{\text{test}} \in C(X_{\text{test}})) \leq 1 - \alpha + \frac{1}{n + 1} + + In other words, the probability that the prediction set contains the correct label is almost exactly :math:`1-\alpha`. + More detailes can be found in [1]_. + + Parameters + ---------- + alpha: float + Error rate, :math:`\alpha \in [0, 1]` + + References + ---------- + .. [1] Angelopoulos, A.N.; Bates, S.; "A Gentle Introduction to Conformal Prediction and Distribution-Free + Uncertainty Quantification." 
arXiv Preprint 2021, https://arxiv.org/abs/2107.07511 + """ + + def __init__(self, alpha: float): + super().__init__() + self.alpha = alpha + if not 0 <= self.alpha <= 1: + raise ValueError(f"arg `alpha` must be between 0 and 1. got: {alpha}.") + + @staticmethod + def nonconformity_scores(preds: Tensor): + r"""Compute nonconformity score as the negative of the softmax output for the true class. + + .. math:: + s_i = -\hat{f}(X_i)_{Y_i} + """ + return -preds + + def fit(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + self.qhats = [] + scores = self.nonconformity_scores(uncs) + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + targets_j = targets[:, j][mask_j] + scores_j = scores[:, j][mask_j] + + scores_j = torch.gather(scores_j, 1, targets_j.unsqueeze(1)).squeeze(1) + num_data = targets_j.shape[0] + if self.alpha >= 1 / (num_data + 1): + q_level = math.ceil((num_data + 1) * (1 - self.alpha)) / num_data + else: + q_level = 1 + logger.warning( + "`alpha` is smaller than `1 / (number of data + 1)`, so the `1 - alpha` quantile is set to 1, " + "but this only ensures that the coverage is trivially satisfied." + ) + qhat = torch.quantile(scores_j, q_level, interpolation="higher") + self.qhats.append(qhat) + + self.qhats = torch.tensor(self.qhats) + return self + + def apply(self, uncs: Tensor) -> Tensor: + calibrated_preds = torch.zeros_like(uncs, dtype=torch.int) + scores = self.nonconformity_scores(uncs) + + for j, qhat in enumerate(self.qhats): + calibrated_preds[:, j] = (scores[:, j] <= qhat).int() + + return calibrated_preds + + +@UncertaintyCalibratorRegistry.register("conformal-adaptive") +class AdaptiveMulticlassConformalCalibrator(MulticlassConformalCalibrator): + @staticmethod + def nonconformity_scores(preds): + r"""Compute nonconformity score by greedily including classes in the classification set until it reaches the true label. + + .. math:: + s(x, y) = \sum_{j=1}^{k} \hat{f}(x)_{\pi_j(x)}, \text{ where } y = \pi_k(x) + + where :math:`\pi_k(x)` is the permutation of :math:`\{1 \mathrel{.\,.} K\}` that sorts :math:`\hat{f}(X_{test})` from most likely to least likely. + """ + + sort_index = torch.argsort(-preds, dim=2) + sorted_preds = torch.gather(preds, 2, sort_index) + sorted_scores = sorted_preds.cumsum(dim=2) + unsorted_scores = torch.zeros_like(sorted_scores).scatter_(2, sort_index, sorted_scores) + + return unsorted_scores + + +@UncertaintyCalibratorRegistry.register("isotonic-multiclass") +class IsotonicMulticlassCalibrator(MulticlassClassificationCalibrator): + """Calibrate multiclass classification datasets using isotonic regression as discussed in + [guo2017]_. It uses a one-vs-all aggregation scheme to extend isotonic regression from binary to + multiclass classifiers. + + References + ---------- + .. [guo2017] Guo, C.; Pleiss, G.; Sun, Y.; Weinberger, K. Q. "On calibration of modern neural + networks". ICML, 2017. 
https://arxiv.org/abs/1706.04599 + """ + + def fit(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Self: + isotonic_models = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + uncs_j = uncs[:, j, :][mask_j].numpy() + targets_j = targets[:, j][mask_j].numpy() + + class_isotonic_models = [] + for k in range(uncs.shape[2]): + class_uncs_j = uncs_j[..., k] + positive_class_targets = targets_j == k + + class_targets = np.ones_like(class_uncs_j) + class_targets[positive_class_targets] = 1 + class_targets[~positive_class_targets] = 0 + + isotonic_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip") + isotonic_model.fit(class_uncs_j, class_targets) + class_isotonic_models.append(isotonic_model) + + isotonic_models.append(class_isotonic_models) + + self.isotonic_models = isotonic_models + + return self + + def apply(self, uncs: Tensor) -> Tensor: + cal_uncs = torch.zeros_like(uncs) + for j, class_isotonic_models in enumerate(self.isotonic_models): + for k, isotonic_model in enumerate(class_isotonic_models): + class_uncs_j = uncs[:, j, k].numpy() + class_cal_uncs = isotonic_model.predict(class_uncs_j) + cal_uncs[:, j, k] = torch.tensor(class_cal_uncs) + return cal_uncs / cal_uncs.sum(dim=-1, keepdim=True) diff --git a/chemprop/chemprop/uncertainty/estimator.py b/chemprop/chemprop/uncertainty/estimator.py new file mode 100644 index 0000000000000000000000000000000000000000..95269ac62896ab9ae1ad34ba6053777bc9a1a207 --- /dev/null +++ b/chemprop/chemprop/uncertainty/estimator.py @@ -0,0 +1,376 @@ +from abc import ABC, abstractmethod +from typing import Iterable + +from lightning import pytorch as pl +import torch +from torch import Tensor +from torch.utils.data import DataLoader + +from chemprop.models.model import MPNN +from chemprop.utils.registry import ClassRegistry + + +class UncertaintyEstimator(ABC): + """A helper class for making model predictions and associated uncertainty predictions.""" + + @abstractmethod + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + """ + Calculate the uncalibrated predictions and uncertainties for the dataloader. + + dataloader: DataLoader + the dataloader used for model predictions and uncertainty predictions + models: Iterable[MPNN] + the models used for model predictions and uncertainty predictions + trainer: pl.Trainer + an instance of the :class:`~lightning.pytorch.trainer.trainer.Trainer` used to manage model inference + + Returns + ------- + preds : Tensor + the model predictions, with shape varying by task type: + + * regression/binary classification: ``m x n x t`` + + * multiclass classification: ``m x n x t x c``, where ``m`` is the number of models, + ``n`` is the number of inputs, ``t`` is the number of tasks, and ``c`` is the number of classes. + uncs : Tensor + the predicted uncertainties, with shapes of ``m' x n x t``. + + .. note:: + The ``m`` and ``m'`` are different by definition. The ``m`` is the number of models, + while the ``m'`` is the number of uncertainty estimations. For example, if two MVE + or evidential models are provided, both ``m`` and ``m'`` are two. However, for an + ensemble of two models, ``m'`` would be one (even though ``m = 2``). 
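+
+ Example
+ -------
+ A minimal sketch of this shape convention, using synthetic tensors in place of real
+ model output: for an ensemble-style estimate, two models give ``m = 2`` stacked
+ predictions but only a single variance estimate, so ``m' = 1``.
+
+ >>> import torch
+ >>> preds_a = torch.rand(8, 1)  # n = 8 inputs, t = 1 task
+ >>> preds_b = torch.rand(8, 1)
+ >>> preds = torch.stack([preds_a, preds_b])  # m x n x t
+ >>> uncs = torch.var(preds, dim=0, correction=0).unsqueeze(0)  # m' x n x t
+ >>> preds.shape, uncs.shape
+ (torch.Size([2, 8, 1]), torch.Size([1, 8, 1]))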
+ """ + + +UncertaintyEstimatorRegistry = ClassRegistry[UncertaintyEstimator]() + + +@UncertaintyEstimatorRegistry.register("none") +class NoUncertaintyEstimator(UncertaintyEstimator): + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + predss = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + predss.append(preds) + return torch.stack(predss), None + + +@UncertaintyEstimatorRegistry.register("mve") +class MVEEstimator(UncertaintyEstimator): + """ + Class that estimates prediction means and variances (MVE). [nix1994]_ + + References + ---------- + .. [nix1994] Nix, D. A.; Weigend, A. S. "Estimating the mean and variance of the target + probability distribution." Proceedings of 1994 IEEE International Conference on Neural + Networks, 1994 https://doi.org/10.1109/icnn.1994.374138 + """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + mves = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + mves.append(preds) + mves = torch.stack(mves, dim=0) + mean, var = mves.unbind(dim=-1) + return mean, var + + +@UncertaintyEstimatorRegistry.register("ensemble") +class EnsembleEstimator(UncertaintyEstimator): + """ + Class that predicts the uncertainty of predictions based on the variance in predictions among + an ensemble's submodels. + """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + if len(models) <= 1: + raise ValueError( + "Ensemble method for uncertainty is only available when multiple models are provided." + ) + ensemble_preds = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + ensemble_preds.append(preds) + stacked_preds = torch.stack(ensemble_preds).float() + vars = torch.var(stacked_preds, dim=0, correction=0).unsqueeze(0) + return stacked_preds, vars + + +@UncertaintyEstimatorRegistry.register("classification") +class ClassEstimator(UncertaintyEstimator): + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + predss = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + predss.append(preds) + return torch.stack(predss), torch.stack(predss) + + +@UncertaintyEstimatorRegistry.register("evidential-total") +class EvidentialTotalEstimator(UncertaintyEstimator): + """ + Class that predicts the total evidential uncertainty based on hyperparameters of + the evidential distribution [amini2020]_. + + References + ----------- + .. [amini2020] Amini, A.; Schwarting, W.; Soleimany, A.; Rus, D. "Deep Evidential Regression". + NeurIPS, 2020. https://arxiv.org/abs/1910.02600 + """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + uncs = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + uncs.append(preds) + uncs = torch.stack(uncs) + mean, v, alpha, beta = uncs.unbind(-1) + total_uncs = (1 + 1 / v) * (beta / (alpha - 1)) + return mean, total_uncs + + +@UncertaintyEstimatorRegistry.register("evidential-epistemic") +class EvidentialEpistemicEstimator(UncertaintyEstimator): + """ + Class that predicts the epistemic evidential uncertainty based on hyperparameters of + the evidential distribution. 
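+
+ Example
+ -------
+ A small sketch, with hand-picked evidential parameters rather than model output, showing
+ how this epistemic term relates to the aleatoric and total uncertainties computed by the
+ other evidential estimators in this module:
+
+ >>> import torch
+ >>> v, alpha, beta = torch.tensor(2.0), torch.tensor(3.0), torch.tensor(1.0)
+ >>> aleatoric = beta / (alpha - 1)
+ >>> epistemic = (1 / v) * (beta / (alpha - 1))
+ >>> total = (1 + 1 / v) * (beta / (alpha - 1))
+ >>> torch.isclose(total, aleatoric + epistemic)
+ tensor(True)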
+ """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + uncs = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + uncs.append(preds) + uncs = torch.stack(uncs) + mean, v, alpha, beta = uncs.unbind(-1) + epistemic_uncs = (1 / v) * (beta / (alpha - 1)) + return mean, epistemic_uncs + + +@UncertaintyEstimatorRegistry.register("evidential-aleatoric") +class EvidentialAleatoricEstimator(UncertaintyEstimator): + """ + Class that predicts the aleatoric evidential uncertainty based on hyperparameters of + the evidential distribution. + """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + uncs = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + uncs.append(preds) + uncs = torch.stack(uncs) + mean, _, alpha, beta = uncs.unbind(-1) + aleatoric_uncs = beta / (alpha - 1) + return mean, aleatoric_uncs + + +@UncertaintyEstimatorRegistry.register("dropout") +class DropoutEstimator(UncertaintyEstimator): + """ + A :class:`DropoutEstimator` creates a virtual ensemble of models via Monte Carlo dropout with + the provided model [gal2016]_. + + Parameters + ---------- + ensemble_size: int + The number of samples to draw for the ensemble. + dropout: float | None + The probability of dropping out units in the dropout layers. If unspecified, + the training probability is used, which is prefered but not possible if the model was not + trained with dropout (i.e. p=0). + + References + ----------- + .. [gal2016] Gal, Y.; Ghahramani, Z. "Dropout as a bayesian approximation: Representing model uncertainty in deep learning." + International conference on machine learning. PMLR, 2016. 
https://arxiv.org/abs/1506.02142 + """ + + def __init__(self, ensemble_size: int, dropout: None | float = None): + self.ensemble_size = ensemble_size + self.dropout = dropout + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + meanss, varss = [], [] + for model in models: + self._setup_model(model) + individual_preds = [] + + for _ in range(self.ensemble_size): + predss = trainer.predict(model, dataloader) + preds = torch.concat(predss, 0) + individual_preds.append(preds) + + stacked_preds = torch.stack(individual_preds, dim=0).float() + means = torch.mean(stacked_preds, dim=0) + vars = torch.var(stacked_preds, dim=0, correction=0) + self._restore_model(model) + meanss.append(means) + varss.append(vars) + return torch.stack(meanss), torch.stack(varss) + + def _setup_model(self, model): + model._predict_step = model.predict_step + model.predict_step = self._predict_step(model) + model.apply(self._change_dropout) + + def _restore_model(self, model): + model.predict_step = model._predict_step + del model._predict_step + model.apply(self._restore_dropout) + + def _predict_step(self, model): + def _wrapped_predict_step(*args, **kwargs): + model.apply(self._activate_dropout) + return model._predict_step(*args, **kwargs) + + return _wrapped_predict_step + + def _activate_dropout(self, module): + if isinstance(module, torch.nn.Dropout): + module.train() + + def _change_dropout(self, module): + if isinstance(module, torch.nn.Dropout): + module._p = module.p + if self.dropout: + module.p = self.dropout + + def _restore_dropout(self, module): + if isinstance(module, torch.nn.Dropout): + if hasattr(module, "_p"): + module.p = module._p + del module._p + + +# TODO: Add in v2.1.x +# @UncertaintyEstimatorRegistry.register("spectra-roundrobin") +# class RoundRobinSpectraEstimator(UncertaintyEstimator): +# def __call__( +# self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer +# ) -> tuple[Tensor, Tensor]: +# return + + +@UncertaintyEstimatorRegistry.register("classification-dirichlet") +class ClassificationDirichletEstimator(UncertaintyEstimator): + """ + A :class:`ClassificationDirichletEstimator` predicts an amount of 'evidence' for both the + negative class and the positive class as described in [sensoy2018]_. The class probabilities and + the uncertainty are calculated based on the evidence. + + .. math:: + S = \sum_{i=1}^K \alpha_i + p_i = \alpha_i / S + u = K / S + + where :math:`K` is the number of classes, :math:`\alpha_i` is the evidence for class :math:`i`, + :math:`p_i` is the probability of class :math:`i`, and :math:`u` is the uncertainty. + + References + ---------- + .. [sensoy2018] Sensoy, M.; Kaplan, L.; Kandemir, M. "Evidential deep learning to quantify + classification uncertainty." NeurIPS, 2018, 31. https://doi.org/10.48550/arXiv.1806.01768 + """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + uncs = [] + for model in models: + preds = torch.concat(trainer.predict(model, dataloader), 0) + uncs.append(preds) + uncs = torch.stack(uncs, dim=0) + y, u = uncs.unbind(dim=-1) + return y, u + + +@UncertaintyEstimatorRegistry.register("multiclass-dirichlet") +class MulticlassDirichletEstimator(UncertaintyEstimator): + """ + A :class:`MulticlassDirichletEstimator` predicts an amount of 'evidence' for each class as + described in [sensoy2018]_. The class probabilities and the uncertainty are calculated based on + the evidence. 
+ + .. math:: + S = \sum_{i=1}^K \alpha_i + p_i = \alpha_i / S + u = K / S + + where :math:`K` is the number of classes, :math:`\alpha_i` is the evidence for class :math:`i`, + :math:`p_i` is the probability of class :math:`i`, and :math:`u` is the uncertainty. + + References + ---------- + .. [sensoy2018] Sensoy, M.; Kaplan, L.; Kandemir, M. "Evidential deep learning to quantify + classification uncertainty." NeurIPS, 2018, 31. https://doi.org/10.48550/arXiv.1806.01768 + """ + + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + preds = [] + uncs = [] + for model in models: + self._setup_model(model) + output = torch.concat(trainer.predict(model, dataloader), 0) + self._restore_model(model) + preds.append(output[..., :-1]) + uncs.append(output[..., -1]) + preds = torch.stack(preds, 0) + uncs = torch.stack(uncs, 0) + + return preds, uncs + + def _setup_model(self, model): + model.predictor._forward = model.predictor.forward + model.predictor.forward = self._forward.__get__(model.predictor, model.predictor.__class__) + + def _restore_model(self, model): + model.predictor.forward = model.predictor._forward + del model.predictor._forward + + def _forward(self, Z: Tensor) -> Tensor: + alpha = self.train_step(Z) + + u = alpha.shape[2] / alpha.sum(-1, keepdim=True) + Y = alpha / alpha.sum(-1, keepdim=True) + + return torch.concat([Y, u], -1) + + +@UncertaintyEstimatorRegistry.register("quantile-regression") +class QuantileRegressionEstimator(UncertaintyEstimator): + def __call__( + self, dataloader: DataLoader, models: Iterable[MPNN], trainer: pl.Trainer + ) -> tuple[Tensor, Tensor]: + individual_preds = [] + for model in models: + predss = trainer.predict(model, dataloader) + individual_preds.append(torch.concat(predss, 0)) + stacked_preds = torch.stack(individual_preds).float() + mean, interval = stacked_preds.unbind(2) + return mean, interval diff --git a/chemprop/chemprop/uncertainty/evaluator.py b/chemprop/chemprop/uncertainty/evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..1e88fab2835b0b29aadd654949176b38ff899476 --- /dev/null +++ b/chemprop/chemprop/uncertainty/evaluator.py @@ -0,0 +1,368 @@ +from abc import ABC, abstractmethod + +import numpy as np +import torch +from torch import Tensor +from torchmetrics.regression import SpearmanCorrCoef + +from chemprop.utils.registry import ClassRegistry + +UncertaintyEvaluatorRegistry = ClassRegistry() + + +class RegressionEvaluator(ABC): + """Evaluates the quality of uncertainty estimates in regression tasks.""" + + @abstractmethod + def evaluate(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + """Evaluate the performance of uncertainty predictions against the model target values. + + Parameters + ---------- + preds: Tensor + the predictions for regression tasks. It is a tensor of the shape of ``n x t``, where ``n`` is + the number of input molecules/reactions, and ``t`` is the number of tasks. 
+ uncs: Tensor + the predicted uncertainties of the shape of ``n x t`` + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the evaluation + + Returns + ------- + Tensor + a tensor of the shape ``t`` containing the evaluated metrics + """ + + +@UncertaintyEvaluatorRegistry.register("nll-regression") +class NLLRegressionEvaluator(RegressionEvaluator): + r""" + Evaluate uncertainty values for regression datasets using the mean negative-log-likelihood + of the targets given the probability distributions estimated by the model: + + .. math:: + + \mathrm{NLL}(y, \hat y) = \frac{1}{2} \log(2 \pi \sigma^2) + \frac{(y - \hat{y})^2}{2 \sigma^2} + + where :math:`\hat{y}` is the predicted value, :math:`y` is the true value, and + :math:`\sigma^2` is the predicted uncertainty (variance). + + The function returns a tensor containing the mean NLL for each task. + """ + + def evaluate(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + nlls = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + preds_j = preds[:, j][mask_j] + targets_j = targets[:, j][mask_j] + uncs_j = uncs[:, j][mask_j] + errors = preds_j - targets_j + nll = (2 * torch.pi * uncs_j).log() / 2 + errors**2 / (2 * uncs_j) + nlls.append(nll.mean(dim=0)) + return torch.stack(nlls) + + +@UncertaintyEvaluatorRegistry.register("miscalibration_area") +class CalibrationAreaEvaluator(RegressionEvaluator): + """ + A class for evaluating regression uncertainty values based on how they deviate from perfect + calibration on an observed-probability versus expected-probability plot. + """ + + def evaluate( + self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor, num_bins: int = 100 + ) -> Tensor: + """Evaluate the performance of uncertainty predictions against the model target values. + + Parameters + ---------- + preds: Tensor + the predictions for regression tasks. It is a tensor of the shape of ``n x t``, where ``n`` is + the number of input molecules/reactions, and ``t`` is the number of tasks. + uncs: Tensor + the predicted uncertainties (variance) of the shape of ``n x t`` + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the evaluation + num_bins: int, default=100 + the number of bins to discretize the ``[0, 1]`` interval + + Returns + ------- + Tensor + a tensor of the shape ``t`` containing the evaluated metrics + """ + bins = torch.arange(1, num_bins) + bin_scaling = torch.special.erfinv(bins / num_bins).view(-1, 1, 1) * np.sqrt(2) + errors = torch.abs(preds - targets) + uncs = torch.sqrt(uncs).unsqueeze(0) + bin_unc = uncs * bin_scaling + bin_count = bin_unc >= errors.unsqueeze(0) + mask = mask.unsqueeze(0) + observed_auc = (bin_count & mask).sum(1) / mask.sum(1) + num_tasks = uncs.shape[-1] + observed_auc = torch.cat( + [torch.zeros(1, num_tasks), observed_auc, torch.ones(1, num_tasks)] + ).T + ideal_auc = torch.arange(num_bins + 1) / num_bins + miscal_area = (1 / num_bins) * (observed_auc - ideal_auc).abs().sum(dim=1) + return miscal_area + + +@UncertaintyEvaluatorRegistry.register("ence") +class ExpectedNormalizedErrorEvaluator(RegressionEvaluator): + r""" + A class that evaluates uncertainty performance by binning together clusters of predictions + and comparing the average predicted variance of the clusters against the RMSE of the cluster. [1]_ + + .. 
math:: + \mathrm{ENCE} = \frac{1}{N} \sum_{i=1}^{N} \frac{|\mathrm{RMV}_i - \mathrm{RMSE}_i|}{\mathrm{RMV}_i} + + where :math:`N` is the number of bins, :math:`\mathrm{RMV}_i` is the root of the mean uncertainty over the + :math:`i`-th bin and :math:`\mathrm{RMSE}_i` is the root mean square error over the :math:`i`-th bin. This + discrepancy is further normalized by the uncertainty over the bin, :math:`\mathrm{RMV}_i`, because the error + is expected to be naturally higher as the uncertainty increases. + + References + ---------- + .. [1] Levi, D.; Gispan, L.; Giladi, N.; Fetaya, E. "Evaluating and Calibrating Uncertainty Prediction in Regression Tasks." + Sensors, 2022, 22(15), 5540. https://www.mdpi.com/1424-8220/22/15/5540 + """ + + def evaluate( + self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor, num_bins: int = 100 + ) -> Tensor: + """Evaluate the performance of uncertainty predictions against the model target values. + + Parameters + ---------- + preds: Tensor + the predictions for regression tasks. It is a tensor of the shape of ``n x t``, where ``n`` is + the number of input molecules/reactions, and ``t`` is the number of tasks. + uncs: Tensor + the predicted uncertainties (variance) of the shape of ``n x t`` + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the evaluation + num_bins: int, default=100 + the number of bins the data are divided into + + Returns + ------- + Tensor + a tensor of the shape ``t`` containing the evaluated metrics + """ + masked_preds = preds * mask + masked_targets = targets * mask + masked_uncs = uncs * mask + errors = torch.abs(masked_preds - masked_targets) + + sort_idx = torch.argsort(masked_uncs, dim=0) + sorted_uncs = torch.gather(masked_uncs, 0, sort_idx) + sorted_errors = torch.gather(errors, 0, sort_idx) + + split_unc = torch.chunk(sorted_uncs, num_bins, dim=0) + split_error = torch.chunk(sorted_errors, num_bins, dim=0) + + root_mean_vars = torch.sqrt(torch.stack([chunk.mean(0) for chunk in split_unc])) + rmses = torch.sqrt(torch.stack([chunk.pow(2).mean(0) for chunk in split_error])) + + ence = torch.mean(torch.abs(root_mean_vars - rmses) / root_mean_vars, dim=0) + return ence + + +@UncertaintyEvaluatorRegistry.register("spearman") +class SpearmanEvaluator(RegressionEvaluator): + """ + Evaluate the Spearman rank correlation coefficient between the uncertainties and errors in the model predictions. + + The correlation coefficient returns a value in the [-1, 1] range, with better scores closer to 1 + observed when the uncertainty values are predictive of the rank ordering of the errors in the model prediction. + """ + + def evaluate(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + spearman_coeffs = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + preds_j = preds[:, j][mask_j] + targets_j = targets[:, j][mask_j] + uncs_j = uncs[:, j][mask_j] + errs_j = (preds_j - targets_j).abs() + spearman = SpearmanCorrCoef() + spearman_coeff = spearman(uncs_j, errs_j) + spearman_coeffs.append(spearman_coeff) + return torch.stack(spearman_coeffs) + + +@UncertaintyEvaluatorRegistry.register("conformal-coverage-regression") +class RegressionConformalEvaluator(RegressionEvaluator): + r""" + Evaluate the coverage of conformal prediction for regression datasets. + + .. math:: + \Pr (Y_{\text{test}} \in C(X_{\text{test}})) + + where the :math:`C(X_{\text{test}})` is the predicted interval. 
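+
+ Example
+ -------
+ A minimal sketch, with made-up predictions and interval widths, of the coverage being
+ measured: the fraction of targets falling inside the symmetric interval of total width
+ ``uncs`` centered on each prediction.
+
+ >>> import torch
+ >>> preds = torch.tensor([[0.0], [1.0], [2.0], [3.0]])
+ >>> uncs = torch.tensor([[1.0], [1.0], [1.0], [1.0]])
+ >>> targets = torch.tensor([[0.2], [1.6], [2.4], [3.1]])
+ >>> lower, upper = preds - uncs / 2, preds + uncs / 2
+ >>> covered = (lower <= targets) & (targets <= upper)
+ >>> covered.float().mean(0)
+ tensor([0.7500])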
+ """ + + def evaluate(self, preds: Tensor, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + bounds = torch.tensor([-1 / 2, 1 / 2], device=mask.device) + interval = uncs.unsqueeze(0) * bounds.view([-1] + [1] * preds.ndim) + lower, upper = preds.unsqueeze(0) + interval + covered_mask = torch.logical_and(lower <= targets, targets <= upper) + + return (covered_mask & mask).sum(0) / mask.sum(0) + + +class BinaryClassificationEvaluator(ABC): + """Evaluates the quality of uncertainty estimates in binary classification tasks.""" + + @abstractmethod + def evaluate(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + """Evaluate the performance of uncertainty predictions against the model target values. + + Parameters + ---------- + uncs: Tensor + the predicted uncertainties (i.e., the predicted probability of class 1) of the shape of ``n x t``, where ``n`` is the number of input + molecules/reactions, and ``t`` is the number of tasks. + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the evaluation + + Returns + ------- + Tensor + a tensor of the shape ``t`` containing the evaluated metrics + """ + + +@UncertaintyEvaluatorRegistry.register("nll-classification") +class NLLClassEvaluator(BinaryClassificationEvaluator): + """ + Evaluate uncertainty values for binary classification datasets using the mean negative-log-likelihood + of the targets given the assigned probabilities from the model: + + .. math:: + + \mathrm{NLL} = -\log(\hat{y} \cdot y + (1 - \hat{y}) \cdot (1 - y)) + + where :math:`y` is the true binary label (0 or 1), and + :math:`\hat{y}` is the predicted probability associated with the class label 1. + + The function returns a tensor containing the mean NLL for each task. + """ + + def evaluate(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + nlls = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + targets_j = targets[:, j][mask_j] + uncs_j = uncs[:, j][mask_j] + likelihood = uncs_j * targets_j + (1 - uncs_j) * (1 - targets_j) + nll = -1 * likelihood.log() + nlls.append(nll.mean(dim=0)) + return torch.stack(nlls) + + +@UncertaintyEvaluatorRegistry.register("conformal-coverage-classification") +class MultilabelConformalEvaluator(BinaryClassificationEvaluator): + r""" + Evaluate the coverage of conformal prediction for binary classification datasets with multiple labels. + + .. math:: + \Pr \left( + \hat{\mathcal C}_{\text{in}}(X) \subseteq \mathcal Y \subseteq \hat{\mathcal C}_{\text{out}}(X) + \right) + + where the in-set :math:`\hat{\mathcal C}_\text{in}` is contained by the set of true labels :math:`\mathcal Y` and + :math:`\mathcal Y` is contained within the out-set :math:`\hat{\mathcal C}_\text{out}`. + """ + + def evaluate(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + in_set, out_set = torch.chunk(uncs, 2, 1) + covered_mask = torch.logical_and(in_set <= targets, targets <= out_set) + return (covered_mask & mask).sum(0) / mask.sum(0) + + +class MulticlassClassificationEvaluator(ABC): + """Evaluates the quality of uncertainty estimates in multiclass classification tasks.""" + + @abstractmethod + def evaluate(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + """Evaluate the performance of uncertainty predictions against the model target values. 
+ + Parameters + ---------- + uncs: Tensor + the predicted uncertainties (i.e., the predicted probabilities for each class) of the shape of ``n x t x c``, where ``n`` is the number of input + molecules/reactions, ``t`` is the number of tasks, and ``c`` is the number of classes. + targets: Tensor + a tensor of the shape ``n x t`` + mask: Tensor + a tensor of the shape ``n x t`` indicating whether the given values should be used in the evaluation + + Returns + ------- + Tensor + a tensor of the shape ``t`` containing the evaluated metrics + """ + + +@UncertaintyEvaluatorRegistry.register("nll-multiclass") +class NLLMulticlassEvaluator(MulticlassClassificationEvaluator): + """ + Evaluate uncertainty values for multiclass classification datasets using the mean negative-log-likelihood + of the targets given the assigned probabilities from the model: + + .. math:: + + \mathrm{NLL} = -\log(p_{y_i}) + + where :math:`p_{y_i}` is the predicted probability for the true class :math:`y_i`, calculated as: + + .. math:: + + p_{y_i} = \sum_{k=1}^{K} \mathbb{1}(y_i = k) \cdot p_k + + Here: :math:`K` is the total number of classes, + :math:`\mathbb{1}(y_i = k)` is the indicator function that is 1 when the true class :math:`y_i` equals class :math:`k`, and 0 otherwise, + and :math:`p_k` is the predicted probability for class :math:`k`. + + The function returns a tensor containing the mean NLL for each task. + """ + + def evaluate(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + nlls = [] + for j in range(uncs.shape[1]): + mask_j = mask[:, j] + targets_j = targets[:, j][mask_j] + uncs_j = uncs[:, j][mask_j] + targets_one_hot = torch.eye(uncs_j.shape[-1])[targets_j.long()] + likelihood = (targets_one_hot * uncs_j).sum(dim=-1) + nll = -1 * likelihood.log() + nlls.append(nll.mean(dim=0)) + return torch.stack(nlls) + + +@UncertaintyEvaluatorRegistry.register("conformal-coverage-multiclass") +class MulticlassConformalEvaluator(MulticlassClassificationEvaluator): + r""" + Evaluate the coverage of conformal prediction for multiclass classification datasets. + + .. math:: + \Pr (Y_{\text{test}} \in C(X_{\text{test}})) + + where the :math:`C(X_{\text{test}}) \subset \{1 \mathrel{.\,.} K\}` is a prediction set of possible labels . 
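+
+ Example
+ -------
+ A small sketch, with a hand-written prediction-set tensor in place of calibrator output,
+ of the quantity being measured: ``uncs`` holds 0/1 flags marking which of the ``c`` classes
+ are in each prediction set, and coverage is the fraction of rows whose true class is flagged.
+
+ >>> import torch
+ >>> uncs = torch.tensor([[[1, 1, 0]], [[0, 1, 0]], [[1, 0, 1]]])  # n x t x c
+ >>> targets = torch.tensor([[0], [2], [2]])  # n x t
+ >>> one_hot = torch.nn.functional.one_hot(targets, num_classes=3)
+ >>> covered = (uncs * one_hot).amax(dim=-1) > 0
+ >>> covered.float().mean(0)
+ tensor([0.6667])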
+ """ + + def evaluate(self, uncs: Tensor, targets: Tensor, mask: Tensor) -> Tensor: + targets_one_hot = torch.nn.functional.one_hot(targets, num_classes=uncs.shape[2]) + covered_mask = torch.max(uncs * targets_one_hot, dim=-1)[0] > 0 + return (covered_mask & mask).sum(0) / mask.sum(0) diff --git a/chemprop/chemprop/utils/__init__.py b/chemprop/chemprop/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a8937a6e06591c8b5eb19bbe5ae00851364351a --- /dev/null +++ b/chemprop/chemprop/utils/__init__.py @@ -0,0 +1,4 @@ +from .registry import ClassRegistry, Factory +from .utils import EnumMapping, make_mol, pretty_shape + +__all__ = ["ClassRegistry", "Factory", "EnumMapping", "make_mol", "pretty_shape"] diff --git a/chemprop/chemprop/utils/registry.py b/chemprop/chemprop/utils/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..58137351965bc80749f10654abce8d8c4d8570e0 --- /dev/null +++ b/chemprop/chemprop/utils/registry.py @@ -0,0 +1,46 @@ +import inspect +from typing import Any, Iterable, Type, TypeVar + +T = TypeVar("T") + + +class ClassRegistry(dict[str, Type[T]]): + def register(self, alias: Any | Iterable[Any] | None = None): + def decorator(cls): + if alias is None: + keys = [cls.__name__.lower()] + elif isinstance(alias, str): + keys = [alias] + else: + keys = alias + + cls.alias = keys[0] + for k in keys: + self[k] = cls + + return cls + + return decorator + + __call__ = register + + def __repr__(self) -> str: # pragma: no cover + return f"{self.__class__.__name__}: {super().__repr__()}" + + def __str__(self) -> str: # pragma: no cover + INDENT = 4 + items = [f"{' ' * INDENT}{repr(k)}: {repr(v)}" for k, v in self.items()] + + return "\n".join([f"{self.__class__.__name__} {'{'}", ",\n".join(items), "}"]) + + +class Factory: + @classmethod + def build(cls, clz_T: Type[T], *args, **kwargs) -> T: + if not inspect.isclass(clz_T): + raise TypeError(f"Expected a class type! got: {type(clz_T)}") + + sig = inspect.signature(clz_T) + kwargs = {k: v for k, v in kwargs.items() if k in sig.parameters.keys()} + + return clz_T(*args, **kwargs) diff --git a/chemprop/chemprop/utils/utils.py b/chemprop/chemprop/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6d7cd4d0c68439563c085847c5153f66849e637b --- /dev/null +++ b/chemprop/chemprop/utils/utils.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +from enum import StrEnum +from typing import Iterable, Iterator + +from rdkit import Chem + + +class EnumMapping(StrEnum): + @classmethod + def get(cls, name: str | EnumMapping) -> EnumMapping: + if isinstance(name, cls): + return name + + try: + return cls[name.upper()] + except KeyError: + raise KeyError( + f"Unsupported {cls.__name__} member! got: '{name}'. expected one of: {cls.keys()}" + ) + + @classmethod + def keys(cls) -> Iterator[str]: + return (e.name for e in cls) + + @classmethod + def values(cls) -> Iterator[str]: + return (e.value for e in cls) + + @classmethod + def items(cls) -> Iterator[tuple[str, str]]: + return zip(cls.keys(), cls.values()) + + +def make_mol(smi: str, keep_h: bool, add_h: bool, ignore_chirality: bool = False) -> Chem.Mol: + """build an RDKit molecule from a SMILES string. + + Parameters + ---------- + smi : str + a SMILES string. + keep_h : bool + whether to keep hydrogens in the input smiles. This does not add hydrogens, it only keeps them if they are specified + add_h : bool + If True, adds hydrogens to the molecule. 
+ ignore_chirality : bool, optional + If True, ignores chirality information when constructing the molecule. Default is False. + + Returns + ------- + Chem.Mol + the RDKit molecule. + """ + if keep_h: + mol = Chem.MolFromSmiles(smi, sanitize=False) + Chem.SanitizeMol( + mol, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS + ) + else: + mol = Chem.MolFromSmiles(smi) + + if mol is None: + raise RuntimeError(f"SMILES {smi} is invalid! (RDKit returned None)") + + if add_h: + mol = Chem.AddHs(mol) + + if ignore_chirality: + for atom in mol.GetAtoms(): + atom.SetChiralTag(Chem.ChiralType.CHI_UNSPECIFIED) + + return mol + + +def pretty_shape(shape: Iterable[int]) -> str: + """Make a pretty string from an input shape + + Example + -------- + >>> X = np.random.rand(10, 4) + >>> X.shape + (10, 4) + >>> pretty_shape(X.shape) + '10 x 4' + """ + return " x ".join(map(str, shape)) diff --git a/chemprop/chemprop/utils/v1_to_v2.py b/chemprop/chemprop/utils/v1_to_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..cd059340a147db7cd383fac6d60ebf1cf87debb2 --- /dev/null +++ b/chemprop/chemprop/utils/v1_to_v2.py @@ -0,0 +1,188 @@ +from os import PathLike + +from lightning.pytorch import __version__ +from lightning.pytorch.utilities.parsing import AttributeDict +import torch + +from chemprop.nn.agg import AggregationRegistry +from chemprop.nn.message_passing import AtomMessagePassing, BondMessagePassing +from chemprop.nn.metrics import LossFunctionRegistry, MetricRegistry +from chemprop.nn.predictors import PredictorRegistry +from chemprop.nn.transforms import UnscaleTransform +from chemprop.utils import Factory + + +def convert_state_dict_v1_to_v2(model_v1_dict: dict) -> dict: + """Converts v1 model dictionary to a v2 state dictionary""" + + state_dict_v2 = {} + args_v1 = model_v1_dict["args"] + + state_dict_v1 = model_v1_dict["state_dict"] + state_dict_v2["message_passing.W_i.weight"] = state_dict_v1["encoder.encoder.0.W_i.weight"] + state_dict_v2["message_passing.W_h.weight"] = state_dict_v1["encoder.encoder.0.W_h.weight"] + state_dict_v2["message_passing.W_o.weight"] = state_dict_v1["encoder.encoder.0.W_o.weight"] + state_dict_v2["message_passing.W_o.bias"] = state_dict_v1["encoder.encoder.0.W_o.bias"] + + # v1.6 renamed ffn to readout + if "readout.1.weight" in state_dict_v1: + for i in range(args_v1.ffn_num_layers): + suffix = 0 if i == 0 else 2 + state_dict_v2[f"predictor.ffn.{i}.{suffix}.weight"] = state_dict_v1[ + f"readout.{i * 3 + 1}.weight" + ] + state_dict_v2[f"predictor.ffn.{i}.{suffix}.bias"] = state_dict_v1[ + f"readout.{i * 3 + 1}.bias" + ] + else: + for i in range(args_v1.ffn_num_layers): + suffix = 0 if i == 0 else 2 + state_dict_v2[f"predictor.ffn.{i}.{suffix}.weight"] = state_dict_v1[ + f"ffn.{i * 3 + 1}.weight" + ] + state_dict_v2[f"predictor.ffn.{i}.{suffix}.bias"] = state_dict_v1[ + f"ffn.{i * 3 + 1}.bias" + ] + + if args_v1.dataset_type == "regression": + state_dict_v2["predictor.output_transform.mean"] = torch.tensor( + model_v1_dict["data_scaler"]["means"], dtype=torch.float32 + ).unsqueeze(0) + state_dict_v2["predictor.output_transform.scale"] = torch.tensor( + model_v1_dict["data_scaler"]["stds"], dtype=torch.float32 + ).unsqueeze(0) + + # target_weights was added in #183 + if getattr(args_v1, "target_weights", None) is not None: + task_weights = torch.tensor(args_v1.target_weights).unsqueeze(0) + else: + task_weights = torch.ones(args_v1.num_tasks).unsqueeze(0) + + state_dict_v2["predictor.criterion.task_weights"] = 
task_weights + + return state_dict_v2 + + +def convert_hyper_parameters_v1_to_v2(model_v1_dict: dict) -> dict: + """Converts v1 model dictionary to v2 hyper_parameters dictionary""" + hyper_parameters_v2 = {} + renamed_metrics = { + "auc": "roc", + "prc-auc": "prc", + "cross_entropy": "ce", + "binary_cross_entropy": "bce", + "mcc": "binary-mcc", + "recall": "recall is not in v2", + "precision": "precision is not in v2", + "balanced_accuracy": "balanced_accuracy is not in v2", + } + + args_v1 = model_v1_dict["args"] + hyper_parameters_v2["batch_norm"] = False + hyper_parameters_v2["metrics"] = [ + Factory.build(MetricRegistry[renamed_metrics.get(args_v1.metric, args_v1.metric)]) + ] + hyper_parameters_v2["warmup_epochs"] = args_v1.warmup_epochs + hyper_parameters_v2["init_lr"] = args_v1.init_lr + hyper_parameters_v2["max_lr"] = args_v1.max_lr + hyper_parameters_v2["final_lr"] = args_v1.final_lr + + # convert the message passing block + W_i_shape = model_v1_dict["state_dict"]["encoder.encoder.0.W_i.weight"].shape + W_h_shape = model_v1_dict["state_dict"]["encoder.encoder.0.W_h.weight"].shape + W_o_shape = model_v1_dict["state_dict"]["encoder.encoder.0.W_o.weight"].shape + + d_h = W_i_shape[0] + d_v = W_o_shape[1] - d_h + d_e = W_h_shape[1] - d_h if args_v1.atom_messages else W_i_shape[1] - d_v + + hyper_parameters_v2["message_passing"] = AttributeDict( + { + "activation": args_v1.activation, + "bias": args_v1.bias, + "cls": BondMessagePassing if not args_v1.atom_messages else AtomMessagePassing, + "d_e": d_e, # the feature dimension of the edges + "d_h": args_v1.hidden_size, # dimension of the hidden layer + "d_v": d_v, # the feature dimension of the vertices + "d_vd": args_v1.atom_descriptors_size, + "depth": args_v1.depth, + "dropout": args_v1.dropout, + "undirected": args_v1.undirected, + } + ) + + # convert the aggregation block + hyper_parameters_v2["agg"] = { + "dim": 0, # in v1, the aggregation is always done on the atom features + "cls": AggregationRegistry[args_v1.aggregation], + } + if args_v1.aggregation == "norm": + hyper_parameters_v2["agg"]["norm"] = args_v1.aggregation_norm + + # convert the predictor block + fgs = args_v1.features_generator or [] + d_xd = sum((200 if "rdkit" in fg else 0) + (2048 if "morgan" in fg else 0) for fg in fgs) + + if getattr(args_v1, "target_weights", None) is not None: + task_weights = torch.tensor(args_v1.target_weights).unsqueeze(0) + else: + task_weights = torch.ones(args_v1.num_tasks).unsqueeze(0) + + # loss_function was added in #238 + loss_fn_defaults = { + "classification": "bce", + "regression": "mse", + "multiclass": "ce", + "specitra": "sid", + } + T_loss_fn = LossFunctionRegistry[ + getattr(args_v1, "loss_function", loss_fn_defaults[args_v1.dataset_type]) + ] + + hyper_parameters_v2["predictor"] = AttributeDict( + { + "activation": args_v1.activation, + "cls": PredictorRegistry[args_v1.dataset_type], + "criterion": Factory.build(T_loss_fn, task_weights=task_weights), + "task_weights": None, + "dropout": args_v1.dropout, + "hidden_dim": args_v1.ffn_hidden_size, + "input_dim": args_v1.hidden_size + args_v1.atom_descriptors_size + d_xd, + "n_layers": args_v1.ffn_num_layers - 1, + "n_tasks": args_v1.num_tasks, + } + ) + + if args_v1.dataset_type == "regression": + hyper_parameters_v2["predictor"]["output_transform"] = UnscaleTransform( + model_v1_dict["data_scaler"]["means"], model_v1_dict["data_scaler"]["stds"] + ) + + return hyper_parameters_v2 + + +def convert_model_dict_v1_to_v2(model_v1_dict: dict) -> dict: + """Converts a v1 model 
dictionary from a loaded .pt file to a v2 model dictionary""" + + model_v2_dict = {} + + model_v2_dict["epoch"] = None + model_v2_dict["global_step"] = None + model_v2_dict["pytorch-lightning_version"] = __version__ + model_v2_dict["state_dict"] = convert_state_dict_v1_to_v2(model_v1_dict) + model_v2_dict["loops"] = None + model_v2_dict["callbacks"] = None + model_v2_dict["optimizer_states"] = None + model_v2_dict["lr_schedulers"] = None + model_v2_dict["hparams_name"] = "kwargs" + model_v2_dict["hyper_parameters"] = convert_hyper_parameters_v1_to_v2(model_v1_dict) + + return model_v2_dict + + +def convert_model_file_v1_to_v2(model_v1_file: PathLike, model_v2_file: PathLike) -> None: + """Converts a v1 model .pt file to a v2 model .pt file""" + + model_v1_dict = torch.load(model_v1_file, map_location=torch.device("cpu"), weights_only=False) + model_v2_dict = convert_model_dict_v1_to_v2(model_v1_dict) + torch.save(model_v2_dict, model_v2_file) diff --git a/chemprop/chemprop/utils/v2_0_to_v2_1.py b/chemprop/chemprop/utils/v2_0_to_v2_1.py new file mode 100644 index 0000000000000000000000000000000000000000..8627637bc63d6594547f8b4f401e52d43808bb16 --- /dev/null +++ b/chemprop/chemprop/utils/v2_0_to_v2_1.py @@ -0,0 +1,40 @@ +import pickle +import sys + +import torch + + +class Unpickler(pickle.Unpickler): + name_mappings = { + "MSELoss": "MSE", + "MSEMetric": "MSE", + "MAEMetric": "MAE", + "RMSEMetric": "RMSE", + "BoundedMSELoss": "BoundedMSE", + "BoundedMSEMetric": "BoundedMSE", + "BoundedMAEMetric": "BoundedMAE", + "BoundedRMSEMetric": "BoundedRMSE", + "SIDLoss": "SID", + "SIDMetric": "SID", + "WassersteinLoss": "Wasserstein", + "WassersteinMetric": "Wasserstein", + "R2Metric": "R2Score", + "BinaryAUROCMetric": "BinaryAUROC", + "BinaryAUPRCMetric": "BinaryAUPRC", + "BinaryAccuracyMetric": "BinaryAccuracy", + "BinaryF1Metric": "BinaryF1Score", + "BCEMetric": "BCELoss", + } + + def find_class(self, module, name): + if module == "chemprop.nn.loss": + module = "chemprop.nn.metrics" + name = self.name_mappings.get(name, name) + return super().find_class(module, name) + + +if __name__ == "__main__": + model = torch.load( + sys.argv[1], map_location="cpu", pickle_module=sys.modules[__name__], weights_only=False + ) + torch.save(model, sys.argv[2]) diff --git a/chemprop/chemprop/web/__pycache__/__init__.cpython-37.pyc b/chemprop/chemprop/web/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e583789216f0a21aa82e5c4521bfe4e7e2e70979 Binary files /dev/null and b/chemprop/chemprop/web/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop/chemprop/web/__pycache__/config.cpython-37.pyc b/chemprop/chemprop/web/__pycache__/config.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b98aa54568a30ac337c685689bd24259deaa5bc Binary files /dev/null and b/chemprop/chemprop/web/__pycache__/config.cpython-37.pyc differ diff --git a/chemprop/chemprop/web/__pycache__/run.cpython-37.pyc b/chemprop/chemprop/web/__pycache__/run.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9244cc7bcf7465d40584fdbdb7b94d816832035a Binary files /dev/null and b/chemprop/chemprop/web/__pycache__/run.cpython-37.pyc differ diff --git a/chemprop/chemprop/web/__pycache__/utils.cpython-37.pyc b/chemprop/chemprop/web/__pycache__/utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..98bc7d90a7a7819fed797504bcc66e32deac6e61 Binary files /dev/null and 
b/chemprop/chemprop/web/__pycache__/utils.cpython-37.pyc differ diff --git a/chemprop/chemprop/web/app/__pycache__/__init__.cpython-37.pyc b/chemprop/chemprop/web/app/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2b429c2989b044e1459612ef2e200beaf4b58720 Binary files /dev/null and b/chemprop/chemprop/web/app/__pycache__/__init__.cpython-37.pyc differ diff --git a/chemprop/chemprop/web/app/__pycache__/db.cpython-37.pyc b/chemprop/chemprop/web/app/__pycache__/db.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0771eb4cfafc14b740d8b5a12d037816f94fa006 Binary files /dev/null and b/chemprop/chemprop/web/app/__pycache__/db.cpython-37.pyc differ diff --git a/chemprop/chemprop/web/app/__pycache__/views.cpython-37.pyc b/chemprop/chemprop/web/app/__pycache__/views.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7afc9091c06166c507ae72789ba64b4a8332a283 Binary files /dev/null and b/chemprop/chemprop/web/app/__pycache__/views.cpython-37.pyc differ diff --git a/chemprop/chemprop/web/chemprop.sqlite3 b/chemprop/chemprop/web/chemprop.sqlite3 new file mode 100644 index 0000000000000000000000000000000000000000..a4adae685e0264b067ded01dd9ca6099c06a2c9b Binary files /dev/null and b/chemprop/chemprop/web/chemprop.sqlite3 differ diff --git a/chemprop/docs/Makefile b/chemprop/docs/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..26b942286fc384c8b4ca0af218fc3ffba0506984 --- /dev/null +++ b/chemprop/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/chemprop/docs/make.bat b/chemprop/docs/make.bat new file mode 100644 index 0000000000000000000000000000000000000000..747ffb7b3033659bdd2d1e6eae41ecb00358a45e --- /dev/null +++ b/chemprop/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/chemprop/docs/source/_static/images/logo/LICENSE.txt b/chemprop/docs/source/_static/images/logo/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..354f1e04f1247b3ddcbfc83b7519594d0f1ba261 --- /dev/null +++ b/chemprop/docs/source/_static/images/logo/LICENSE.txt @@ -0,0 +1,121 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. 
Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. diff --git a/chemprop/docs/source/_static/images/logo/chemprop_logo.png b/chemprop/docs/source/_static/images/logo/chemprop_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..b6bfa1c0ab5c0b297d1919f8ac66f77b61c288de Binary files /dev/null and b/chemprop/docs/source/_static/images/logo/chemprop_logo.png differ diff --git a/chemprop/docs/source/_static/images/logo/chemprop_logo.svg b/chemprop/docs/source/_static/images/logo/chemprop_logo.svg new file mode 100644 index 0000000000000000000000000000000000000000..b121087a389530df4ec36857890010c691771851 --- /dev/null +++ b/chemprop/docs/source/_static/images/logo/chemprop_logo.svg @@ -0,0 +1,185 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/chemprop/docs/source/_static/images/message_passing.png b/chemprop/docs/source/_static/images/message_passing.png new file mode 100644 index 0000000000000000000000000000000000000000..679956675f864e40530c952d469d383d520b1a11 --- /dev/null +++ b/chemprop/docs/source/_static/images/message_passing.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827148fbb3c94f9a4d905e23d48e97b8ad6cb7fbc237497caf7ac3a572068299 +size 168917 diff --git a/chemprop/docs/source/active_learning.nblink b/chemprop/docs/source/active_learning.nblink new file mode 100644 index 0000000000000000000000000000000000000000..25e1bc93592e2b3cd6ed31d4d11e830dd38a6db6 --- /dev/null +++ b/chemprop/docs/source/active_learning.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/active_learning.ipynb" +} \ No newline at end of file diff --git a/chemprop/docs/source/cmd.rst b/chemprop/docs/source/cmd.rst new file mode 100644 index 0000000000000000000000000000000000000000..7de9d992d3ead08133505a6dc489206cb711f80f --- /dev/null +++ b/chemprop/docs/source/cmd.rst @@ -0,0 +1,12 @@ +.. _cmd: + +CLI Reference +************* + +.. contents:: Table of Contents + :depth: 3 + :local: + +.. 
argparse:: + :ref: chemprop.cli.main.construct_parser + :prog: chemprop diff --git a/chemprop/docs/source/conf.py b/chemprop/docs/source/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..7141e55322fa7e6a69a3272ca3fb62619e6809ae --- /dev/null +++ b/chemprop/docs/source/conf.py @@ -0,0 +1,62 @@ +import os +import sys + +sys.path.insert(0, os.path.abspath("../..")) +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "Chemprop" +copyright = "2024, Chemprop developers" +author = "Chemprop developers" +release = "2.1.2" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "nbsphinx", + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "autoapi.extension", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinxcontrib.bibtex", + "sphinx.ext.doctest", + "sphinxarg.ext", + "nbsphinx_link", +] + +nbsphinx_execute = "never" +templates_path = ["_templates"] +exclude_patterns = [] +autodoc_typehints = "description" + +# -- AutoAPI configuration --------------------------------------------------- +nbsphinx_allow_errors = True +autoapi_dirs = ["../.."] +autoapi_ignore = ["*/tests/*", "*/cli/*"] +autoapi_file_patterns = ["*.py"] +autoapi_options = [ + "members", + "undoc-members", + "show-inheritance", + "show-module-summary", + "special-members", + "imported-members", +] +autoapi_keep_files = True + +# -- bibtex configuration --------------------------------------------------- + +bibtex_bibfiles = ["refs.bib"] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_book_theme" +html_static_path = ["_static"] diff --git a/chemprop/docs/source/convert_v1_to_v2.nblink b/chemprop/docs/source/convert_v1_to_v2.nblink new file mode 100644 index 0000000000000000000000000000000000000000..3c2d325cf7515c3f6a3bc512cce46bf170de2cd8 --- /dev/null +++ b/chemprop/docs/source/convert_v1_to_v2.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/convert_v1_to_v2.ipynb" +} diff --git a/chemprop/docs/source/extra_features_descriptors.nblink b/chemprop/docs/source/extra_features_descriptors.nblink new file mode 100644 index 0000000000000000000000000000000000000000..a49793a1dafe38e54e7c9f56e6375206bb83cd30 --- /dev/null +++ b/chemprop/docs/source/extra_features_descriptors.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/extra_features_descriptors.ipynb" +} diff --git a/chemprop/docs/source/hpopting.nblink b/chemprop/docs/source/hpopting.nblink new file mode 100644 index 0000000000000000000000000000000000000000..549c21dce2b725afcb34a772abe07f01c27c3847 --- /dev/null +++ b/chemprop/docs/source/hpopting.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/hpopting.ipynb" +} diff --git a/chemprop/docs/source/index.rst b/chemprop/docs/source/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..1b48749bc4768b811ef89fdf384e06b2a1aee1f7 --- /dev/null +++ b/chemprop/docs/source/index.rst @@ -0,0 +1,36 @@ +.. 
Chemprop documentation master file, created by + sphinx-quickstart on Wed Aug 23 22:52:52 2023. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Chemprop's documentation! +==================================== + +This website contains documentation for Chemprop, a PyTorch-based framework for training and evaluating message-passing neural networks (MPNNs) for molecular property prediction. The package was originally developed for :footcite:t:`chemprop_theory` and further described in :footcite:t:`chemprop_software`. + +To get started with Chemprop, check out the :ref:`quickstart` page, and for more detailed information, see the :ref:`installation`, :ref:`tutorial`, and :ref:`notebooks` pages. + +.. note:: + Chemprop recently underwent a ground-up rewrite and new major release (v2.0.0). A helpful transition guide from Chemprop v1 to v2 can be found `here `_. This includes a side-by-side comparison of CLI argument options, a list of which arguments will be implemented in later versions of v2, and a list of changes to default hyperparameters. + +If you use Chemprop to train or develop a model in your own work, we would appreciate if you cite the following papers: + +.. footbibliography:: + +.. toctree:: + :maxdepth: 1 + :caption: Contents: + + quickstart + installation + tutorial/cli/index + tutorial/python/index + notebooks + cmd + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/chemprop/docs/source/installation.rst b/chemprop/docs/source/installation.rst new file mode 100644 index 0000000000000000000000000000000000000000..1ca041852a84c949aeef35e350a319e5b56cf3a0 --- /dev/null +++ b/chemprop/docs/source/installation.rst @@ -0,0 +1,103 @@ +.. _installation: + +Installation +============ + +Chemprop can either be installed from PyPI via pip_, from source (i.e., directly from the `git repo`_) using ``pip`` or the ``environment.yml`` file, or from `Docker`_. The PyPI version includes the vast majority of Chemprop functionality, but some functionality is only accessible when installed from source. We recommend installing ``chemprop`` in a virtual environment (e.g., conda_ or miniconda_). The following sections assume you are using ``conda`` or ``miniconda``, but you can use any virtual environment manager you like (e.g. ``mamba``). + +.. _pip: https://pypi.org/project/chemprop/ +.. _git repo: https://github.com/chemprop/chemprop.git +.. _`Docker`: https://www.docker.com/get-started/ +.. _conda: https://docs.conda.io/en/latest/conda.html +.. _miniconda: https://docs.conda.io/en/latest/miniconda.html + +.. note:: + *Python 3.11 vs. 3.12:* Options 1, 2, and 4 below explicitly specify ``python=3.11`` but you can choose to replace ``python=3.11`` with ``python=3.12`` in these commands. We test Chemprop on both versions in our CI. + +.. note:: + *CPU-only installation:* For the following options 1-3, if you do not have a GPU, you might need to manually install a CPU-only version of PyTorch. This should be handled automatically, but if you find that it is not, you should run the following command before installing Chemprop: + + .. code-block:: + + conda install pytorch cpuonly -c pytorch + +Option 1: Installing from PyPI +------------------------------ + +.. code-block:: + + conda create -n chemprop python=3.11 + conda activate chemprop + pip install chemprop + +Option 2: Installing from source using pip +------------------------------------------ + +.. 
code-block:: + + conda create -n chemprop python=3.11 + conda activate chemprop + git clone https://github.com/chemprop/chemprop.git + cd chemprop + pip install -e . + +.. note:: + You can also use this option to install additional optional dependencies by replacing ``pip install -e .`` with ``pip install -e ".[hpopt,dev,docs,test,notebooks]"``. + +Option 3: Installing from source using environment.yml +------------------------------------------------------- + +.. code-block:: + + git clone https://github.com/chemprop/chemprop.git + cd chemprop + conda env create -f environment.yml + conda activate chemprop + pip install -e . + +Option 4: Installing via Docker +------------------------------- + +Chemprop can also be installed with Docker, making it possible to isolate the Chemprop code and environment. +To install and run Chemprop in a Docker container, first `install Docker`_. +You may then either ``pull`` and use official Chemprop images or ``build`` the image yourself. + +.. _`install Docker`: https://docs.docker.com/get-docker/ + +.. note:: + The Chemprop Dockerfile runs only on CPU and does not support GPU acceleration. + Linux users with NVIDIA GPUs may install the `nvidia-container-toolkit`_ from NVIDIA and modify the installation instructions in the Dockerfile to install the version of `torch` which is compatible with your system's GPUs and drivers. + Adding the ``--gpus all`` argument to ``docker run`` will then allow Chemprop to run on GPU from within the container. You can see other options for exposing GPUs in the `Docker documentation`_. + Users on other systems should install Chemprop from PyPI or source. + +.. _`nvidia-container-toolkit`: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html +.. _`Docker documentation`: https://docs.docker.com/config/containers/resource_constraints/#expose-gpus-for-use + +Pull Official Images +++++++++++++++++++++ + +.. code-block:: + + docker pull chemprop/chemprop:X.Y.Z + docker run -it chemprop/chemprop:X.Y.Z + +Where ``X``, ``Y``, and ``Z`` should be replaced with the version of Chemprop you wish to ``pull``. +For example, to pull ``chemprop-2.0.0`` run + +.. code-block:: + + docker pull chemprop/chemprop:2.0.0 + +.. note:: + Not all versions of Chemprop are available as pre-built images. + Visit the `Docker Hub`_ page for a list of those that are available. + +.. note:: + Nightly builds of Chemprop are available under the ``latest`` tag on Dockerhub and are intended for developer use and as feature previews, not production deployment. + +.. _`Docker Hub`: https://hub.docker.com/repository/docker/chemprop/chemprop/general + +Build Image Locally ++++++++++++++++++++ + +See the build instructions in the top of the ``Dockerfile``. 
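Whichever installation option you use, a quick sanity check from Python confirms that the package and its PyTorch backend are importable. This is a minimal sketch rather than an official test; it only assumes that ``chemprop`` exposes a ``__version__`` attribute and that PyTorch was installed as a dependency.

.. code-block:: python

    import chemprop
    import torch

    # Report the installed Chemprop version (e.g. 2.1.2).
    print(f"Chemprop version: {chemprop.__version__}")

    # A GPU is used only if PyTorch can see one; otherwise Chemprop runs on CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"PyTorch device available: {device}")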
diff --git a/chemprop/docs/source/interpreting_monte_carlo_tree_search.nblink b/chemprop/docs/source/interpreting_monte_carlo_tree_search.nblink new file mode 100644 index 0000000000000000000000000000000000000000..6b3f3ab03c3895d4cd63b0371ac047a607171200 --- /dev/null +++ b/chemprop/docs/source/interpreting_monte_carlo_tree_search.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/interpreting_monte_carlo_tree_search.ipynb" +} diff --git a/chemprop/docs/source/mpnn_fingerprints.nblink b/chemprop/docs/source/mpnn_fingerprints.nblink new file mode 100644 index 0000000000000000000000000000000000000000..059bd126a21145eaae4cd28c268eeb57e852c665 --- /dev/null +++ b/chemprop/docs/source/mpnn_fingerprints.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/mpnn_fingerprints.ipynb" +} diff --git a/chemprop/docs/source/multi_task.nblink b/chemprop/docs/source/multi_task.nblink new file mode 100644 index 0000000000000000000000000000000000000000..88f8b0d252aa9f57d24be499badbce09b244010e --- /dev/null +++ b/chemprop/docs/source/multi_task.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/multi_task.ipynb" +} diff --git a/chemprop/docs/source/notebooks.rst b/chemprop/docs/source/notebooks.rst new file mode 100644 index 0000000000000000000000000000000000000000..dbe6ac0c606389bfb97ffe53f89f9a3be4d6a999 --- /dev/null +++ b/chemprop/docs/source/notebooks.rst @@ -0,0 +1,30 @@ +.. _notebooks: + +Jupyter Notebook Examples +========================= + +Chemprop's usage within Python scripts is also illustrated by the Jupyter notebooks on the following pages. + + +.. toctree:: + :maxdepth: 1 + :hidden: + + training + predicting + training_classification + training_regression_multicomponent + predicting_regression_multicomponent + training_regression_reaction + predicting_regression_reaction + multi_task + hpopting + mpnn_fingerprints + active_learning + transfer_learning + uncertainty + interpreting_monte_carlo_tree_search + shapley_value_with_customized_featurizers + extra_features_descriptors + use_featurizer_with_other_libraries + convert_v1_to_v2 \ No newline at end of file diff --git a/chemprop/docs/source/predicting.nblink b/chemprop/docs/source/predicting.nblink new file mode 100644 index 0000000000000000000000000000000000000000..ca5bfdadc82559f2e212b9b2a3fecf707903a0bf --- /dev/null +++ b/chemprop/docs/source/predicting.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/predicting.ipynb" +} diff --git a/chemprop/docs/source/predicting_regression_multicomponent.nblink b/chemprop/docs/source/predicting_regression_multicomponent.nblink new file mode 100644 index 0000000000000000000000000000000000000000..a406d1665795906be4ccb6d45249de18ff59a41d --- /dev/null +++ b/chemprop/docs/source/predicting_regression_multicomponent.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/predicting_regression_multicomponent.ipynb" +} diff --git a/chemprop/docs/source/predicting_regression_reaction.nblink b/chemprop/docs/source/predicting_regression_reaction.nblink new file mode 100644 index 0000000000000000000000000000000000000000..d5fac4357b87412f98dab5a22e28562f01777370 --- /dev/null +++ b/chemprop/docs/source/predicting_regression_reaction.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/predicting_regression_reaction.ipynb" +} diff --git a/chemprop/docs/source/quickstart.rst b/chemprop/docs/source/quickstart.rst new file mode 100644 index 0000000000000000000000000000000000000000..c307f052ccee740edf872985d0c4cb96b8aba0cd --- /dev/null +++ b/chemprop/docs/source/quickstart.rst @@ -0,0 +1,83 @@ +.. 
_quickstart: + +Quickstart +========== + +To get started with Chemprop, first install the package using the instructions in the :ref:`installation` section. Once you have Chemprop installed, you can train a model on your own data or use the pre-packaged solubility dataset to get a feel for how the package works. + +Let's use the solubility data that comes pre-packaged in the Chemprop directory: + +.. code-block:: text + + $ head tests/data/regression.csv + smiles,logSolubility + OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O,-0.77 + Cc1occc1C(=O)Nc2ccccc2,-3.3 + CC(C)=CCCC(C)=CC(=O),-2.06 + c1ccc2c(c1)ccc3c2ccc4c5ccccc5ccc43,-7.87 + c1ccsc1,-1.33 + c2ccc1scnc1c2,-1.5 + Clc1cc(Cl)c(c(Cl)c1)c2c(Cl)cccc2Cl,-7.32 + CC12CCC3C(CCc4cc(O)ccc34)C2CCC1O,-5.03 + ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl,-6.29 + ... + +Now we're ready to train a simple Chemprop model: + +.. code-block:: bash + + chemprop train --data-path tests/data/regression.csv \ + --task-type regression \ + --output-dir train_example + +This will train a model on the solubility dataset (``tests/data/regression.csv``) and save the model and training logs in the ``train_example`` directory. You should see some output printed to your terminal that shows the model architecture, number of parameters, and a progress bar for each epoch of training. At the end, you should see something like: + +.. code-block:: text + + ─────────────────────────────────────────────────────── + Test metric DataLoader 0 + ─────────────────────────────────────────────────────── + test/mse 0.7716904154601469 + ─────────────────────────────────────────────────────── + +With our trained model in hand, we can now use it to predict solubilities of new molecules. In the absence of additional data, for demonstration purposes, let's just test on the same molecules that we trained on: + +.. code-block:: bash + + chemprop predict --test-path tests/data/regression.csv \ + --model-path train_example/model_0/best.pt \ + --preds-path train_example/predictions.csv + +This should output a file ``train_example/predictions_0.csv`` containing the predicted log(solubility) values for the molecules contained in ``tests/data/regression.csv``. + +.. code-block:: text + + $ head train_example/predictions_0.csv + smiles,logSolubility,pred_0 + OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O,-0.77,-1.0349703 + Cc1occc1C(=O)Nc2ccccc2,-3.3,-3.0304263 + CC(C)=CCCC(C)=CC(=O),-2.06,-2.0320206 + ... + +Given that our test data is identical to our training data, it makes sense that the predictions are similar to the ground truth values. + +In the rest of this documentation, we'll go into more detail about how to: + +* :ref:`Install Chemprop` +* :ref:`Customize model architecture and task type` +* :ref:`Specify training parameters: split type, learning rate, batch size, loss function, etc. ` +* :ref:`Use Chemprop as a Python package ` +* :ref:`Perform a hyperparameter optimization ` +* :ref:`Generate a molecular fingerprint ` +.. * :ref:`Quantify prediction uncertainty` + +Summary +------- + +* Install Chemprop using the instructions in the :ref:`installation` section +* Train a model with ``chemprop train --data-path --task-type --output-dir `` +* Use a saved model for prediction with ``chemprop predict --test-path --checkpoint-dir --preds-path `` + +.. _GitHub repository: https://github.com/chemprop/chemprop +.. + .. 
_FreeSolv dataset: https://pubmed.ncbi.nlm.nih.gov/24928188/ \ No newline at end of file diff --git a/chemprop/docs/source/refs.bib b/chemprop/docs/source/refs.bib new file mode 100644 index 0000000000000000000000000000000000000000..5c87772f0976442f01a0b142ad3ef9e0d41bb806 --- /dev/null +++ b/chemprop/docs/source/refs.bib @@ -0,0 +1,37 @@ +# this was downloaded from ACS: https://pubs.acs.org/doi/10.1021/acs.jcim.9b00237 +@article{chemprop_theory, + author = {Yang, Kevin and Swanson, Kyle and Jin, Wengong and Coley, Connor and Eiden, Philipp and Gao, Hua and Guzman-Perez, Angel and Hopper, Timothy and Kelley, Brian and Mathea, Miriam and Palmer, Andrew and Settels, Volker and Jaakkola, Tommi and Jensen, Klavs and Barzilay, Regina}, + title = {Analyzing Learned Molecular Representations for Property Prediction}, + journal = {Journal of Chemical Information and Modeling}, + volume = {59}, + number = {8}, + pages = {3370-3388}, + year = {2019}, + doi = {10.1021/acs.jcim.9b00237}, + note ={PMID: 31361484}, + URL = { + https://doi.org/10.1021/acs.jcim.9b00237 + }, + eprint = { + https://doi.org/10.1021/acs.jcim.9b00237 + } +} + +# this was downloaded from ACS: https://pubs.acs.org/doi/10.1021/acs.jcim.3c01250 +@article{chemprop_software, + author = {Heid, Esther and Greenman, Kevin P. and Chung, Yunsie and Li, Shih-Cheng and Graff, David E. and Vermeire, Florence H. and Wu, Haoyang and Green, William H. and McGill, Charles J.}, + title = {Chemprop: A Machine Learning Package for Chemical Property Prediction}, + journal = {Journal of Chemical Information and Modeling}, + volume = {64}, + number = {1}, + pages = {9-17}, + year = {2024}, + doi = {10.1021/acs.jcim.3c01250}, + note ={PMID: 38147829}, + URL = { + https://doi.org/10.1021/acs.jcim.3c01250 + }, + eprint = { + https://doi.org/10.1021/acs.jcim.3c01250 + } +} \ No newline at end of file diff --git a/chemprop/docs/source/shapley_value_with_customized_featurizers.nblink b/chemprop/docs/source/shapley_value_with_customized_featurizers.nblink new file mode 100644 index 0000000000000000000000000000000000000000..12a61d9c424e5588b43fea4a3ec6a8fc9520c254 --- /dev/null +++ b/chemprop/docs/source/shapley_value_with_customized_featurizers.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/shapley_value_with_customized_featurizers.ipynb" +} diff --git a/chemprop/docs/source/training.nblink b/chemprop/docs/source/training.nblink new file mode 100644 index 0000000000000000000000000000000000000000..ccfd59543f1892d55957ec7ef0b44111f6861dd3 --- /dev/null +++ b/chemprop/docs/source/training.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/training.ipynb" +} diff --git a/chemprop/docs/source/training_classification.nblink b/chemprop/docs/source/training_classification.nblink new file mode 100644 index 0000000000000000000000000000000000000000..48d0526d0bc7198225f3662b35e829ef51593ffa --- /dev/null +++ b/chemprop/docs/source/training_classification.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/training_classification.ipynb" +} diff --git a/chemprop/docs/source/training_regression_multicomponent.nblink b/chemprop/docs/source/training_regression_multicomponent.nblink new file mode 100644 index 0000000000000000000000000000000000000000..99c5bd4fe795054747f2502bcb1be8c2ed905782 --- /dev/null +++ b/chemprop/docs/source/training_regression_multicomponent.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/training_regression_multicomponent.ipynb" +} diff --git a/chemprop/docs/source/training_regression_reaction.nblink 
b/chemprop/docs/source/training_regression_reaction.nblink new file mode 100644 index 0000000000000000000000000000000000000000..fdd7511550caf6bf5214d3256055b7f19a4ad910 --- /dev/null +++ b/chemprop/docs/source/training_regression_reaction.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/training_regression_reaction.ipynb" +} diff --git a/chemprop/docs/source/transfer_learning.nblink b/chemprop/docs/source/transfer_learning.nblink new file mode 100644 index 0000000000000000000000000000000000000000..542ad040fbce9b0d9604867f9baa1c85323d249b --- /dev/null +++ b/chemprop/docs/source/transfer_learning.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/transfer_learning.ipynb" +} \ No newline at end of file diff --git a/chemprop/docs/source/tutorial/cli/convert.rst b/chemprop/docs/source/tutorial/cli/convert.rst new file mode 100644 index 0000000000000000000000000000000000000000..6eab765aa5dbc78941437df0e5c25e1a5d5c24c7 --- /dev/null +++ b/chemprop/docs/source/tutorial/cli/convert.rst @@ -0,0 +1,10 @@ +.. _convert: + +Conversion +---------- + +To convert a trained model from Chemprop v1 to v2, run ``chemprop convert`` and specify: + + * :code:`--input-path ` Path of the Chemprop v1 file to convert. + * :code:`--output-path ` Path where the converted Chemprop v2 will be saved. If unspecified, this will default to ``_v2.ckpt``. + diff --git a/chemprop/docs/source/tutorial/cli/fingerprint.rst b/chemprop/docs/source/tutorial/cli/fingerprint.rst new file mode 100644 index 0000000000000000000000000000000000000000..b2c0af8a2d03c859dc23eac795f234a83e63ae34 --- /dev/null +++ b/chemprop/docs/source/tutorial/cli/fingerprint.rst @@ -0,0 +1,34 @@ +.. _fingerprint: + +Fingerprint +============================ + +To calculate the learned representations (encodings) of model inputs from a pretrained model, run + +.. code-block:: + + chemprop fingerprint --test-path --model-path + +where :code:`` is the path to the CSV file containing SMILES strings, and :code:`` is the location of checkpoint(s) or model file(s) to use for prediction. It can be a path to either a single pretrained model checkpoint (.ckpt) or single pretrained model file (.pt), a directory that contains these files, or a list of path(s) and directory(s). If a directory, will recursively search and predict on all found (.pt) models. By default, predictions will be saved to the same directory as the test path. If desired, a different directory can be specified by using :code:`--output `. The output can end with either .csv or .npz, and the output will be saved to the corresponding file type. + +For example: + +.. code-block:: + + chemprop fingerprint --test-path tests/data/smis.csv \ + --model-path tests/data/example_model_v2_regression_mol.ckpt \ + --output fps.csv + + +Specifying FFN encoding layer +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, the encodings are returned from the penultimate linear layer of the model's FFN. However, the exact layer to draw encodings from can be specified using :code:`--ffn-block-index `. + +An index of 0 will simply return the post-aggregation representation without passing through the FFN. Here, an index of 1 will return the output of the first linear layer of the FFN, an index of 2 the second layer, and so on. + + +Specifying Data to Parse +^^^^^^^^^^^^^^^^^^^^^^^^ + +:code:`fingerprint` shares the same arguments for specifying SMILES columns and reaction types as :code:`predict`. For more detail, see :ref:`predict`. 
\ No newline at end of file
diff --git a/chemprop/docs/source/tutorial/cli/hpopt.rst b/chemprop/docs/source/tutorial/cli/hpopt.rst
new file mode 100644
index 0000000000000000000000000000000000000000..53d1ce6d47043462b28bbe8f83091fd15b624f0c
--- /dev/null
+++ b/chemprop/docs/source/tutorial/cli/hpopt.rst
@@ -0,0 +1,79 @@
+.. _hpopt:
+
+Hyperparameter Optimization
+============================
+
+.. note::
+    Chemprop relies on `Ray Tune `_ for hyperparameter optimization, which is an optional install. To install the required dependencies, run :code:`pip install -U ray[tune]` if installing with PyPI, or :code:`pip install -e .[hpopt]` if installing from source.
+
+Searching Hyperparameter Space
+--------------------------------
+
+We include an automated hyperparameter optimization procedure through the Ray Tune package. Hyperparameter optimization can be run as follows:
+
+.. code-block::
+
+    chemprop hpopt --data-path --task-type --search-parameter-keywords --hpopt-save-dir
+
+For example:
+
+.. code-block::
+
+    chemprop hpopt --data-path tests/data/regression.csv \
+        --task-type regression \
+        --search-parameter-keywords depth ffn_num_layers message_hidden_dim \
+        --hpopt-save-dir results
+
+The search parameters can be any combination of hyperparameters or a predefined set. Options include :code:`basic` (default), which consists of:
+
+ * :code:`depth` The number of message passing steps
+ * :code:`ffn_num_layers` The number of layers in the FFN model
+ * :code:`dropout` The probability (from 0.0 to 1.0) of dropout in the MPNN & FFN layers
+ * :code:`message_hidden_dim` The hidden dimension in the message passing step
+ * :code:`ffn_hidden_dim` The hidden dimension in the FFN model
+
+Another option is :code:`learning_rate` which includes:
+
+ * :code:`max_lr` The maximum learning rate
+ * :code:`init_lr` The initial learning rate. It is searched as a ratio relative to the max learning rate
+ * :code:`final_lr` The final learning rate. It is searched as a ratio relative to the max learning rate
+ * :code:`warmup_epochs` Number of warmup epochs, during which the learning rate linearly increases from the initial to the maximum learning rate
+
+Other individual search parameters include:
+
+ * :code:`activation` The activation function used in the MPNN & FFN layers. Choices include ``relu``, ``leakyrelu``, ``prelu``, ``tanh``, ``selu``, and ``elu``
+ * :code:`aggregation` Aggregation mode used to build the molecule-level representation. Choices include ``mean``, ``sum``, ``norm``
+ * :code:`aggregation_norm` For ``norm`` aggregation, the normalization factor by which atomic features are divided
+ * :code:`batch_size` Batch size for dataloader
+
+Specifying :code:`--search-parameter-keywords all` will search over all 13 of the above parameters.
+
+The following other common keywords may be used:
+
+ * :code:`--raytune-num-samples ` The number of trials to perform
+ * :code:`--raytune-num-cpus ` The number of CPUs to use
+ * :code:`--raytune-num-gpus ` The number of GPUs to use
+ * :code:`--raytune-max-concurrent-trials ` The maximum number of concurrent trials
+ * :code:`--raytune-search-algorithm ` The choice of control search algorithm (either ``random``, ``hyperopt``, or ``optuna``). If ``hyperopt`` is specified, then the arguments ``--hyperopt-n-initial-points `` and ``--hyperopt-random-state-seed `` can be specified.
+
+Other keywords related to hyperparameter optimization are also available (see :ref:`cmd` for a full list).
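The results of an optimization run are written under the directory given by :code:`--hpopt-save-dir` (``results`` in the example above), including a ``best_config.toml`` file that can be passed back to :code:`chemprop train` (see the Applying Optimal Hyperparameters section below). As a minimal sketch for inspecting that file from Python, assuming only that it is valid TOML (the exact keys depend on which parameters were searched):

.. code-block:: python

    import tomllib  # standard library on Python 3.11+
    from pathlib import Path

    # Path assumed from the example above: --hpopt-save-dir results
    config_path = Path("results") / "best_config.toml"

    with config_path.open("rb") as f:
        best_config = tomllib.load(f)

    # Print whatever hyperparameters the search selected.
    for key, value in best_config.items():
        print(f"{key}: {value}")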
+ +Splitting +---------- +By default, Chemprop will split the data into train / validation / test data splits. The splitting behavior can be modified using the same splitting arguments used in training, i.e., section :ref:`train_validation_test_splits`. + +.. note:: + This default splitting behavior is different from Chemprop v1, wherein the hyperparameter optimization was performed on the entirety of the data provided to it. + +If ``--num-replicates`` is greater than one, Chemprop will only use the first split to perform hyperparameter optimization. If you need to optimize hyperparameters separately for several different cross validation splits, you should e.g. set up a bash script to run :code:`chemprop hpopt` separately on each split. + + +Applying Optimal Hyperparameters +--------------------------------- + +Once hyperparameter optimization is complete, the optimal hyperparameters can be applied during training by specifying the config path. If an argument is both provided via the command line and the config file, the command line takes precedence. For example: + +.. code-block:: + + chemprop train --data-path tests/data/regression.csv \ + --config-path results/best_config.toml diff --git a/chemprop/docs/source/tutorial/cli/index.rst b/chemprop/docs/source/tutorial/cli/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..a5c54c8111e9857ef5388183f27634044f0a915d --- /dev/null +++ b/chemprop/docs/source/tutorial/cli/index.rst @@ -0,0 +1,62 @@ +.. _tutorial: + +Command Line Tutorials +====================== + +.. note:: + Chemprop recently underwent a ground-up rewrite and new major release (v2.0.0). A helpful transition guide from Chemprop v1 to v2 can be found `here `_. This includes a side-by-side comparison of CLI argument options, a list of which arguments will be implemented in later versions of v2, and a list of changes to default hyperparameters. + +Chemprop may be invoked from the command line using the following command: + +.. code-block:: + + $ chemprop COMMAND [ARGS] + +where ``COMMAND`` is one of the following: + +* ``train``: Train a model. +* ``predict``: Make predictions with a trained model. +* ``convert``: Convert a trained Chemprop model from v1 to v2. +* ``hpopt``: Perform hyperparameter optimization. +* ``fingerprint``: Use a trained model to compute a learned representation. + +and ``ARGS`` are command-specific arguments. To see the arguments for a specific command, run: + +.. code-block:: + + $ chemprop COMMAND --help + +For example, to see the arguments for the ``train`` command, run: + +.. code-block:: + + $ chemprop train --help + +To enable logging, specify ``--log `` or ``--logfile ``, where ```` is the desired path to which the logfile should be written; if unspecified, the log will be written to ``chemprop_logs``. +The default logging level is INFO. If more detailed debugging information is required, specify ``-v`` for DEBUG level. To decrease verbosity below the default INFO level, use ``-q`` for WARNING or ``-qq`` for ERROR. + +Chemprop is built on top of Lightning, which has support for training and predicting on GPUs. +Relevant CLI flags include `--accelerator` and `--devices`. +See the `Lightning documentation `_ and CLI reference for more details. 
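The CLI is built with argparse (the CLI reference is generated from ``chemprop.cli.main.construct_parser``), so the same parser can also be inspected from Python. The sketch below assumes that this parser factory is importable and callable with no arguments; it simply prints the top-level help listing the available subcommands.

.. code-block:: python

    from chemprop.cli.main import construct_parser

    # Build the argument parser behind the ``chemprop`` command.
    parser = construct_parser()

    # Print the top-level help (train, predict, convert, hpopt, fingerprint, ...).
    parser.print_help()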
+ +For more details on each command, see the corresponding section below: + +* :ref:`train` +* :ref:`predict` +* :ref:`convert` +* :ref:`hpopt` +* :ref:`fingerprint` + +The following features are not yet implemented, but will be included in a future release: + +* ``interpret``: Interpret model predictions. + +.. toctree:: + :maxdepth: 1 + :hidden: + + train + predict + convert + hpopt + fingerprint \ No newline at end of file diff --git a/chemprop/docs/source/tutorial/cli/interpret.rst b/chemprop/docs/source/tutorial/cli/interpret.rst new file mode 100644 index 0000000000000000000000000000000000000000..bddaec99c9b2ab47cc991f68717891ea9cc4ff65 --- /dev/null +++ b/chemprop/docs/source/tutorial/cli/interpret.rst @@ -0,0 +1,37 @@ +.. _interpret: + +Interpreting +============ + +.. warning:: + This page is under construction. + +.. + It is often helpful to provide explanation of model prediction (i.e., this molecule is toxic because of this substructure). Given a trained model, you can interpret the model prediction using the following command: + + .. code-block:: + + chemprop interpret --data_path data/tox21.csv --checkpoint_dir tox21_checkpoints/fold_0/ --property_id 1 + + The output will be like the following: + + * The first column is a molecule and second column is its predicted property (in this case NR-AR toxicity). + * The third column is the smallest substructure that made this molecule classified as toxic (which we call rationale). + * The fourth column is the predicted toxicity of that substructure. + + As shown in the first row, when a molecule is predicted to be non-toxic, we will not provide any rationale for its prediction. + + .. csv-table:: + :header: "smiles", "NR-AR", "rationale", "rationale_score" + :widths: 20, 10, 20, 10 + + "O=[N+]([O-])c1cc(C(F)(F)F)cc([N+](=O)[O-])c1Cl", "0.014", "", "" + "CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)C=C[C@]5(C)[C@H]4[C@@H](O)C[C@]3(C)[C@]2(C(=O)CO)O1", "0.896", "C[C@]12C=CC(=O)C=C1[CH2:1]C[CH2:1][CH2:1]2", "0.769" + "C[C@]12CC[C@H]3[C@@H](CC[C@@]45O[C@@H]4C(O)=C(C#N)C[C@]35C)[C@@H]1CC[C@@H]2O", "0.941", "C[C@]12C[CH:1]=[CH:1][C@H]3O[C@]31CC[C@@H]1[C@@H]2CC[C:1][CH2:1]1", "0.808" + "C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)COP(=O)([O-])[O-]", "0.957", "C1C[CH2:1][C:1][C@@H]2[C@@H]1[C@@H]1CC[C:1][C:1]1C[CH2:1]2", "0.532" + + Chemprop's interpretation script explains model prediction one property at a time. :code:`--property_id 1` tells the script to provide explanation for the first property in the dataset (which is NR-AR). In a multi-task training setting, you will need to change :code:`--property_id` to provide explanation for each property in the dataset. + + For computational efficiency, we currently restricted the rationale to have maximum 20 atoms and minimum 8 atoms. You can adjust these constraints through :code:`--max_atoms` and :code:`--min_atoms` argument. + + Please note that the interpreting framework is currently only available for models trained on properties of single molecules, that is, multi-molecule models generated via the :code:`--number_of_molecules` command are not supported. diff --git a/chemprop/docs/source/tutorial/cli/predict.rst b/chemprop/docs/source/tutorial/cli/predict.rst new file mode 100644 index 0000000000000000000000000000000000000000..091a3f9f261f1edf23e6c77a7846b45b738fa61a --- /dev/null +++ b/chemprop/docs/source/tutorial/cli/predict.rst @@ -0,0 +1,109 @@ +.. _predict: + +Prediction +---------- + +To load a trained model and make predictions, run: + +.. 
code-block::
+
+    chemprop predict --test-path --model-paths <[model_paths]>
+
+where :code:`` is the path to the data to test on, and :code:`<[model_paths]>` is the location of checkpoint(s) or model file(s) to use for prediction. It can be a path to either a single pretrained model checkpoint (.ckpt) or single pretrained model file (.pt), a directory that contains these files, or a list of path(s) and directory(s). If a directory, will recursively search and predict on all found (.pt) models. By default, predictions will be saved to the same directory as the test path. If desired, a different directory can be specified by using :code:`--preds-path `. The predictions can end with either .csv or .pkl, and the output will be saved to the corresponding file type.
+
+For example:
+
+.. code-block::
+
+    chemprop predict --test-path tests/data/smis.csv \
+        --model-path tests/data/example_model_v2_regression_mol.ckpt \
+        --preds-path preds.csv
+
+
+Specifying Data to Parse
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+By default, Chemprop will assume that the 0th column in the data .csv contains the data. To use a separate column, specify:
+
+ * :code:`--smiles-columns` Text label of the column that includes the SMILES strings
+
+If atom-mapped reaction SMILES are used, specify:
+
+ * :code:`--reaction-columns` Text labels of the columns that include the reaction SMILES
+
+If :code:`--reaction-mode` was specified during training, those same flags must be specified for the prediction step.
+
+
+Uncertainty Quantification
+--------------------------
+
+To load a trained model and quantify prediction uncertainty, run:
+
+.. code-block::
+
+    chemprop predict --test-path \
+        --cal-path \
+        --model-paths <[model_paths]> \
+        --uncertainty-method \
+        --calibration-method \
+        --evaluation-methods <[methods]>
+
+where :code:`` is the path to the data to test on, :code:`` is the calibration dataset used for uncertainty calibration if needed, and :code:`<[model_paths]>` is the location of checkpoint(s) or model file(s) to use for prediction. The uncertainty estimation, calibration, and evaluation methods are detailed below.
+
+Uncertainty Estimation
+^^^^^^^^^^^^^^^^^^^^^^
+
+The uncertainty of predictions made in Chemprop can be estimated by several different methods. Uncertainty estimation is carried out alongside model value prediction and reported in the predictions csv file when the argument :code:`--uncertainty-method ` is provided. If no uncertainty method is provided, then only the model value predictions will be carried out. The available methods are:
+
+ * :code:`dropout`
+ * :code:`ensemble`
+ * :code:`quantile-regression`
+ * :code:`mve`
+ * :code:`evidential-total`, :code:`evidential-epistemic`, :code:`evidential-aleatoric`
+ * :code:`classification`
+ * :code:`classification-dirichlet`
+ * :code:`multiclass`
+ * :code:`multiclass-dirichlet`
+
+Uncertainty Calibration
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Uncertainty predictions may be calibrated to improve their performance on new predictions. Calibration methods are selected using :code:`--calibration-method `, options provided below. An additional dataset to use in calibration is provided through :code:`--cal-path `, along with necessary features like :code:`--cal-descriptors-path `. As with the data used in training, calibration data for multitask models are allowed to have gaps and missing targets in the data.
+
+**Regression**:
+
+ * :code:`zscaling` Assumes that errors are normally distributed according to the estimated variance for each prediction.
Applies a constant multiple to all stdev or interval outputs in order to minimize the negative log likelihood for the normal distributions. (https://arxiv.org/abs/1905.11659)
+ * :code:`zelikman-interval` Assumes that the error distribution is the same for each prediction but scaled by the uncalibrated standard deviation for each. Multiplies the uncalibrated standard deviation by a factor necessary to cover the specified interval of the calibration set. Does not assume a Gaussian distribution. The interval is specified with :code:`--calibration-interval-percentile `, which defaults to 95. (https://arxiv.org/abs/2005.12496)
+ * :code:`mve-weighting` For use with ensembles of models trained with mve or evidential loss function. Uses a weighted average of the predicted variances to achieve a minimum negative log likelihood of predictions. (https://doi.org/10.1186/s13321-021-00551-x)
+ * :code:`conformal-regression` Generates an interval of variable size for each prediction based on quantile predictions of the data such that the actual value has probability :math:`1 - \alpha` of falling in the interval. The desired error rate is controlled using the parameter :code:`--conformal-alpha ` which is set by default to 0.1. (https://arxiv.org/abs/2107.07511)
+
+**Classification**:
+
+ * :code:`platt` Uses a linear scaling before the sigmoid function in prediction to minimize the negative log likelihood of the predictions. (https://arxiv.org/abs/1706.04599)
+ * :code:`isotonic` Fits an isotonic regression model to the predictions. Prediction outputs are transformed using a stepped histogram-style to match the empirical probability observed in the calibration data. Number and size of the histogram bins are procedurally decided. Histogram bins are wider in the regions of the model output that are less reliable in ordering confidence. (https://arxiv.org/abs/1706.04599)
+ * :code:`conformal-multilabel` Generates a pair of sets of labels :math:`C_{in} \subset C_{out}` such that the true set of labels :math:`S` satisfies the property :math:`C_{in} \subset S \subset C_{out}` with probability at least :math:`1-\alpha`. The desired error rate :math:`\alpha` can be controlled with the parameter :code:`--conformal-alpha ` which is set by default to 0.1. (https://arxiv.org/abs/2004.10181)
+
+
+**Multiclass**:
+
+ * :code:`conformal-multiclass` Generates a set of possible classes for each prediction such that the true class has probability :math:`1-\alpha` of falling in the set. The desired error rate :math:`\alpha` can be controlled with the parameter :code:`--conformal-alpha ` which is set by default to 0.1. Set generated using the basic conformal method. (https://arxiv.org/abs/2107.07511)
+ * :code:`conformal-adaptive` Similar to conformal-multiclass, this method generates a set of possible classes but uses an adaptive conformal method. The desired error rate :math:`\alpha` can be controlled with the parameter :code:`--conformal-alpha ` which is set by default to 0.1. (https://arxiv.org/abs/2107.07511)
+ * :code:`isotonic-multiclass` Calibrates multiclass classification datasets using isotonic regression. It uses a one-vs-all aggregation scheme to extend isotonic regression from binary to multiclass classifiers. (https://arxiv.org/abs/1706.04599)
+
+Uncertainty Evaluation Metrics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The performance of uncertainty predictions (calibrated or uncalibrated) is evaluated on the test set using the evaluation metrics specified with :code:`--evaluation-methods <[methods]>`.
+Evaluation scores will only appear in the output trace. Multiple evaluation methods can be provided and they will be calculated separately for each model task. Evaluation is only available when the target values are provided with the data in :code:`--test-path `. As with the data used in training, evaluation data for multitask models are allowed to have gaps and missing targets in the data.
+
+ .. * Any valid classification or multiclass metric. Because classification and multiclass outputs are inherently probabilistic, any metric used to assess them during training is appropriate to evaluate the confidences produced after calibration.
+
+ * :code:`nll-regression`, :code:`nll-classification`, :code:`nll-multiclass` Returns the average negative log likelihood of the real target as indicated by the uncertainty predictions. Enabled for regression, classification, and multiclass dataset types.
+ * :code:`spearman` A regression evaluation metric. Returns the Spearman rank correlation between the predicted uncertainty and the actual error in predictions. Only considers ordering and does not assume a particular probability distribution.
+ * :code:`ence` Expected normalized calibration error. A regression evaluation metric. Bins model predictions according to uncertainty prediction and compares the RMSE in each bin versus the expected error based on the predicted uncertainty variance, then scaled by variance. (discussed in https://doi.org/10.1021/acs.jcim.9b00975)
+ * :code:`miscalibration_area` A regression evaluation metric. Calculates the model's performance of expected probability versus realized probability at different points along the probability distribution. Values range (0, 0.5) with perfect calibration at 0. (discussed in https://doi.org/10.1021/acs.jcim.9b00975)
+ * :code:`conformal-coverage-regression`, :code:`conformal-coverage-classification`, :code:`conformal-coverage-multiclass` Measures the empirical coverage of the conformal methods, that is, the proportion of datapoints that fall within the output set or interval. Must be used with a conformal calibration method which outputs a set or interval. The metric can be used with multiclass, multilabel, or regression conformal methods.
+
+Different evaluation metrics consider different aspects of uncertainty. It is often appropriate to consider multiple metrics. For instance, miscalibration error is important for evaluating uncertainty magnitude but does not indicate that the uncertainty function discriminates well between different outputs. Similarly, spearman tests ordering but not prediction magnitude.
+
+Evaluations can be used to compare different uncertainty methods and different calibration methods for a given dataset. Using evaluations to compare between datasets may not be a fair comparison and should be done cautiously.
\ No newline at end of file
diff --git a/chemprop/docs/source/tutorial/cli/train.rst b/chemprop/docs/source/tutorial/cli/train.rst
new file mode 100644
index 0000000000000000000000000000000000000000..83d63da0e7c10c130b0f08b956d6a1621e5c171f
--- /dev/null
+++ b/chemprop/docs/source/tutorial/cli/train.rst
@@ -0,0 +1,335 @@
+.. _train:
+
+Training
+=========================
+
+To train a model, run:
+
+.. code-block::
+
+    chemprop train --data-path --task-type --output-dir
+
+where ```` is the path to a CSV file containing a dataset, ```` is the type of modeling task, and ```` is the directory where model checkpoints will be saved.
+
+For example:
+
+..
code-block:: + + chemprop train --data-path tests/data/regression.csv \ + --task-type regression \ + --output-dir solubility_checkpoints + +The following modeling tasks are supported: + + * :code:`regression` + * :code:`regression-mve` + * :code:`regression-evidential` + * :code:`regression-quantile` + * :code:`classification` + * :code:`classification-dirichlet` + * :code:`multiclass` + * :code:`multiclass-dirichlet` + * :code:`spectral` + +A full list of available command-line arguments can be found in :ref:`cmd`. + + +Input Data +---------- + +In order to train a model, you must provide training data containing molecules (as SMILES strings) and known target values. Targets can either be real numbers, if performing regression, or binary (i.e. 0s and 1s), if performing classification. Target values which are unknown can be left as blanks. A model can be trained as either single- or multi-task. + +The data file must be be a **CSV file with a header row**. For example: + +.. code-block:: + + smiles,NR-AR,NR-AR-LBD,NR-AhR,NR-Aromatase,NR-ER,NR-ER-LBD,NR-PPAR-gamma,SR-ARE,SR-ATAD5,SR-HSE,SR-MMP,SR-p53 + CCOc1ccc2nc(S(N)(=O)=O)sc2c1,0,0,1,,,0,0,1,0,0,0,0 + CCN1C(=O)NC(c2ccccc2)C1=O,0,0,0,0,0,0,0,,0,,0,0 + ... + +By default, it is assumed that the SMILES are in the first column and the targets are in the remaining columns. However, the specific columns containing the SMILES and targets can be specified using the :code:`--smiles-columns ` and :code:`--target-columns ...` flags, respectively. To simultaneously train multiple molecules (such as a solute and a solvent), supply two column headers in :code:`--smiles-columns `. + +.. _train_validation_test_splits: + +Train/Validation/Test Splits +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Our code supports several methods of splitting data into train, validation, and test sets. + +* **Random:** By default, the data will be split randomly into train, validation, and test sets. + +* **Scaffold:** Alternatively, the data can be split by molecular scaffold so that the same scaffold never appears in more than one split. This can be specified by adding :code:`--split-type scaffold_balanced`. + +* **User Specified Splits** Custom splits can be specified in two ways, :code:`--splits-column` and :code:`--splits-file`, examples of which are shown below. + +.. code-block:: + + chemprop train --splits-column split -i data.csv -t regression + +.. list-table:: data.csv + :widths: 10 10 10 + :header-rows: 1 + + * - smiles + - property + - split + * - C + - 1.0 + - train + * - CC + - 2.0 + - train + * - CCC + - 3.0 + - test + * - CCCC + - 4.0 + - val + * - CCCCC + - 5.0 + - val + * - CCCCCC + - 6.0 + - test + +.. code-block:: + + chemprop train --splits-file splits.json -i data.csv -t regression + +.. code-block:: JSON + :caption: splits.json + + [ + {"train": [1, 2], "val": "3-5", "test": "6,7"}, + {"val": [1, 2], "test": "3-5", "train": "6,7"}, + ] + +.. note:: + By default, both random and scaffold split the data into 80% train, 10% validation, and 10% test. This can be changed with :code:`--split-sizes `. The default setting is :code:`--split-sizes 0.8 0.1 0.1`. Both splits also involve a random component that can be seeded with :code:`--data-seed `. The default setting is :code:`--data-seed 0`. + +Other supported splitting methods include :code:`random_with_repeated_smiles`, :code:`kennard_stone`, and :code:`kmeans`. + +Replicates +^^^^^^^^^^ + +Repeat random trials (i.e. replicates) run by specifying :code:`--num-replicates ` (default 1, i.e. no replicates). 
+This is analogous to the 'outer loop' of nested cross validation but at a lower cost, suitable for deep learning applications. + +Ensembling +^^^^^^^^^^ + +To train an ensemble, specify the number of models in the ensemble with :code:`--ensemble-size ` (default 1). + +Hyperparameters +--------------- + +Model performance is often highly dependent on the hyperparameters used. Below is a list of common hyperparameters (see :ref:`cmd` for a full list): + + * :code:`--batch-size` Batch size (default 64) + * :code:`--message-hidden-dim ` Hidden dimension of the messages in the MPNN (default 300) + * :code:`--depth ` Number of message-passing steps (default 3) + * :code:`--dropout ` Dropout probability in the MPNN & FFN layers (default 0) + * :code:`--activation ` The activation function used in the MPNN and FNN layers. Options include :code:`relu`, :code:`leakyrelu`, :code:`prelu`, :code:`tanh`, :code:`selu`, and :code:`elu`. (default :code:`relu`) + * :code:`--epochs ` How many epochs to train over (default 50) + * :code:`--warmup-epochs `: The number of epochs during which the learning rate is linearly incremented from :code:`init_lr` to :code:`max_lr` (default 2) + * :code:`--init-lr ` Initial learning rate (default 0.0001) + * :code:`--max-lr ` Maximum learning rate (default 0.001) + * :code:`--final-lr ` Final learning rate (default 0.0001) + + +Loss Functions +-------------- + +The loss function can be specified using the :code:`--loss-function ` keyword, where `` is one of the following: + +**Regression**: + + * :code:`mse` Mean squared error (default) + * :code:`bounded-mse` Bounded mean squared error + * :code:`mve` Mean-variance estimation + * :code:`evidential` Evidential; if used, :code:`--evidential-regularization` can be specified to modify the regularization, and :code:`--eps` to modify epsilon. + +**Classification**: + + * :code:`bce` Binary cross-entropy (default) + * :code:`binary-mcc` Binary Matthews correlation coefficient + * :code:`dirichlet` Dirichlet + + +**Multiclass**: + + * :code:`ce` Cross-entropy (default) + * :code:`multiclass-mcc` Multiclass Matthews correlation coefficient + * :code:`dirichlet` Dirichlet + +**Spectral**: + + * :code:`sid` Spectral information divergence (default) + * :code:`earthmovers` Earth mover's distance (or first-order Wasserstein distance) + * :code:`wasserstein` See above. 
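+
+As an example of pairing a task type with one of the loss functions above, an evidential regression model can be requested as in the sketch below. This is illustrative only: the regularization value and output directory are placeholders, and the chosen :code:`--loss-function` must be compatible with the :code:`--task-type`.
+
+.. code-block::
+
+    chemprop train --data-path tests/data/regression.csv \
+        --task-type regression-evidential \
+        --loss-function evidential \
+        --evidential-regularization 0.2 \
+        --output-dir evidential_checkpoints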
+
+Evaluation Metrics
+------------------
+
+The following evaluation metrics are supported during training:
+
+**Regression**:
+
+ * :code:`rmse` Root mean squared error
+ * :code:`mae` Mean absolute error
+ * :code:`mse` Mean squared error (default)
+ * :code:`bounded-mae` Bounded mean absolute error
+ * :code:`bounded-mse` Bounded mean squared error
+ * :code:`bounded-rmse` Bounded root mean squared error
+ * :code:`r2` R squared metric
+
+**Classification**:
+
+ * :code:`roc` Receiver operating characteristic (default)
+ * :code:`prc` Precision-recall curve
+ * :code:`accuracy` Accuracy
+ * :code:`f1` F1 score
+ * :code:`bce` Binary cross-entropy
+ * :code:`binary-mcc` Binary Matthews correlation coefficient
+
+**Multiclass**:
+
+ * :code:`ce` Cross-entropy (default)
+ * :code:`multiclass-mcc` Multiclass Matthews correlation coefficient
+
+**Spectral**:
+
+ * :code:`sid` Spectral information divergence (default)
+ * :code:`wasserstein` Earth mover's distance (or first-order Wasserstein distance)
+
+
+Advanced Training Methods
+-------------------------
+
+Pretraining and Transfer Learning
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+An existing model, for example one trained on a larger, lower-quality dataset, can be used to initialize the parameters of a new model by providing a checkpoint of the existing model with :code:`--checkpoint`. This flag takes the location of checkpoint(s) or model file(s): a path to a single pretrained model checkpoint (.ckpt), a single pretrained model file (.pt), a directory that contains these files, or a list of paths and directories.
+
+When training the new model, its architecture **must** resemble that of the old model. Depending on the similarity of the tasks and datasets, as well as the quality of the old model, the new model might require fewer epochs to achieve optimal performance compared to training from scratch.
+
+It is also possible to freeze the weights of a loaded Chemprop model during training, such as for transfer learning applications. To do so, first load a pretrained model by specifying its checkpoint file with :code:`--checkpoint`. After loading the model, the MPNN weights can be frozen via :code:`--freeze-encoder`. The FFN weights can be controlled with the :code:`--frzn-ffn-layers` flag, which freezes the first :code:`n` FFN layers. By default, :code:`n` is 0, meaning all FFN layers remain trainable unless specified otherwise.
+
+.. _train-on-reactions:
+
+Training on Reactions
+^^^^^^^^^^^^^^^^^^^^^
+
+Chemprop can also process atom-mapped reaction SMILES (see the `Daylight manual`_ for details), which consist of three parts denoting reactants, agents, and products, each separated by ">". For example, an atom-mapped reaction SMILES denoting the reaction of methanol to formaldehyde without hydrogens is :code:`[CH3:1][OH:2]>>[CH2:1]=[O:2]`, and with hydrogens it is :code:`[C:1]([H:3])([H:4])([H:5])[O:2][H:6]>>[C:1]([H:3])([H:4])=[O:2].[H:5][H:6]`. The reactions do not need to be balanced and can thus contain unmapped parts, for example leaving groups, if necessary.
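+
+As with molecules, reaction data are supplied as a CSV file with a header row. A minimal, hypothetical layout with one atom-mapped reaction SMILES column and one target column might look like the sketch below; the column names and target value are placeholders.
+
+.. code-block::
+
+    rxn_smiles,ea
+    [CH3:1][OH:2]>>[CH2:1]=[O:2],23.1
+    ...
+
+The reaction SMILES column is then passed to the option described next.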
+ +Specify columns in the input file with reaction SMILES using the option :code:`--reaction-columns` to enable this, which transforms the reactants and products to the corresponding condensed graph of reaction, and changes the initial atom and bond features depending on the argument provided to :code:`--rxn-mode `: + + * :code:`reac_diff` Featurize with the reactant and the difference upon reaction (default) + * :code:`reac_prod` Featurize with both the reactant and product + * :code:`prod_diff` Featurize with the product and the difference upon reaction + +Each of these arguments can be modified to balance imbalanced reactions by appending :code:`_balance`, e.g. :code:`reac_diff_balance`. + +In reaction mode, Chemprop concatenates information to each atomic and bond feature vector. For example, using :code:`--reaction-mode reac_prod`, each atomic feature vector holds information on the state of the atom in the reactant (similar to default Chemprop), and concatenates information on the state of the atom in the product. Agents are featurized with but not connected to the reactants. Functions incompatible with a reaction as input (scaffold splitting and feature generation) are carried out on the reactants only. + +If the atom-mapped reaction SMILES contain mapped hydrogens, enable explicit hydrogens via :code:`--keep-h`. + +For further details and benchmarking, as well as a citable reference, please see `DOI 10.1021/acs.jcim.1c00975 `_. + + +Training Reactions with Molecules (e.g. Solvents, Reagents) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Both reaction and molecule SMILES can be associated with a target (e.g. a reaction rate in a solvent). To do so, use both :code:`--smiles-columns` and :code:`--reaction-columns`. + +.. Chemprop allows differently-sized MPNNs to be used for each reaction and solvent/molecule encoding. The following commands can be used to specify the solvent/molecule MPNN size if :code:`--reaction-solvent` is specified: + +.. * :code:`--bias-solvent` Whether to add bias to the linear layers of the solvent/molecule (default :code:`false`) +.. * :code:`--hidden-size-solvent ` The dimensionality of the hidden layers for the solvent/molecule (default 300) +.. * :code:`--depth-solvent ` The number of message passing steps for the solvent/molecule (default 3) + +The reaction and molecule SMILES columns can be ordered in any way. However, the same column ordering as used in the training must be used for the prediction. For more information on atom-mapped reaction SMILES, please refer to :ref:`train-on-reactions`. + + +Training on Spectra +^^^^^^^^^^^^^^^^^^^ + +Spectra training is different than other datatypes because it considers the predictions of all targets together. Targets for spectra should be provided as the values for the spectrum at a specific position in the spectrum. Spectra predictions are configured to return only positive values and normalize them to sum each spectrum to 1. Spectral prediction are still in beta and will be updated in the future. + +.. Activation to enforce positivity is an exponential function by default but can also be set as a Softplus function, according to the argument :code:`--spectral-activation `. Value positivity is enforced on input targets as well using a floor value that replaces negative or smaller target values with the floor value, customizable with the argument :code:`--spectra_target_floor ` (default 1e-8). + +.. 
In absorption spectra, sometimes the phase of collection will create regions in the spectrum where data collection or prediction would be unreliable. To exclude these regions, include paths to phase features for your data (:code:`--phase-features-path `) and a mask indicating the spectrum regions that are supported (:code:`--spectra-phase-mask-path `). The format for the mask file is a .csv file with columns for the spectrum positions and rows for the phases, with column and row labels in the same order as they appear in the targets and features files. + + +Additional Features +------------------- + +While the model works very well on its own, especially after hyperparameter optimization, additional features and descriptors may further improve performance on certain datasets. Features are used before message passing while descriptors are used after message passing. The additional features/descriptors can be added at the atom-, bond, or molecule-level. Molecule-level features can be either automatically generated by RDKit or custom features provided by the user and are concatenated to the learned descriptors generated by Chemprop during message passing (i.e. used as extra descriptors). + + +Atom-Level Features/Descriptors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can provide additional atom features via :code:`--atom-features-path /path/to/atom/features.npz` as a numpy :code:`.npz` file. This command concatenates the features to each atomic feature vector before the D-MPNN, so that they are used during message-passing. This file can be saved using :code:`np.savez("atom_features.npz", *V_fs)`, where :code:`V_fs` is a list containing the atom features :code:`V_f` for each molecule, where :code:`V_f` is a 2D array with a shape of number of atoms by number of atom features in the exact same order as the SMILES strings in your data file. + +Similarly, you can provide additional atom descriptors via :code:`--atom-descriptors-path /path/to/atom/descriptors.npz` as a numpy :code:`.npz` file. This command concatenates the new features to the embedded atomic features after the D-MPNN with an additional linear layer. This file can be saved using :code:`np.savez("atom_descriptors.npz", *V_ds)`, where :code:`V_ds` has the same format as :code:`V_fs` above. + +The order of the atom features and atom descriptors for each atom per molecule must match the ordering of atoms in the RDKit molecule object. + +The atom-level features and descriptors are scaled by default. This can be disabled with the option :code:`--no-atom-feature-scaling` or :code:`--no-atom-descriptor-scaling`. + + +Bond-Level Features +^^^^^^^^^^^^^^^^^^^ + +Bond-level features can be provided using the option :code:`--bond-features-path /path/to/bond/features.npz`. as a numpy :code:`.npz` file. This command concatenates the features to each bond feature vector before the D-MPNN, so that they are used during message-passing. This file can be saved using :code:`np.savez("bond_features.npz", *E_fs)`, where :code:`E_fs` is a list containing the bond features :code:`E_f` for each molecule, where :code:`E_f` is a 2D array with a shape of number of bonds by number of bond features in the exact same order as the SMILES strings in your data file. + +The order of the bond features for each molecule must match the bond ordering in the RDKit molecule object. + +Note that bond descriptors are not currently supported because the post message passing readout function aggregates atom descriptors. + +The bond-level features are scaled by default. 
This can be disabled with the option :code:`--no-bond-features-scaling`. + + +Extra Datapoint Descriptors +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Additional datapoint descriptors can be concatenated to the learned representation after aggregation. These extra descriptors could be molecule-level features. If you install from source, you can modify the code to load custom descriptors as follows: + +1. **Generate features:** If you want to generate molecule features in code, you can write a custom features generator function using the default featurizers in :code:`chemprop/featurizers/`. This also works for custom atom and bond features. +2. **Load features:** Additional descriptors can be provided using :code:`--descriptors-path /path/to/descriptors.npz` where the descriptors are saved as a numpy :code:`.npz` file. This file can be saved using :code:`np.savez("/path/to/descriptors.npz", X_d)`, where :code:`X_d` is a 2D array with a shape of number of datapoints by number of additional descriptors. Note that the descriptors must be in the same order as the SMILES strings in your data file. The extra descriptors are scaled by default. This can be disabled with the option :code:`--no-descriptor-scaling`. + + +Molecule-Level 2D Features +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Chemprop provides several molecule featurizers that automatically calculate molecular features and uses them as extra datapoint descriptors. These are specified using :code:`--molecule-featurizers` followed by one or more of the following: + + * :code:`morgan_binary` binary Morgan fingerprints, radius 2 and 2048 bits + * :code:`morgan_count` count-based Morgan, radius 2 and 2048 bits + * :code:`rdkit_2d` RDKit 2D features + * :code:`v1_rdkit_2d` The RDKit 2D features used in Chemprop v1 + * :code:`v1_rdkit_2d_normalized` The normalized RDKit 2D features used in Chemprop v1 + +.. note:: + The Morgan fingerprints should not be scaled. Use :code:`--no-descriptor-scaling` to ensure this. + + The RDKit 2D features are not normalized. The :code:`StandardScaler` used in the CLI to normalize is non-optimal for some of the RDKit features. It is recommended to precompute and scale these features outside of the CLI using an appropriate scaler and then provide them using :code:`--descriptors-path` and :code:`--no-descriptor-scaling` as described above. + + In Chemprop v1, :code:`descriptastorus` was used to calculate RDKit 2D features. This package offers normalization of the features, with the normalizations fit to a set of molecules randomly selected from ChEMBL. Several descriptors have been added to :code:`rdkit` recently which are not included in :code:`descriptastorus` including 'AvgIpc', 'BCUT2D_CHGHI', 'BCUT2D_CHGLO', 'BCUT2D_LOGPHI', 'BCUT2D_LOGPLOW', 'BCUT2D_MRHI', 'BCUT2D_MRLOW', 'BCUT2D_MWHI', 'BCUT2D_MWLOW', and 'SPS'. + + +Missing Target Values +^^^^^^^^^^^^^^^^^^^^^ + +When training multitask models (models which predict more than one target simultaneously), sometimes not all target values are known for all molecules in the dataset. Chemprop automatically handles missing entries in the dataset by masking out the respective values in the loss function, so that partial data can be utilized. + +The loss function is rescaled according to all non-missing values, and missing values do not contribute to validation or test errors. Training on partial data is therefore possible and encouraged (versus taking out datapoints with missing target entries). No keyword is needed for this behavior, it is the default. 
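+
+Tying together the extra descriptor options above, the note that Morgan fingerprints should not be scaled corresponds to a command along the lines of the following sketch; the output directory name is illustrative.
+
+.. code-block::
+
+    chemprop train --data-path tests/data/regression.csv \
+        --task-type regression \
+        --molecule-featurizers morgan_binary \
+        --no-descriptor-scaling \
+        --output-dir morgan_checkpoints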
+ + +TensorBoard +^^^^^^^^^^^ + +During training, TensorBoard logs are automatically saved to the output directory under :code:`model_{i}/trainer_logs/version_0/`. +.. To view TensorBoard logs, run :code:`tensorboard --logdir=` where :code:`` is the path to the checkpoint directory. Then navigate to ``_. diff --git a/chemprop/docs/source/tutorial/python/activation.ipynb b/chemprop/docs/source/tutorial/python/activation.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a23d30796be48232ad559aa7c42c818ceb8efb6b --- /dev/null +++ b/chemprop/docs/source/tutorial/python/activation.ipynb @@ -0,0 +1,143 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Activation" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.utils import Activation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Activation functions\n", + "\n", + "The following activation functions are available in Chemprop." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "relu\n", + "leakyrelu\n", + "prelu\n", + "tanh\n", + "selu\n", + "elu\n" + ] + } + ], + "source": [ + "for activation in Activation:\n", + " print(activation)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Custom activation functions require editing the source code in `chemprop.nn.utils.py`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from enum import auto\n", + "from torch import nn\n", + "\n", + "from chemprop.utils.utils import EnumMapping\n", + "\n", + "\n", + "class Activation(EnumMapping):\n", + " RELU = auto()\n", + " LEAKYRELU = auto()\n", + " PRELU = auto()\n", + " TANH = auto()\n", + " SELU = auto()\n", + " ELU = auto()\n", + " GELU = auto() # example edited source code\n", + "\n", + "\n", + "def get_activation_function(activation: str | Activation) -> nn.Module:\n", + " \"\"\"Gets an activation function module given the name of the activation.\n", + "\n", + " See :class:`~chemprop.v2.models.utils.Activation` for available activations.\n", + "\n", + " Parameters\n", + " ----------\n", + " activation : str | Activation\n", + " The name of the activation function.\n", + "\n", + " Returns\n", + " -------\n", + " nn.Module\n", + " The activation function module.\n", + " \"\"\"\n", + " match Activation.get(activation):\n", + " case Activation.RELU:\n", + " return nn.ReLU()\n", + " case Activation.LEAKYRELU:\n", + " return nn.LeakyReLU(0.1)\n", + " case Activation.PRELU:\n", + " return nn.PReLU()\n", + " case Activation.TANH:\n", + " return nn.Tanh()\n", + " case Activation.SELU:\n", + " return nn.SELU()\n", + " case Activation.ELU:\n", + " return nn.ELU()\n", + " case Activation.GELU: # example edited source code\n", + " return nn.GELU() # example edited source code\n", + " case _:\n", + " raise RuntimeError(\"unreachable code reached!\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/docs/source/tutorial/python/data/dataloaders.ipynb b/chemprop/docs/source/tutorial/python/data/dataloaders.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..d1c8bdbf44b2b76c68c75eee70c1898dcf657736 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/data/dataloaders.ipynb @@ -0,0 +1,385 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dataloaders" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.data.dataloader import build_dataloader" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example [dataset](./datasets.ipynb) to load." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from chemprop.data import MoleculeDatapoint, MoleculeDataset\n", + "\n", + "smis = [\"C\" * i for i in range(1, 4)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "dataset = MoleculeDataset([MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Torch dataloaders" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop uses native `torch.utils.data.Dataloader`s to batch data as input to a model. `build_dataloader` is a helper function to make the dataloader." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "dataloader = build_dataloader(dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`build_dataloader` changes the defaults of `Dataloader` to use a batch size of 64 and turn on shuffling. It also automatically uses the correct collating function for the dataset (single component vs multi-component)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from torch.utils.data import DataLoader\n", + "from chemprop.data.collate import collate_batch, collate_multicomponent\n", + "\n", + "dataloader = DataLoader(dataset=dataset, batch_size=64, shuffle=True, collate_fn=collate_batch)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Collate function" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The collate function takes an iterable of dataset outputs and batches them together. Iterating through batches is done automatically during training by the lightning `Trainer`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TrainingBatch(bmg=, V_d=None, X_d=None, Y=tensor([[0.0562],\n", + " [0.5048]]), w=tensor([[1.],\n", + " [1.]]), lt_mask=None, gt_mask=None)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "collate_batch([dataset[0], dataset[1]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Shuffling" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Shuffling the data helps improve model training, so `build_dataloader` has `shuffle=True` as the default. Shuffling should be turned off for validation and test dataloaders. Lightning gives a warning if a dataloader with shuffling is used during prediction." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = build_dataloader(dataset)\n", + "val_loader = build_dataloader(dataset, shuffle=False)\n", + "test_loader = build_dataloader(dataset, shuffle=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "from lightning import pytorch as pl\n", + "from chemprop import models, nn\n", + "\n", + "trainer = pl.Trainer(logger=False, enable_checkpointing=False, max_epochs=1)\n", + "chemprop_model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), nn.RegressionFFN())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 3.37it/s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/brianli/Documents/chemprop/chemprop/nn/message_passing/base.py:263: UserWarning: The operator 'aten::scatter_reduce.two_out' is not currently supported on the MPS backend and will fall back to run on the CPU. This may have performance implications. (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/mps/MPSFallback.mm:13.)\n", + " M_all = torch.zeros(len(bmg.V), H.shape[1], dtype=H.dtype, device=H.device).scatter_reduce_(\n" + ] + } + ], + "source": [ + "preds = trainer.predict(chemprop_model, dataloader)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 164.67it/s]\n" + ] + } + ], + "source": [ + "preds = trainer.predict(chemprop_model, test_loader)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parallel data loading" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As datapoints are sampled from the dataset, the `MolGraph` data structures are generated on-the-fly, which requires featurization of the molecular graphs. Giving the dataloader multiple workers can increase dataloading speed by preparing the datapoints in parallel. Note that this is not compatible with Windows (the process hangs) and some versions of Mac. 
\n", + "\n", + "[Caching](./dataloaders.ipynb) the the `MolGraphs` in the dataset before making the dataloader can also speed up sequential dataloading (`num_workers=0`)." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "build_dataloader(dataset, num_workers=8)\n", + "\n", + "dataset.cache = True\n", + "build_dataloader(dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Drop last batch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`build_dataloader` drops the last batch if it is a single datapoint as batch normalization (the default) requires at least two data points. If you do not want to drop the last datapoint, you can adjust the batch size, or, if you aren't using batch normalization, build the dataloader manually." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Dropping last batch of size 1 to avoid issues with batch normalization (dataset size = 3, batch_size = 2)\n" + ] + } + ], + "source": [ + "dataloader = build_dataloader(dataset, batch_size=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "dataloader = build_dataloader(dataset, batch_size=3)\n", + "dataloader = DataLoader(dataset=dataset, batch_size=2, shuffle=True, collate_fn=collate_batch)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Samplers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The default sampler for a `torch.utils.data.Dataloader` is a `torch.utils.data.sampler.SequentialSampler` for `shuffle=False`, or a `torch.utils.data.sampler.RandomSampler` if `shuffle=True`. \n", + "\n", + "`build_dataloader` can be given a seed to make a `chemprop.data.samplers.SeededSampler` for reproducibility. Chemprop also offers `chemprop.data.samplers.ClassSampler` to equally sample positive and negative classes for binary classification tasks. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "build_dataloader(dataset, seed=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[1.],\n", + " [0.],\n", + " [1.],\n", + " [0.],\n", + " [1.],\n", + " [0.],\n", + " [1.],\n", + " [0.]])\n" + ] + } + ], + "source": [ + "smis = [\"C\" * i for i in range(1, 11)]\n", + "ys = np.random.randint(low=0, high=2, size=(len(smis), 1))\n", + "dataset = MoleculeDataset([MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])\n", + "\n", + "dataloader = build_dataloader(dataset, class_balance=True)\n", + "\n", + "_, _, _, Y, *_ = next(iter(dataloader))\n", + "print(Y)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/docs/source/tutorial/python/data/datapoints.ipynb b/chemprop/docs/source/tutorial/python/data/datapoints.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a75d5162208a379c324fe518d350695318763b84 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/data/datapoints.ipynb @@ -0,0 +1,419 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from rdkit import Chem\n", + "from chemprop.data.datapoints import MoleculeDatapoint, ReactionDatapoint" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Molecule Datapoints" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MoleculeDatapoint`s are made from target value(s) and either a `rdkit.Chem.Mol` object or a SMILES." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "mol = Chem.MolFromInchi(\"InChI=1S/C2H6/c1-2/h1-2H3\")\n", + "smi = \"CC\"\n", + "n_targets = 1\n", + "y = np.random.rand(n_targets)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name=None, V_f=None, E_f=None, V_d=None)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint(mol, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='CC', V_f=None, E_f=None, V_d=None)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint.from_smi(smi, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hydrogens in the graph" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Explicit hydrogens in the graph created by `from_smi` can be controlled using `keep_h` and `add_h`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAPWUlEQVR4nO3dW0yT5x8H8LcIVU5ylCGKHGQcRVDAEzAmsoCTLUsWdrGsy5Il4C7WsQxWLly6C5dg2aHLki2wOFezK7bdFB1uBcEjKKAMx0EUEUEROcjGsVTa/8Xjv2NyKrxtn/fw/VxpAu030H7pe3h+j8RkMjEAALBaDrQDAADwG2oUAIAV1CgAACuoUQAAVlCjAACsoEYBAFhBjQIAsIIaBQBgBTUKAMAKahQAgBXUKAAAK6hRAABWUKMAAKw40g4A8K+jR4/evXs3Li7u/fffp50FwFISDMoDLvjjjz/eeeed/v5+8t+AgIDy8vLk5GS6qQAsgYN6oKy3tzcxMTEzM7O/v18ikWzevJlhmAcPHqSkpKSlpQ0NDdEOCLAM1ChQ8+TJk7y8vODg4KamJoZhQkNDW1paent7GxsbQ0NDGYY5f/68v79/Xl6e0WikHRZgUahRoOOHH37w8vIqKyszGo1ubm7Hjx/v6uratm0bwzAJCQldXV0//viju7v77OxsWVnZ+vXrv//+e9qRARaGc6Ngb3fu3ElPT+/p6WEYZs2aNYcPH/7mm28kEsn8r5ydnc3Lyztx4gT5NPrmm29+8cUX/v7+9k4MsCTUKNjP5OSkSqVSqVRTU1MMw6Smpv76668bNmxY+rsePnz4+uuvNzQ0GAwGV1fXgoKCoqKidevW2SUygAVMALZnNBrLy8uDgoLIq+7AgQN1dXUreoTbt2/n5OSQbw8MDNRoNDaKCrBSqFGwucbGxpSUFNKAO3fuvHDhwqofqrq6evv27eSh9u/f/+eff1oxJ8DqoEbBhh48eJCbm7tmzRqGYTZu3FhaWvrkyROWj2kwGEpLS8mpAAcHB5lM9ujRI6ukBVgd1CjYxMzMjFqtXr9+PcMwTk5Ocrn877//tuLjj4yMKBQKqVTKMIyXl1dxcbFer7fi4wNYDjUK1qfVardu3UoOvbOzs2/fvm2jJ+ro6Hj55ZfJE0VERJw+fdpGTwSwBNQoWFN7e/vBgwdJr0VGRv722292eFKdThcVFUWeNCMjo7W11Q5PCmCGGgXrGBkZkcvljo6O5ChbrVYbDAa7Pfv8cwijo6N2e3YQOdQosDX3mo+jo2Nubi6taz6Dg4NyuZxc0fLx8VGr1eyvaAEsCzUKrFRXV8fGxpID6vT09JaWFtqJTE1NTampqeb7q86fP087EQgcahRW6datW+b74cPCwsrLy2kn+g+tVhscHGy+zNXd3U07EQgWahRWbHx8XKlUrl27lmEYV1dXpVI5PT1NO9QCJiYmiouL3dzcGIZxdnZWKBRjY2O0Q4EAoUZhBYxGo0ajIcNBJBKJTCbr7++nHWoZfX19MpmMjD7ZtGmTRqMxGo20Q4GgoEbBUleuXNmzZw85TN61a9fly5dpJ1qBZ8KvdEU/wBJQo7A8YXygm52d1Wg0zz33HI8+SgMvoEZhKZOTkwI7vciXE7vAI6hRWJSAL3Z3dnZy+TYD4BfUKCxg7q2XO3bsEOqtl1VVVWTbEjIClQs3vQIfoUbhP4aGhkS1EIgswfL19aW+BAv4CzUKT5Fl6R4eHiJclj48PGweCODt7W3ngQDAd6hRMJlMJp1OFx0dLfIhSe3t7VlZWebxVJWVlbQTAT+gRsUOIzufodVqQ0ND7TAsFQQDNSpeGCC/GL1eb9PR/SAwqFExIjei+/n5mbczGhgYoB2Kc8hGUg4ODuaNpGZnZ2mHAi5CjYrO2bNnsbmm5RobG5OTk8mPKyEhgc22piBUqFERuXfvnkwmw1bvK2U0GsvLy4OCgsgq0pycnLt379IOBRwiMZlMDAjdxMRESUnJsWPHpqenXV1dCwoKioqK1q1bRzsXn0xOTqpUKpVKNTU15eLiUlhYqFAonJ2daecCDqDd42Bb5JNUYGAg8/9PUj09PbRD8Vhvb695SsvmzZt5OqUFrAs1KmRXr17dt28f+XuZlJR06dIl2okEora2Nj4+nv
xg09LSrl+/TjsR0IQaFab79++brzIHBATgKrPVzR27R+52ePjwIe1QQAdqVGjIaDt3d3eGYaRSqVwu/+eff2iHEqzHjx8rFAoyds/T07O4uBhj90QINSooWq02JCSEHGxmZ2ffuXOHdiJRuHnzZnZ2NvmxP//88xi7JzaoUYG4fv36Cy+8QN7J8fHxtbW1tBOJjk6ni4mJIb+CjIyMv/76i3YisBPUKO/NHW1HphMJe7Qdl5EpWZ6engzDODk55ebmDg4O0g4FNoca5bH5o+0eP35MOxQ8HbuHP2zigRrlKxxCclxbW1tmZib5BUVFRZ05c4Z2IrAV1Cj/zL2gER4eXlFRQTsRLOqZi35dXV20E4H1oUb5BLfX8BEZu4db0AQMNcoP80fb4WZvfsGCCAFDjfJATU1NXFyceelhc3Mz7USwSnOX5yYmJmJ5rjCgRjmNjLbDIAwhIcNitmzZgmExgoFBeRxFxrKR0XZkLBtG2wkJRhcKCu0eh2fN/7SCIcFChUHawoAa5ZaGhoa5W1ZcvHiRdiKwOWzrwneoUa7ABmpihk0GeQ01St/c7XzJfYXYzlec5m55Te4LxpbXvIAapUyr1YaGhpIDOqxyAZPJ1NHRcejQIfKSCA8PP3XqFO1EsAzUKDVtbW1ZWVnk3RIZGVlZWUk7EXCITqeLjo4mL4+MjIzW1lbaiWBRqFEK5k8AMhgMtEMB58yf4DU6Oko7FCwANWpXBoOhtLTU19eXYRhHR0fMo4RlzZ0n6+Pjg7F7HIQatZ+qqqpt27aRw7QDBw7cuHGDdiLgjWvXrqWmppIXz44dO86dO0c7EfwLNWoPnZ2dOTk55D2AvXpg1bRabXBwsPmCJPba4gjUqG2NjY0plUoy2s7NzU2pVGK0HbBBdn51c3NjGMbZ2VmhUGDsHnWoUVvBPuZgO319feaZNZs2bcLMGrpQozZRX1+/e/ducvC1a9euuro62olAgK5cubJ3717yMktKSrp8+TLtRCKFGrWy3t5ejLYDuzEajRqNxt/fnwyykclk/f39tEOJDmrUaiYmJswnrVxcXBQKxdjYGO1QIArj4+NKpZLM2XN1dVUqlVNTU7RDiQhq1Dq0Wm1QUJD5Emp3dzftRCA6t27dMt8QEhYWhhtC7AY1ylZTU1NKSgp57e7cufPChQu0E4GoVVdXx8bGkhdkenp6S0sL7UTChxpdPTLajiwv8fX1xfIS4AiyWG7Dhg3mxXKPHj2iHUrIUKOrQRY7k9F2ZLEzRtsB14yMjMjlckdHR4ZhvLy8MLrBdlCjK6bVardu3WoevdPW1kY7EcCi2tvbDx48SF6uERERp0+fpp1IgFCjK4BXJPAU/vbbFGrUIjg+Ar7DmSjbQY0uA2frQUhwXdQWUKNLeebeEWzZCMKAu/SsCzW6MNzJDIKHNSPWghp9FtbVgXhgBbNVoEb/hSkPIE6Yp8MSavSpq1evYuYYiNnc6Y67d+/GdEfLoUYxARfgKcwaXx1R1yj2YwCYDzvfrJR4axS7gwEsAfswWk6MNYq9agEshF3BLSGuGh0aGpLL5WQJh4+PD5ZwACyLLOTz9fU1L+QbHBykHYpbxFKjZEGxh4eHeUHx6Ogo7VAAvDE8PGz+COLt7Y2xEnOJokZ1Ol10dLR5vE1rayvtRAC81NbWlpmZSd5KkZGRlZWVtBNxgsBrtKOj49ChQ+S3Hh4efurUKdqJAHhPq9WGhoaaL892dXXRTkSZYGt0ZGREoVBIpVKGYTw9PYuLi/V6Pe1QAAKh1+vNY/ekUqnIx+4JsEbJLcR+fn7mW4gHBgZohwIQoPv37+fm5jo4ODAMs3HjxtLS0tnZWdqhKBBajZ49e3b79u3kcGP//v3Nzc20EwEIXENDQ3JyMnnTJSQkXLx4kXYiexNOjd67d08mk5HfZWBgoEajoZ0IQCyMRmN5efmWLVvIWJ+cnJyenh7aoexHYjKZGJ6bmJgoKSk5duzY9PS0q6trQUFBUVERmXQHAHYzOTmpUqnIO9HFxaWwsFAs70TaPc6KyP8GAnDQ3ONCkYzd43GNNjQ07Nu3j/y2EhMTL126RDsRADxVU1MTFxdH3p4vvviisK9S8LJG514fDAgIEO31QQAuE889Mzw7NzozM/Pdd9998sknY2NjUqn08OHDR48edXd3p50LABY2OjpaXFz81VdfzczMeHp6FhUV5efnkyl8C6qtrT1+/Dj591tvvWVeNLW08fHx9957j/w7IiLiyJEj7JOvAO0eXwGtVhsSEkJiY+0EAI/cvHkzOzubvHnDw8MrKioW+8rS0lJzO3355ZcWPv7g4KD5u1JSUqyU2lIO9qzsVWtvb8/Kynr11Ve7u7ujoqLOnDlTUVFhXo4GABxHqlOn08XExHR2dr7yyisvvfRSa2sr7VzWwfUaHRkZ+eCDD2JjY3///XcyV+bGjRsWfs4HAE7JyMi4du3a559/7uHhUVVVlZ6ertfraYeyAkfaARZlMBi+/fbbTz/9dHR01MnJ6d133/3ss8/I0EMA4CmpVPrRRx/JZLIjR47ExsYucZKURzhao1VVVfn5+eQzf0ZGhlqtjomJoR0KAKzDz8+vrKyMdgqr4dxB/dzzJmQHGHI+hXYuAICFcejTKLkxQq1W6/V6S26MAADgAk7UqNFo/Omnnz7++OOBgQFym25JSQnZLBsAgOPo1+i5c+fy8/Obm5sZhklLS1Or1fHx8bRDAQBYiua50b6+vrfffptMBSUjDGpqatChAMAvdD6NkoFaKpVqamqKDNRSKBTOzs5UwgAAsGHvGjWZTL/88kthYWFPTw8ZbVdSUhIUFGTnGADAfT///HNHR4clXzk9PW3rMEuwa40ODw9nZ2fX19czDJOUlPT111/v3bvXngEAgEfq6urq6upop1ieXc+Nent7Ozk5ka2v6uvr0aEAIAB2/TQqkUhOnjzp6+vr5uZmz+cFAD7as2dPbGysJV+p1+tPnjxp6zyLsfe50eDgYDs/IwDw1BtvvPHhhx9a8pVDQ0MUa5Rzi0EBAPgFNQoAwApqFACAFdQoAAArqFEAAFZQowAArKBGAQBYQY0CALCCGgUAYAU1CgDACmoUAIAV+puIAACYhYSEvPbaa+TfYWFhFn7X2rVrzd8VHR1tk2SLk5hMJjs/JQCAkOCgHgCAFdQoAAArqFEAAFZQowAArKBGAQBYQY0CALCCGgUAYAU1CgDACmoUAIAV1CgAACuoUQAAVlCjAACsoEYBAFhBjQIAsPI/mfYDNf0DrLIAAABlelRYdHJka2l0UEtMIHJka2l0IDIwMjMuMDkuNQAAeJx7v2/tPQYg4GdAAGYgZgLiBkZGBQ0gzcjIxpAAEmOC0IzM3AyMDIxMDCIg1eJ6ICG43oduy+yBWvchmWcPIoDi+2HiYgCzdgzCMqvRuwAAAKh6VFh0TU9MIHJka2l0IDIwMjMuMDkuNQAAeJyNUEEKwzAMu+cV+kCD61BYjk1S1jKawJbtD7vv/8yhZE0PG7N9kIVkhBVKXcPl+cKnOCgF0I+x1uJhiEitKABuOi8RPo+uM
j7dY77BgMUhfVSOOa2V6TGj12wtmRM60jzIYXFoog1UIcM3rB7oi86Irvvr4hTDIcoWzqUY9nCleY8gC0zrb9Vlr08QrN4jl0NZa+vfuwAAAEF6VFh0U01JTEVTIHJka2l0IDIwMjMuMDkuNQAAeJyL9oh1dlao0TDUM7K0NDDR0TXQMzLVsTbQMdADUrqowpo1APtNChCjpyj6AAAAAElFTkSuQmCC", + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint.from_smi(\"[H]CC\", y, keep_h=True).mol" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAGb0lEQVR4nO3cP0iV/x7A8a+npFq0SLRFHRQ1gkAHq6WlRce2iqLWIhCC1oTW/i0/bBUq22pMgtDTUjgkRKFJQihGSlG4WITn3EHu797LOZn66fcc9b5e4/N8h494ePscz0erisViAmCjcpUeAGBrk1GAEBkFCJFRgBAZBQiRUYCQnZUegK1ncXHx9u3bKaXLly/X1dWVHnj//v39+/dzudy1a9cyn66869evFwqFs2fPtra2lt79/PnzX3/9lVK6cuVKTU1N5tOxtVXZG2W9Zmdnm5qaUkoTExMdHR2lB4aHh3t7e3O53PLycubTlbdjx45CofDkyZOenp7Su5OTkwcPHkwpzczMNDY2Zj4dW5s39QAhMgoQIqMAITIKECKjACEWnti4R48eHThwoPT6mzdvsh9mLZ4+ffrx48fS658+fcp+GLYNC0+s298LT6vbhAtPvz1m4YkN8DTKxl28eHH//v2l16enpx8+fJj9PL91+vTplpaW0utfvny5e/du9vOwPXgaZd2s38N/8xETQIiMAoTIKECIjAKEyChAiIwChNgbZd3q6+tHR0dTSs3NzWUPHDlyZHR0tKqqKtOxVjUyMlIsFg8fPlz2bnNz88pXVF9fn+lYbAv2RgFCvKnnD7h161ZXV9fQ0FClB1mroaGhrq6uW7duVXoQtgMZ5Q+YnZ0dHx9fWFio9CBrtbCwMD4+Pjs7W+lB2A5kFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCdlZ6ANL8/Pzw8HBK6dy5c7lcmR9sU1NTL168qKmpOXnyZObTURmPHz9eXFw8duxYW1tb6d1CoXDv3r2UUk9PT0NDQ+bT8T9ktPLevXt34cKFlNKpU6d27dpVeuDZs2eXLl1qaWmR0f8fV69enZ6eHhgYKJvRnz9/rrxm8vm8jFacN/UAITIKECKjACEyChAiowAhPqnfRPL5fHV1den1qamp7IdhM5iamhoZGSm9/vPnz+yH4ZeKVFo+n1/Ld6qlpaVYLI6Pj//TLwkqa3x8vFgstrS0rOVwPp+v9OuXoqfRTeTMmTO/Wr8fGxvLfh4qrru7+1fr90NDQ9nPQ3mV7jj/eRr9/v172QMDAwPp30+jm1NfX19K6c6dO5UeZK3u3LmTUurr66v0IL+08jQ6MDBQ9u73799XXjOeRjcDHzEBhMgoQIiMAoTIKECIjAKEyChAiL3Rymtvbx8cHEwplf0TppTSiRMnBgcHa2pqMh2Lirpx48bKv20ue7e6unrlNdPe3p7pWJQjo5XX0NBw/vz5VQ60tbWV3cFmG1v9X3TncrnVXzNkyZt6gBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUf6AxsbGzs7O+vr6Sg+yVvX19Z2dnY2NjZUehO2gqlgsVnoGgC1sZ6UHYOv58ePHy5cvU0rd3d179uwpPfD169fXr19XVVUdP3488+nKe/78ebFYPHz48L59+0rvLi0tjY2NpZSOHj26a9euzKdja/M0yrrNzs42NTWllCYmJjo6OkoPDA8P9/b25nK55eXlzKcrb8eOHYVC4cmTJz09PaV3JycnDx48mFKamZnxTp/18rtRgBAZBQiRUYAQGQUIkVGAEAtPbNzc3Nzu3btLr8/Pz2c/zFrMz89/+PCh9Prc3Fzms7B9WHhi3f5eeFrdJlx4+u0xC09sgKdRNq6rq6vs+v23b9/evn2b/Ty/dejQob1795ZeX1paevXqVfbzsD3IKBv34MGDVdbvs5/nt27evLn6+j1sgI+YAEJkFCBERgFCZBQgREYBQmQUIMTCE+tWW1vb39+fUqqrqyt7oLW1tb+/P5fbRD+k+/v7C4VCa2tr2bt1dXUrX1FtbW22c7Ed+CsmgJBN9LwAsBXJKECIjAKEyChAiIwChMgoQMi/AJsvobhUBxZMAAAAhnpUWHRyZGtpdFBLTCByZGtpdCAyMDIzLjA5LjUAAHice79v7T0GIOBnQAAOIGYH4gZGNgUFIM0CpRgZNEDSjMTS3AyMDAxMDAzMQL0MjKwMjGwMjOwMIiA58SyQAiQLHfYD6SUQroM9gi1w4NRJY1WoOFDNAXsk9n6oGgdUvTBxMBusXgwAriwUsztESVUAAADZelRYdE1PTCByZGtpdCAyMDIzLjA5LjUAAHicjZJBDoMgEEX3nOJfQIMooksV0zaNmLS2d+i+909n2uBomxoHFvPhzQA/KHBc/PnxxBzGKwXojVnXNe651loN4ARtfzgFdFPTxpVuvIXpigqOKmisyWYah7iSoUOiU2epn+ZMvwOSRNIQuQvMcURiUmO/9n/AgsG5ZZbaf6BdgckGWRI5H711SUfgro7VEtzg+uBXxn6sbsfgxWoeRvxkmYtrLAvxhqUVB6gSpTyTpZPHsKyWV1kezDr+DsrVC3NxdbukCdmpAAAAeHpUWHRTTUlMRVMgcmRraXQgMjAyMy4wOS41AAB4nIv2iHXWiPaI1QQTSEwgVqjR0DXSMzLVMdCx1jXQM0diGOqZwpi6YDZMGqYeXQrE0tRJLMnPDSjKL7Ay0Mss9swtyMlMzizRM7QyQuUao3JNUblmqFxzFG4NAG7AOeL/jG0zAAAAAElFTkSuQmCC", + "text/plain": [ + "" + ] + }, + 
"execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint.from_smi(smi, y, add_h=True).mol" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Other datapoint properties" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Datapoints can be individually weighted in the loss function." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=0.5, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='CC', V_f=None, E_f=None, V_d=None)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint.from_smi(smi, y, weight=0.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A string identifier (e.g. a name) can be assigned to a datapoint. If a SMILES is used to make the datapoint, the name defaults to the SMILES, but this can be overwritten." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Ethane', V_f=None, E_f=None, V_d=None)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint(mol, y, name=\"Ethane\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Ethane', V_f=None, E_f=None, V_d=None)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDatapoint.from_smi(smi, y, name=\"Ethane\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extra features and descriptors" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extra datapoint descriptors (like [molecule features](../featurizers/molecule_featurizers.ipynb)) will be concatenated to the learned descriptors from message passing and used in the FFN. They are called `x_d`. " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=array([0.79952846, 0.57058144, 0.61951421]), x_phase=None, name='CC', V_f=None, E_f=None, V_d=None)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n_extra_descriptors = 3\n", + "MoleculeDatapoint.from_smi(smi, y, x_d=np.random.rand(n_extra_descriptors))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extra atom features, bond features, and atom descriptors are called `V_f`, `E_f`, `V_d`. In this context, features are used before the message passing operations, while descriptors are used after. Extra bond descriptors aren't currently supported as aggregation ignores the final bond (edge) representations. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDatapoint(mol=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='CC', V_f=array([[0.3860953 , 0.64302719, 0.05571153],\n", + " [0.06926393, 0.90740897, 0.95685501]]), E_f=array([[0.55393371, 0.29979474, 0.07807503, 0.73485953]]), V_d=array([[0.10712249, 0.33913704, 0.37935725, 0.74724361, 0.49632224],\n", + " [0.8496356 , 0.31315312, 0.14000781, 0.58916825, 0.16698837]]))" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n_atoms = 2\n", + "n_bonds = 1\n", + "n_extra_atom_features = 3\n", + "n_extra_bond_features = 4\n", + "n_extra_atom_descriptors = 5\n", + "extra_atom_features = np.random.rand(n_atoms, n_extra_atom_features)\n", + "extra_bond_features = np.random.rand(n_bonds, n_extra_bond_features)\n", + "extra_atom_descriptors = np.random.rand(n_atoms, n_extra_atom_descriptors)\n", + "MoleculeDatapoint.from_smi(\n", + " smi, y, V_f=extra_atom_features, E_f=extra_bond_features, V_d=extra_atom_descriptors\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reaction Datapoints" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`ReactionDatapoint`s are the same as for molecules expect for:\n", + "1. extra atom features, bond features, and atom descriptors are not supported\n", + "2. both reactant and product `rdkit.Chem.Mol` objects or SMILES are required" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ReactionDatapoint(rct=, pdt=, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name=None)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Keep the atom mapping for hydrogens\n", + "rct = Chem.MolFromSmiles(\"[H:1][C:4]([H:2])([H:3])[F:5]\", sanitize=False)\n", + "pdt = Chem.MolFromSmiles(\"[H:1][C:4]([H:2])([H:3]).[F:5]\", sanitize=False)\n", + "Chem.SanitizeMol(\n", + " rct, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS\n", + ")\n", + "Chem.SanitizeMol(\n", + " pdt, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS\n", + ")\n", + "ReactionDatapoint(rct, pdt, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The SMILES can either be a single reaction SMILES 'Reactant>Agent>Product', or a tuple of reactant and product SMILES. Note that if an Agent is provided, its graph is concatenated to the reactant graph with no edges connecting them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAbvElEQVR4nO3daUAUV7oG4LebpgEB2SEBATUMyhoX3GIQFRMVN1wgk2hcogYSrqJoEAlqDDruDksSjNuoKI6oMUa8LjioMSIiQb24sgVZBBQaRGiQbrruj4oMYdeiabv5nl/WqdPF15i8nqo6dYrHMAwIIYS8Lr6iCyCEEOVGMUoIIZxQjBJCCCcUo4QQwgnFKCGEcEIxSgghnAgUXQDh5Ny5c2KxePDgwRYWFk33FhcXJyYmCoXCCRMmtHIQiUSSkJCQmZlZWVlpZmY2atQoa2truZVMiKrh0bxRpWZpaZmfn3/kyBFvb++me8+cOePh4aGnp1deXt7SEfbv379y5crCwsL6FjU1tc8///yf//ynhoaGXIomRLXQaLRL2759+7Jly3g8nqen56hRo4RC4cWLF2NjY6OiohiGiYqKUnSBhCgBitGuq6ysbM2aNXw+//Dhw/WDWV9fXzs7u7Vr1+7atSsoKIjO7glpE91i6roMDAxu3ry5b9++RhcEAgMD1dTU6urqLl++rKjaCFEiNBrt0mxsbGxsbBo1duvWzdzcPC8vr6SkRCFVEaJcaDTahfzxxx8+Pj4+Pj4VFRWtdGMYprS0FICRkVFnlUaIEqPRqCq4evVqXV1d0/bbt2833CwuLt65cyeAtWvXdu/evaWjJScni8ViAO+9915HV0qICqIYVQURERERERFtdtPX1x87diyA1mcybd68GYCbm9vf/va3jqqQEBVG80aVGztvdPr06fb29k33ZmVlxcTEtD5vtJHo6OjZs2cLBIJr1665uLh0aLGEqCYajaoCb2/vlqbfx8TEtP84ly9f9vX1BbB161bKUELaiW4xkT9dvHhxwoQJYrE4NDTU399f0eUQojQoRgkA7N27d9y4cVVVVatWrQoJCVF0OYQoEzqp7+pqa2sDAwPDw8O1tLSio6NnzZql6IoIUTIUo11IVlbW2rVrAURGRurp6QHIzc39+OOPExMTe/To8dNPPw0aNEjRNRKifChGu5CnT59GR0cD2Lx5s56e3p07d1xdXdmb+EKh8IsvvmjU38XFZceOHQoolBClQjHahRgaGnp6egLQ1NQEkJOTUz8RKjs7u2n/VqboE0Lq0bxR5VZWVlZXV9e9e3ehUNh0r0QiefbsGZ/PNzQ0bHZvZWVlKwcXCAS6urodVishKopiVKUwDCOTyfh8Po/HU3QthHQVNOFJpWzcuFEgEAQHByu6EEK6EIpRQgjhhGKUEEI4oRglhBBOKEYJIYQTilFCCOGEYpQQQjihGCWEEE4oRgkhhBOKUUII4YRilBBCOKEYJYQQTihGCSGEE4pRQgjhhGKUEEI4oRglhBBOKEYJIYQTilFCCOGEYpQQQjihGCWEEE4oRgkhhBOKUUII4YRilBBCOKEYJYQQTihGCSGEE4pRQgjhhGKUEEI4oRglhBBOKEYJIYQTgaILIKQNNTU1z58/V1dX19fXb7ZDWVmZVCrV1dXV1NRs/VCPHj3Ky8sDYG1tbWlp2fG1KrO6ujqRSATA2NiYx+M17VBVVSUWizU1NXV1dds8WnV1dVZWlkgkMjMzs7a2bvOvRqnRaJS86Xbv3m1qauru7t5SBzc3N1NT0/3797dykGPHjtnZ2fXs2dPV1dXV1dXKysrBweHUqVNyqFdZPXjwwNTU1NTU9Pnz5812CA4ONjU1XbhwYevHSU9P9/b2NjY2dnJycnNz69u3r4mJia+v74sXL+RQ9RuBRqNE9e3du3f+/PkAhg4d6ubmxjBMQkJCSkrKlClTDh48+Mknnyi6QNVx/vx5T0/P6upqHR0dDw8PCwuLJ0+eJCYmFhUVaWhoKLo6eaEYJSpOKpWuWLECgI+Pz44dO9hGhmHmzZu3f//+5cuXf/TRR2pqagqtUUXk5eXNmDGjurp66tSpu3fvNjQ0ZNtra2vFYrFia5MrOqknKi4rK6ukpATA559/Xt/I4/F8fX0BFBYW5uTkKKg0VbNq1arnz587OTkdOXKkPkMBCIXClq5rqwaKUaLi6s8lq6urG7bX30XR1tbu7JpUUVVV1bFjxwCsWLFCXV1d0eV0KopRomouXLgQHR1948YNdrNHjx7m5uYAVqxYUV5eXt/tyJEjAIYNG/bWW28ppE5ll5aWFh0dHRcXx26mpKRUVVXxeDwPDw/FFtb56NooUQ4SiYSdq9TsroabGzZsSEhI8PPzGzRoEACBQLBp06bZs2dfvXq1T58+CxcunDNnTlJSUnh4uLm5+b59+zqheOVSUFDw7Nmzpu2VlZUNN0+cOLFmzRpHR8eJEycCSEtLA2BhYaGpqfn9998fO3YsIyNDR0enf//+ixcvHjZsWOcUrxAUo0Q5pKWlWVlZtaenlpaWtrZ2w/vCs2bNqq2tnT9//pMnT9avX/+Pf/wDgKWlZVJS0ttvvy2vipWWvb19e7oJhUJtbe1u3bqxm+ycU4Zh+vfv//DhQwDdunUrKCh4+PDh0aNHw8PD/fz85FezYlGMEuWgq6vLji6bSk5ObjhQqj/NrJeYmLh69Wo+n//FF1+UlJT88ssv1dXVubm5o0ePjo2NdXJykmPdSsjNza3ZqQsZGRkNTwiCgoKCgoLqN9m/goKCAjs7u0OHDk2ePFlHRyc3N3f58uVHjx5dsmSJq6urs7NzJ9Tf+VQ2Rvfs2VNUVDR+/PgBAwY03VtYWLh3714AwcHBzT6wUU8kErHnfdOnT7e2tpZPse1VXV198eLF69evl5SUCAQCS0tLDw+Pdo4dWBKJJCIiQiqVAujXr9/YsWPlVmzzUp8/T6yoaGnvZGNjqyazCxkAgH7Pnp4HDjTa5aav76yt7ezszJ5RNuvRo0cTJkwoLy/fu3fvvHnzAFRUVMTGxq5bt+7Bgweurq537tzp0aPH634hFfTLL7907969abu/v39ERERLn2KH/z179kxJSakfolpZWR08ePDWrVsZGRm7d+9u5eNKTWVjNDIy8vbt24aGhs3GaH5+fkhICIAVK1YIBM3/ErKzs8PDw/fs2VNVVQXAyMhozpw5cq25dbt27QoJCXny5EnDxq+++mrGjBlRUVHGxsYApk+f3rdv3z59+rR0kM2bN7NfHICvr2/nx+j/VVXtKypqae8AXd2mMVojkwEolUj2N/mgQ7duaOs+e1hYWHl5+bBhw9gMBdC9e/cFCxZ4enr269evoKBg+/bt27dvf+VvQv7KwMAAQHV1dX2GsoRCobu7e0ZGRiv/1Ck7lY1RLgoLC/38/E6ePCmTyTQ0NDQ1NWtqahRb0sqVKzdu3AjAwcHh008/7du3r0QiSUlJ2bVr17Fjx/7444+kpCSBQGBra2tra9vSQR4+fLhu3TqhUPj++
+8nJCR0YvmN2WhpzWvu/ritllbTxqq6OgBqPN7G3r0b7erfjoe7b926BWDIkCGN2o2NjSdPnhwVFXXnzh225fbt2++++247yifNsLOzA/DkyROxWNwoSdlNmUymmMrkj2K0Gfr6+jdu3PDw8PDy8poyZcro0aNTU1MVWM/Zs2c3bdoE4MsvvwwPD68fPs+YMWPp0qVeXl5BQUEtjanryWSyhQsX1tTUrFq1SiQSKTZGDdXVxzaYnt2653V1ANR5vDEGBq/xs9jfTFFzQ+CnT5/i5bzRo0ePent7T5w4MTIysmfPnq/xg5SRWCyOjIxMTEw8efIkx0MNHTpUKBTW1tZeuHBh8uTJDXdduXIFQCsnScqO5o02Q0tLKy8v79SpU7Nnz9bT01N0OVi3bh3DMIMHD46MjGwUl2ZmZr/++mt7Zur9+OOPV65csbGxCQ4OllulcsGORgWtXsJu6PTp0zt37kxMTGQ3R4wYASAuLu7+/fsNu6Wnp585cwYAu+hJUVGRlpZWXFycg4NDaGhoo7n6qodhmJiYmD59+gQFBZ06dap+mm37paam7ty5k51yD8DAwGDKlCkAQkJCGi5usmPHDvbgs2bN6qDa3zgUo2+6vLy8q1evAli2bBmf/5p/X48fP2bTMyoqSumWLKt8xRjdvn27j49PTEwMu7lo0SJra+vKysrhw4d/++23586dS0hI2LRp0/Dhw6uqqpydnT/77DO2W3p6+qefflpdXb169WpbW9sDBw4wDCOnL6VYqampI0aMmDlzZn5+/oABAy5fvtzSLIhWxMXF+fj4rF27tr5lw4YNBgYGaWlpzs7OISEhERER06ZN+/LLLwHMmzeP/fdMJVGMAkBWVha7flphYaGia2ksOTmZ/cOYMWNa7ymRSMaPHz9+/PiUlJRGu/z8/MrLy2fNmtXmQRTujEj0yf37/pmZ9S31o9GCFy9+LS//39LS358/l7YccPr6+sbGxvVrYurr61++fNnd3b2srGzNmjXjxo1zd3cPCgoqKSmZOnXq+fPn6y/k9ejR48CBA9euXRsyZEh+fv6cOXOGDh2alJQkz6/b2QoLC318fAYPHvzbb78ZGxuHhYUlJye7urq+xqG0tbWNjY0bPjv/zjvvxMfH29nZ5eTkrF+/3t/f/8SJE0KhMDAw8Mcff+y4L/HGUfFro6dOnXr8+HHT9kZxWVlZ+dtvv6HJY9etuHfv3t27d7lX2AobG5v+/fvn5+cDMDExMWzrYqJMJjt79iyARYsWNWyPjY39+eefDQ0Nt23b1tJn79y50+ict02G7u5Mu0eIAMw1NOxfBlamWByYldWow0Rj4xF6emUSSbpY/FworG83GzHinW3bdPT0pry8FwTAUCD4nx49JhsZAYiIiHj27Fm/fv3YXcePH290ZGtr6wsXLty/f//KlSsFBQU8Hs/c3HzMmDG9m9yzAjBkyJDExMSDBw8GBgYmJycPHz585syZW7ZsMTMza/+XfQNJJJIffvhh9erVFRUV6urqfn5+oaGhDSc2WVlZ/fzzz3h5R6iphQsXjh49un5y2LJly5YtW9aoz8CBA9PS0hITE2/duvXixQtzc/MPP/yQnUaiwlQ8Rs+cOcNe/2qdiYnJ0qVLAbR/HZrY2NiGpzPy4Ofn991337GP5bVnvXE1NTV2EaOGT/uIRKLFixcD2LRpk6mpaUufPXToEDsToP2G//57zauc804yMlrz8taNSCpNaPB4O+tdHR3o6Tnq6Cx4++3uDaZ/f2Bv/9jIyK5bNydtbX2B4IlEEi8SZVRXf5uT80Im8zIxGTlyZHsKsLOzY+8mt4nP58+ePXvatGlbt27duHFjdHT0Tz/9tHz58pUrVyrpopmnTp1aunRpVlYWgDFjxkRERDT9Vejq6rIXN1vi6Ojo6OjY5s9SU1Njz+24FKxcVDxG58+f/8EHHzRtz87Obninxdzc/FVnDjo4OMyYMYNrfa1iZ7yy/9/W1ta22V8gEDQ9dVq2bFlxcfH777/PrlvcEicnp1f9OrZ6etJXGY3aN5jgaa+tvbzJOzzMhUIAztrazn+dCjrdxGS6iUnDlrlvvbU1Nzf26dPI/HwPQ0Nt+awWqqOj880333zyyScBAQGnT59eu3bt4cOH94WHDxs3Th4/Tk4ePHgQEBDADib69Omzffv2Lrh0iNwxKoqdAPjDDz80u7f+gqNEImnzUGyc7du3r6NrbJedO3cC0NTUbE+pjSQkJPB4PIFAcOvWrYbt7NPNvr6+HVdme/2rsHBgSsoX6ekcj1MhlbqkpAxMSfmPSNQhhbXuwoULjo6OAoHgjo0N4+7OpKV1wg/lSiRiFi8+PWoUAAMDg7CwsNf4T4i0h4qPRlUA+xhyTU3N3bt3X2lyuFQqXbBgAcMwRkZG69evb7jr5s2bAOLj4729vT08PObOnduhJXcGXTU1E6HwSW1t0V+Xd5ITd3f31NTUxP37Hb76CpmZGDAAX36JNWvwWlNZ5U4qRVQUvvkGItF4ff1VAQFLvv66zWvr5LVRjL7p+vXrZ2BgUFZWdvjw4VeKUbFYnJ2dDaC4uPjo0aNNO2RlZWVlZVlYWHRYrZ1IxjDPpFIA+m09d9BR1NXV3RYswLRpWLsW33+P8HBER2P1avj5obNqaJeEBCxZAvbJy9GjeWFh39LaK3JGE57edBoaGuzD4BEREffu3Wv/B7W1tVNa4OXlBWD69OkpKSkBAQHyKr2D3Hj+nE3MhpIqKl7IZHzAsZPXrjc0RHg40tIwdixEIixZAicnnD3bqTW0JDMT3t5wd0daGmxsEBuL//wHlKHyRzEKABkZGS4uLi4uLgUFBYqupRmrVq2ytLSsrq4ePXr0iRMnGj6bnJWVtXLlSvYkXSKRjBw5cuTIkdevXwegpqY2sAXsLXsTE5OBAwe+aa9rjystnX737hfp6exmckWFf0bG7AcPLpSV1TIMABlwvqxsdU4OgA8NDZsuZdIZ7Oxw9ix++QW9e+PBA4wfj0mTkJ2tgEpYVVX45hs4OuLoUWhrY80apKXBy0th9XQxb9LJiOKIxeLff/8dQP2rtA8cOLBkyRL2zxUVFQD8/PzYSVGfffbZ1q1bO7M8fX39c+fOjR8//tGjR9OmTTMxMbG3txcIBJmZmbm5uQzDVFRUfP/99zKZ7PLlywBKS0s7s7yOVSGVPqqpqX35T0VvLS0bLa17YnFQdraQzzdTVy+RSKplMgADdXW/VuzShZMmYexYREVh1SrExeH8efj6IjQUza0yJy8Mg+horFiBoiLwePj0U2zeDHotSudS2Rg1MjIyNTVtaSKxuro6OyJjFxs1MzP7+uuv8XKxr0aaXXuxk9nZ2aWlpYWFhf373/++d+8eG5d8Pt/W1nbSpEnsfHuBQODv7w+gV69erR9NS0vLwMCgpV+OXHVXU7PQ0DBp+ZVn/XR0/CwsdF/OYTJWV9/Xt+8ZkeiMSJRWVZX34oUmnz9AV3eioeFEY2PFn0wJhfD3x4wZCArCoUOIiMDx49i0CTNndsZPT07GkiW4dg0ABg1CeDhU+l0dbywe
o6JPDauwysrKkpIShmHeeustreZWllNhEoZRf5XJqp0qJQX+/khMxOefo3OefRwyBMnJsLTEpk34+9/xxv5mVB3FKCEdhz3FHjcOLT8w1pGuXcPZs1ixAoo4sSD1KEYJkb+6Oty9i8ePIZXC2BjOzm0HX3U1Wl8oR1cXf324iyiKyl4bJeSNUFqK9euxfz9Eov82amjAwwOhoXBwaPGD169j1KjWjjxnDujt0G8GilGVkpSUdOnSpWHDhrm5uSm6FgI8fIhx45CTAzU1jBgBZ2cIBPjjD8TH48QJnDmDQ4cwbVrznzUwQEurGqamQiTq1PkApHUKfBCVdDj2DexBQUGKLoQwTGUlY2vLAIydXeNn8IuLmUmTGIDR0GBu3361w4pEjI4OAzA3b3ZgsYQLxc8YIUQ1hYUhPR36+oiPR6P15UxNcfw4Bg3Cixd41afIduxAZSU++AAv11clCkcxSoh87NwJAIsXo9lVC9TVwa4Xk5CAjIz2HlMiwQ8/AECT9ZKJAlGMEiIH2dnIzQUAT88W+7i7Q18fDINffwWAx4/x3Xf47ju8fJSuGYcOIT8fjo748MOOrpi8PopRQuSAXUSGx2vtXjyf/+fJPts5IwOLFmHRIlRVtfiRsDAACAigmfZvFLpTT4gcsNObdHTQ4KVSzTAy+m9nDQ2YmwNAS+9/PX8et2/D1BQff9yRpRLOKEYJkQN2tNjOMSPbbehQtL7AGPtGwsWLoWyvyFZ5dFJPiByw70asrETri/Oz49D2vEjxzh3Ex6NbN/j4dER9pCNRjBIiB337AoBMhlZeW80wYN/R3Z73lW7dCobB3LlQ9ZcVKyOKUULkwMbmz0U/T59usU9i4p+j0TbfRfz4MQ4fBo+HRYs6rkTSYShGCZEDHg+ffQYAkZF49qz5PqGhAPD++38OXVsRGYnaWkye3HZPoggUo4TIR0AAzM1RWAhPTzR6H4FEgiVLcO4c1NSwefOfjfn52LIFW7agpuYvncVi7NoF0JT7NxfdqSdEPoyMEBuLiRNx6RJsbfHRR3B2hqYm0tNx7BgyMsDnIzLyv+vVZ2UhMBAA5s//y7343btRWgoXl7bP/YmCUIwSIjfDhyMpCcuW4fRpREX9ZZeTE7Ztwwcf/LelWzfY2ADAyxeoAEBdHSIiAGD58k6ol7weilFC5KlPH8TFoaAAly6huBi1tTA1xeDBjRcrATBoUDMP19fUYMcOABg5shOKJa+HYpQQ+bOweM2X3Glrt7jqKHlj0C0mQgjhhGKUEEI4oRglhBBOKEYJIYQTilFCCOGEYpQQQjihGCWEEE4oRgkhhBOKUUII4YRilBBCOKEYJYQQTihGCSGEE4pRQgjhhGKUEEI4oRglhBBOKEYJIYQTilFCCOGEYpQQQjihGFUpw4YNCwwMdHNzU3QhhHQhPIZhFF0DIYQoMXqlnXILDQ0tKyubM2fOu+++23TvgwcPdu7cqaWltX79+lYOkpmZuW/fvszMzGfPnpmbm7u5uXl5eWlpacmtakJUCo1GlZulpWV+fv6RI0e8vb2b7j1z5oyHh4eenl55eXmzH5fJZEFBQdu2bZPJZA3be/fuHRcXZ2dnJ5eiCVEtdG20S/vqq6+2bNliZWW1d+/etLS0u3fv/utf/7KwsMjOzvb09JRIJIoukBAlQCf1XdeLFy+uXLny9ttvJyUlmZmZsY329vb29vZDhgxJT09PTEyku1WEtIlitOvS0NC4fv16Xl5efYayXFxcBAKBVCotKipSVG2EKBE6qe/SeDyelZVVo8b09HSpVArAxsZGEUURomQoRruQ27dvs+fsJSUlLfUpLy/38/MDMGrUqIEDB3ZidYQoKzqpVwV79uy5dOlS0/bc3NyGm2Kx+P79+wDYwWZDMTExqampubm58fHx5eXlkyZNOnDggNzqJUSlUIyqgvPnz7enm5WV1bp16wDo6uo22nXy5MnY2Fj2z6amppMnT27ahxDSLJo3qtzYeaMhISHN3lK/ceNGcHBwK/NG6xUXF5eVlRUWFl6/fj0sLKy4uHjq1KnHjh3j8+myDyFtoNGoKnBychozZkzT9vZP/DQzMzMzM+vbt++oUaNmzpxpb29/4sSJ48ePe3l5dWilhKggGmuQxiwtLYcOHQogPj5e0bUQogQoRkkzTE1NATx9+lTRhRCiBChGu7SEhISTJ082aqytrU1KSgLwzjvvKKIoQpQMxWgXcvPmzV69evXq1YudN1pcXDxlyhQvL68NGzZUVVWxfUpLS+fNm5ednc3n8+fOnavIcglREhSjXUhNTU1OTk5OTg47b9TMzCwiIoLP5wcHB5uYmDg4ONjb25ubm8fExPD5/IiICEdHR0WXTIgSoDv1ys3W1lZXV1dPT6/ZvTo6OnZ2dt27d2c3e/bsuW3bNgD1LfPmzXvvvfe2bNly9uzZe/fuATA0NBwzZkxAQMCQIUM65RsQovRo3ij5k0QikUqltFozIa+KYpQQQjiha6OEEMIJxSghhHBCMUoIIZxQjBJCCCcUo4QQwgnFKCGEcEIxSgghnFCMEkIIJ/8P74ZWgJKpAVMAAACwelRYdHJka2l0UEtMIHJka2l0IDIwMjMuMDkuNQAAeJx7v2/tPQYg4GdAAA4gZgPiBkZGDQ0gzcgowcimoQBksUiwwIWY4CxmTrAkowQrXIiNAyzEJMEOF+LgZmBkYGRiYGRmYGRhYGVjYGNnEAFZJ54FUoBk+YH906cpqII4Z8/4qADxEhBbQkIWKHbAHsTeZ9u6FKQOouWAPUxNofJiB6C77SDsYiCbAapGAS4uBgAnfB2tRmsD5QAAAPF6VFh0TU9MIHJka2l0IDIwMjMuMDkuNQAAeJx9kkGOwjAMRfc5xb8AkZM0TbKkLcOMEK0EhTuwn/trHJDHtB2N04XtvnzrWzGocRlOj2/8hh+MAeifr5SCeyAic0ZN0B2OXyP6ed9Jp59u43xFRss3+CzJ/TydpePwiZ2zkWqALK0SJ6BHr93dFmwEDKyo1zfKXrimTlbFLRmEjPhQoT9GRwFbloyWSqbExq2PS64VLmFCsKkUCrkKricnATMLev29FszCHcZhsdXXnrtpHHTP9XjdJhfqMDzLRldTy6j+HVtL6tKxg8Xs90m1lrfAufkBkHpy18/7PwAAAACwelRYdFNNSUxFUyByZGtpdCAyMDIzLjA5LjUAAHicZcq7DoMwDAXQX+kIUrCcFwlmqipVLH3skQcqdQOBULvx8U3abl7s6+ObBtKcTuS4SgMZrsuyXKczeYacW043CpxT5MNeNRq8QtXjb5Qrr6/2/5cH7CJGhWAyWghdh041WCqmVIrXanwt831bVkKYl+mYr8u4Xt/z47mBJi3RkZFoyEq05CR68hJbaiUGChLj/gHLeln8f0JKEgAAAABJRU5ErkJggg==", + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "rxn_smi = \"[H:1][C:4]([H:2])([H:3])[F:5]>[H:6][O:7][H:8]>[H:1][C:4]([H:2])([H:3]).[F:5]\"\n", + "from_rxn_smi = ReactionDatapoint.from_smi(rxn_smi, y, keep_h=True)\n", + "from_rxn_smi.rct" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAP+ElEQVR4nO3de1BTd5/H8W9iIAECCBhEQXkYUASt4spqq6BsRwVWrBcKbrHSOiJycaaX6VQ79QK2TqWDZbTj2GJFeaxaa8Wnio7VUhURi+I6tihIYUUCChIkoJCQ6/5xVpcSrv6AXPi8/kp+OTnzZcy85+ScA/L0ej0BAMDL4ht7AAAA84aMAgAwQUYBAJggowAATJBRAAAmyCgAABOBsQcA6KywsLC+vt7Pz2/SpEmGryoUinPnzhFReHi4SCTqbid6vb6goKCsrOzJkyeOjo7BwcGTJ08exKFhONMDmJjQ0FAi2rJlS5evVldXcx9dqVTa3R7y8vJ8fX07fdQXL14sk8kGbWoYvnA0CpYmNzd3+fLlarU6ODh4yZIlDg4Ot27d2rdv3+nTp5cvX37p0iUej2fsGcGiIKNgUXQ63UcffaRWq1NSUrZu3fpiPSQkZMWKFfn5+efPn+eOdgEGCi4xgUXh8/mXL1/es2fPli1bOq5HR0f7+PgQ0W+//Wak0cBiIaNgaUaPHp2UlGT4zZ3LaENDgzGGAkuGjIJ5a29vX7du3bp168rKynreUiaTEZGLi8uQzAXDCM6NgokqKSk5evSo4fqTJ086PlWpVJmZmUQUFRXV5Q1SHJlMVlJSQkRz5swZ6ElhuENGwUTl5OTk5OT0uplAIOAuGY0aNaqHzb7++mulUunu7h4eHj5gIwIQETIKJmvu3Lnz5s0zXG9padm1a9eLpzY2Ntzd+D0oKir64osviCgtLU0oFA7snADIKJiokJCQ1NRUw3WpVNoxo72qqKiIjIxUq9Xx8fErV64cuAEB/g8uMYElKy8vDwkJqa2tjYmJ2bt3r7HHAcuEjILFysvLe+2112pra6OiorKzs/l8fNphUOCDBZZp165dYWFhTU1NKSkpx44dEwhw/goGCz5bYN6USmV8fDwRbdiwgfsbTi0tLfHx8ceOHbO3t8/Ozl62bJmxZwQLh4yCeVOr1YcOHSKi2NjYyZMny+Xy6dOnV1VVEZFYLN6+ffv27ds7bu/k5HThwgWjjAqWChkF82ZlZbV06VIicnV1JSK5XM41lIgePXr06NGjTttLJJKhHRAsH0+P/6ceTExLS4tKpbK1tbW1tTV8VafTcb/I5OzsbHjVSKfTNTc397BzHo83cuTIAZwWABkFM6DVank8Hi61g2nC5xJM3alTpwQCAffNHcAEIaMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGAiMPYA0A8///xzSUlJYGBgaGio4asajSYtLY2I1q5d6+rq2sN+FArF4cOHZTJZWFhYQEDAYI3bNxqNprCw8MqVK/X19Tqdzs3Nbf78+bNmzeLxeH3fyf79+2UyGRF5eHisXLly0IbtWpVSmdvY2N2rQY6OAWKx4fqR+vpGjcZwfYqd3X+MHDmQ88EgQ0bNyY8//njkyJHk5OQuM6pSqTZt2kREERER3WW0oaEhKytr9+7dDx8+JKKmpibjZvTs2bPvv//+X3/91XFx8+bNs2fPPnDgwMSJE4lo5syZOTk5Y8aM6W4nZ86ciYuL4x4HBQUNfUal7e0H6+q6e3WkQNBlRg/W1z9Rqw3X48eORUbNCzI6XKhUquTk5O+//16pVPL5fLFY/OzZM+OOlJ2dvWbNGq1W6+7uvnr16oCAAIFA8Oeff2ZlZRUWFoaEhNy+fVsikbi5uS1btqy7nTQ3N69bt46IwsLCzp07N4Tjd2Y/YsTG8eMN1yfZ2houavV6uVpNRCn/+IeIz+91ezBlyOhwYW1tfefOnaCgoIiIiKioqI0bNx46dMiI85SXlycmJmq12oULF544cUL8/HhtyZIlH3744cqVKxcsWCCRSHrdz4YNG2pra5ctW7ZgwQLjZtSazw91du7jxo1qtY7Imsdb5OLSj5MXYJKQ0WGksLDQ2CP8v/T0dIVC4ebmduzYMfHfv/Pa2tqePHmyLzvJz8/PzMy0t7ffvXv36dOnB2fSQSFTq4lolJUVGmoBcKUejECn0x0/fpyI1q5dO/JlzwO2t7cnJCTo9frPPvvMw8NjQAccdA1cRq2tjT0IDABk1GJptdrg4ODg4GCTOgjllJeXy+VyIlq4cGGvG8fFxYWHh584caLTempqamlp6bRp05KTkwdlyoFT0toaU1oaU1qq0um4lUYuowLBE42moLn5bGPjtZaWtuevgnnBl3rzU1RUtHnzZsN1zd/vntHr9QUFBUTU2P29OJ3U1NRcu3aNfcIeuLq6zps3r6amhnvKXYvv2eXLlysqKl5//fWOi3/88Ud6ejqfz//2228Fgq4/xtXV1UVFRf0az2XOHJ2NTd+3dxAIZtrbc4+farUfV1Z22uDfHRyiJJJWrba8rY2ItM/XuS/1v7e0hN2+/aKdtnz+225ua9zcRvTnZi8wOmTU/BQXFxcXF/e6GZ/P/+CDD4jI29u7j3u+du1adHQ003C9CQkJuXjxYnNzM/fU/nmDevDWW2/V19d3vDFLq9WuWbNGrVa/9957s2bN6u6N+fn5q1at6td4kTduVPUnYf62tv/08+Meq3S63+TyThuIBQIichcK48aMISKr5zv3t7PzEoncrK3/zd5eYmXVpNEUNjffePo08+HDJrV6Q1dX/MFkIaPmJzQ0dPXq1YbrKpUqNjb2xVM+n//VV1/1a8/jxo178803Wefr0ZQpU4hIKBRyT1UqlU1vR3/btm3rtJKRkVFcXDxmzJjU1NQe3ujp6dnfH2eaUOhtZ9f37cc9/0GIyFEgyPDx6bSBi0BARB5CYcLYsR3XgxwdgxwdO66sGj36x8ePv5RKjzc0LJdIJvTnoBiMCxk1Pz4+PitWrDBcb2tr65jRl/Dqq69yV34Gm/PzG4Pq6uoc/16TXlVVVaWkpBDRnj17en4vd2r4ZWfsNwGPN7U/CTYU5eqaXV9fr1JdlMuRUTOC
jIIRTJkyhc/n63S6mzdv+vr69uu9iYmJra2tIpHo6NGjR48efbFeWVlJRGVlZdHR0VOnTuV+ocu88Ii8RKJ6lapepTL2LNAPyCgYgYODw4wZM27cuHHkyJGYmJh+vffOnTtEpFQquzxwlslkx48flxucozQXTzQaInLq5qIZmCb8a4FxJCQk3Lhx48yZM7m5uREREX1/45kzZ1RdHaz99NNPO3bsCAgI+O677xwcHAZu0kFRrlCI+PzxHU6tElFNe/v/KBRExHhyAIYYMmqxtFotdxU7IyNjKE8R9tE777yTlZV19erV6Ojo9PT0uLg46+f3oj9+/PjgwYMTJ05cunQpEb399ts1NTWJiYncGeFXXnmlyx1ev36diMRi8YwZM4bqh+irP549S33wgIiO+PkJ+fxqpTK5vFxLlDh27H86O9uNGEFE//306ecPHmj0el9b2yD8aRKzgoxaLL1ef/PmTSJ68Q33+vXrYWFh3OPW1lYi2r179759+4ho9uzZubm5QzneiBEjTp48GRERcf369eTk5I0bNwYEBNjY2Ny/f//+/fsajWbu3LlcRouKiioqKhYtWjSU4w0shU73QKkkIu4WUVdr62li8SW5PK26eqdUOtra+qlG06LVEpGnSJTh7Y3fijEvyKg5cXR0dHV17e4bK4/H4/4+npWVFRHx+fxPP/2UurnF3c7Ozs7Y3xwlEklBQcH+/fsPHDhw8+bNK1eucOvjxo0LDw9PSkrinr777rsNDQ2BgYE9700oFDo5OfXlRtQBZ8PnuwuFzt2f0BwnEiW7u9Pz+0ZFfH66t3dhc/O/GhtvPX1a295uzeNNsrVd6Oz8XxKJNR8VNTM8vV5v7BkASKFQyGQytVo9atQo0z+zObDUer0Vfm3JnCGjAABM8PUBAIAJMgoAwAQZBVNXUVGxY8cOwz+UB2AikFEwdXfv3v3kk0+ys7ONPQhA15BRAAAmyCgAABNkFACACTIKAMAEGQUAYIKMAgAwQUYBAJggowAATJBRAAAmyCgAABNkFACACTIKAMAEGQUAYIKMAgAwQUYBAJggowAATJBRAAAmyCgAABNkFACACTIKAMAEGQUAYIKMAgAwQUYBAJggowAATJBRAAAmyCgAABNkFACACTIKAMAEGQUAYIKMAgAwQUYBAJggowAATJBRAAAmAmMPANCLCRMmfPzxx/7+/sYeBKBrPL1eb+wZAADMGI5GweRkZmaWlZWFhoaGhoYaviqXy7dt20ZEW7dudXR07G4njx8/zsrKunv3bl1dnZub28yZM2NiYpydnQdxbhi29AAmhqvnli1buny1urqa++hKpdLu9vDNN9/Y2Nh0+qi7uLhcuHBh0KaG4QuXmMDSZGVlJSQkCIXCnTt33rp1q6ys7OTJk1OnTm1sbIyMjKyrqzP2gGBp8KUeLE1eXp5QKLx48WJAQAC34uvrO2fOHC8vr5aWlpycnKSkJONOCBYGR6NgaQ4fPnzv3r0XDeVIJBJvb28iwtEoDDhkFCyQp6dnp5X29vbKykoi8vHxMcZEYMmQUTBvbW1t/v7+/v7+v//+e3fbtLe3r1+/vrW1dfz48VFRUUM5HgwHODcKJurs2bMNDQ2G68+ePev4VKvVlpaWElFra2unLfPy8s6fP19bW3vp0qXa2trAwMAffvjB8Ao+ACNkFExUcXFxcXFxr5sJhcLPP/+ciLhTnx1dvXr1yy+/5B6LRKLIyMjRo0cP+JwA+C0mMDlhYWG//PLLqlWrYmNjDV9taGiIiYkhIqlU6uHh0cN+mpqaZDJZXV1dSUnJrl277t27FxgY+Ouvv/Zw0z7AS8DRKJgoLy+v+fPnG65LpdI+7sHJycnJyWnChAnBwcGxsbHTp08vLi7OyMhISUkZyEFh2MMlJhgW7OzsFi1aREQXLlww9ixgaZBRGC5cXV2JqMvLVgAskFGwNPfu3duzZ4/hen5+PnV1JQqAETIK5q21tdXLy8vLy6uwsJCINBrN4sWL169fn5SUJJPJuG2USuWmTZvOnTtHRKtXrzbmuGCJcIkJzJtOp6uqqiIihUJBRAKBIDMzMzIycu/evZmZmT4+PtbW1pWVlW1tbUSUnJwcHR1t3IHB8iCjYHI8PT39/Py4U5mGrKys/Pz8uAdEJBKJdu7cSUQTJ07kNggJCSktLU1LSzt16lRFRYVWqxWLxWFhYYmJiW+88cZQ/RAwjOC+UbBkOp2ura1NLBYbexCwZMgoAAATXGICAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDA5H8BhAgcb+74aH4AAACLelRYdHJka2l0UEtMIHJka2l0IDIwMjMuMDkuNQAAeJx7v2/tPQYg4GdAAFYgZgHiBkZGDQ0gzcgowcimoQASlWCBCzHBWcycYElGCVZuBkYGRiYGRmYGRhYGEZBZ4m4gGSSTD+yfPk1BFcQ5e8ZHBYiXgNgSErJAsQP2IPY+29alIHUQLQfsYWrEALyjFrvi+CrHAAAAvnpUWHRNT0wgcmRraXQgMjAyMy4wOS41AAB4nH2QSw7CMAxE9znFXKCVm8+iy7YpH6GmEhTuwJ77qzHImFCEk4VtPY8zMeA4x9P9gXfYaAxAf27btrg5IjITOEE/7o8Jw9L10hnma1ouCPB5Ip+S7JZ5kk6DA6qmDsQBqukraQS0GLRbbUEvoMuKOr5RtsJ53qyKW9IJGbBToR+rg4BjioW5l91+TlHt8rFqKhe6yD1Lry/kslD/1OJaPj3nZgXXjFcxHOdHzwAAAIN6VFh0U01JTEVTIHJka2l0IDIwMjMuMDkuNQAAeJyL9rAyjI12tjKJ1Yj2sDKK1QRRxrGa0W5WprEKNRq6hnqmOgY61gYQAsQDUmBRa4iUpk5iSX5uQFF+gZWBXm5+jiOQ55tY4Feam5RapGdoZYgpaGJlhCloZGWMKWhsZYIpaFoDADOaN9kqtHGTAAAAAElFTkSuQmCC", + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rct_smi = \"[H:1][C:4]([H:2])([H:3])[F:5]\"\n", + "pdt_smi = \"[H:1][C:4]([H:2])([H:3]).[F:5]\"\n", + "from_tuple = ReactionDatapoint.from_smi((rct_smi, pdt_smi), y, keep_h=True)\n", + "from_tuple.rct" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/docs/source/tutorial/python/data/datasets.ipynb b/chemprop/docs/source/tutorial/python/data/datasets.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..078c3d94283a827ca21d92e3d390982e8c90b7e0 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/data/datasets.ipynb @@ -0,0 +1,367 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.data.datasets import MoleculeDataset, ReactionDataset, MulticomponentDataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To make a dataset you first need a list of [datapoints](./datapoints.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from chemprop.data import MoleculeDatapoint, ReactionDatapoint\n", + "\n", + "ys = np.random.rand(2, 1)\n", + "\n", + "smis = [\"C\", \"CC\"]\n", + "mol_datapoints = [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]\n", + "\n", + "rxn_smis = [\"[H:2][O:1][H:3]>>[H:2][O:1].[H:3]\", \"[H:2][S:1][H:3]>>[H:2][S:1].[H:3]\"]\n", + "rxn_datapoints = [\n", + " ReactionDatapoint.from_smi(rxn_smi, y, keep_h=True) for rxn_smi, y in zip(rxn_smis, ys)\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Molecule Datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MoleculeDataset`s are made from a list of `MoleculeDatapoint`s." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDataset(data=[MoleculeDatapoint(mol=, y=array([0.23384385]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='C', V_f=None, E_f=None, V_d=None), MoleculeDatapoint(mol=, y=array([0.74433064]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='CC', V_f=None, E_f=None, V_d=None)], featurizer=SimpleMoleculeMolGraphFeaturizer(atom_featurizer=, bond_featurizer=))" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MoleculeDataset(mol_datapoints)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dataset properties" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The properties of datapoints are collated in a dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.23384385]\n", + " [0.74433064]]\n", + "['C', 'CC']\n" + ] + } + ], + "source": [ + "dataset = MoleculeDataset(mol_datapoints)\n", + "print(dataset.Y)\n", + "print(dataset.names)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Datasets return a `Datum` when indexed. A `Datum` contains a `MolGraph` (see the [molgraph featurizer notebook](../featurizers/molgraph_molecule_featurizer.ipynb)), the extra atom and datapoint level descriptors, the target(s), the weights, and masks for bounded loss functions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Datum(mg=MolGraph(V=array([[0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 1. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0.12011]], dtype=float32), E=array([], shape=(0, 14), dtype=float64), edge_index=array([], shape=(2, 0), dtype=int64), rev_edge_index=array([], dtype=int64)), V_d=None, x_d=None, y=array([0.23384385]), weight=1.0, lt_mask=None, gt_mask=None)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Caching" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `MolGraph`s are generated as needed by default. For small to medium dataset (exact sizes not yet benchmarked), it is more efficient to generate and cache the molgraphs when the dataset is created. \n", + "\n", + "If the cache needs to be recreated, set the cache to True again. To clear the cache, set it to False. \n", + "\n", + "Note we recommend [scaling](../scaling.ipynb) additional atom and bond features before setting the cache, as scaling them after caching will require the cache to be recreated, which is done automatically." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "dataset.cache = True # Generate the molgraphs and cache them\n", + "dataset.cache = True # Recreate the cache\n", + "dataset.cache = False # Clear the cache\n", + "\n", + "dataset.cache = True # Cache created with unscaled extra bond features\n", + "dataset.normalize_inputs(key=\"E_f\") # Cache recreated automatically with scaled extra bond features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Datasets with custom featurizers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Datasets use a molgraph featurizer to create the `MolGraphs`s from the `rdkit.Chem.Mol` objects in datapoints. A basic `SimpleMoleculeMolGraphFeaturizer` is the default featurizer for `MoleculeDataset`s. If you are using a [custom molgraph featurizer](../featurizers/molgraph_molecule_featurizer.ipynb), pass it as an argument when creating the dataset." 
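+ ,
+ "\n",
+ "One practical reason to swap featurizers is that they build feature vectors of different sizes, which changes the `MolGraph`s the dataset yields. A rough sketch, assuming the atom featurizer presets from the [atom featurizers notebook](../featurizers/atom_featurizers.ipynb) report their lengths:\n",
+ "\n",
+ "```python\n",
+ "from chemprop.featurizers import MultiHotAtomFeaturizer\n",
+ "\n",
+ "len(MultiHotAtomFeaturizer.v2()), len(MultiHotAtomFeaturizer.v1())  # the presets differ in length\n",
+ "```"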
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MoleculeDataset(data=[MoleculeDatapoint(mol=, y=array([0.23384385]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='C', V_f=None, E_f=None, V_d=None), MoleculeDatapoint(mol=, y=array([0.74433064]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='CC', V_f=None, E_f=None, V_d=None)], featurizer=SimpleMoleculeMolGraphFeaturizer(atom_featurizer=, bond_featurizer=))" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.featurizers import SimpleMoleculeMolGraphFeaturizer, MultiHotAtomFeaturizer\n", + "\n", + "mol_featurizer = SimpleMoleculeMolGraphFeaturizer(atom_featurizer=MultiHotAtomFeaturizer.v1())\n", + "MoleculeDataset(mol_datapoints, featurizer=mol_featurizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reaction Datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reaction datasets are the same as molecule datasets, except they are made from a list of `ReactionDatapoint`s and `CondensedGraphOfReactionFeaturizer` is the default featurizer. [CGRs](../featurizers/molgraph_reaction_featurizer.ipynb) are also `MolGraph`s." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "CondensedGraphOfReactionFeaturizer(atom_featurizer=, bond_featurizer=)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ReactionDataset(rxn_datapoints).featurizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multicomponent datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MulticomponentDataset` is for datasets whose target values depend on multiple components. It is composed of parallel `MoleculeDataset`s and `ReactionDataset`s." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mol_dataset = MoleculeDataset(mol_datapoints)\n", + "rxn_dataset = ReactionDataset(rxn_datapoints)\n", + "\n", + "# e.g. reaction in solvent\n", + "multi_dataset = MulticomponentDataset(datasets=[mol_dataset, rxn_dataset])\n", + "\n", + "# e.g. solubility\n", + "MulticomponentDataset(datasets=[mol_dataset, mol_dataset])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A `MulticomponentDataset` collates dataset properties (e.g. SMILES) of each dataset. It does not collate datapoint level properties like target values and extra datapoint descriptors. Chemprop models automatically take those from **the first dataset** in datasets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('C', ('[O:1]([H:2])[H:3]', '[H:3].[O:1][H:2]')),\n", + " ('CC', ('[S:1]([H:2])[H:3]', '[H:3].[S:1][H:2]'))]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "multi_dataset.smiles" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.23384385],\n", + " [0.74433064]])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "multi_dataset.datasets[0].Y" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/docs/source/tutorial/python/data/splitting.ipynb b/chemprop/docs/source/tutorial/python/data/splitting.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b3c1b5d652ffdd7edd0643ec86a0e7ccc4878060 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/data/splitting.ipynb @@ -0,0 +1,487 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data splitting" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.data import SplitType, make_split_indices, split_data_by_indices" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These are example [datapoints](./datapoints.ipynb) to split." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "from chemprop.data import MoleculeDatapoint\n", + "\n", + "smis = [\"C\" * i for i in range(1, 11)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "datapoints = [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data splits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A typical Chemprop workflow uses three sets of data. The first is used to train the model. The second is used as validation for early stopping and hyperparameter optimization. The third is used to test the final model's performance as an estimate for how it will perform on future data. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop provides helper functions to split data into these training, validation, and test sets. Available splitting schemes are listed in `SplitType`.\n", + "All of these rely on [`astartes`](https://github.com/JacksonBurns/astartes) in the backend." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "scaffold_balanced\n", + "random_with_repeated_smiles\n", + "random\n", + "kennard_stone\n", + "kmeans\n" + ] + } + ], + "source": [ + "for splittype in SplitType:\n", + " print(splittype)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Splitting steps" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. Collect the `rdkit.Chem.mol` objects for each datapoint. These are required for structure based splits.\n", + "2. Generate the splitting indices.\n", + "3. Split the data using those indices.\n", + "\n", + "The `make_split_indices` function includes a `num_replicates` argument to perform repeated splits (each with a different random seed) with your sampler of choice.\n", + "Any sampler can be used for replicates, though deterministic samplers (i.e. Kennard-Stone) will not change on replicates.\n", + "Splits are returned as a 2- or 3-member tuple containing `num_replicates`-length lists of training, validation, and testing indexes." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.mol for d in datapoints]\n", + "\n", + "train_indices, val_indices, test_indices = make_split_indices(mols)\n", + "\n", + "train_data, val_data, test_data = split_data_by_indices(\n", + " datapoints, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The default splitting scheme is a random split with 80% of the data used to train, 10% to validate and 10% to split." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1, 1, 1)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(train_data), len(val_data), len(test_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each of these is length 1 because we only requested 1 replicate (the default).\n", + "The inner lists for each of these sets contain the actual indices for training." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(8, 1, 1)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(train_data[0]), len(val_data[0]), len(test_data[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Split randomness" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All split randomness uses a default seed of 0 and `numpy.random`. The seed can be changed to get different splits." 
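+ ,
+ "\n",
+ "The same applies to replicated splits: with the `num_replicates` argument described above, each replicate is drawn with a different seed. A minimal sketch, reusing the `mols` list from above:\n",
+ "\n",
+ "```python\n",
+ "train_ind, val_ind, test_ind = make_split_indices(mols, num_replicates=3)\n",
+ "len(train_ind)  # 3 -> one list of training indices per replicate\n",
+ "```\n",
+ "\n",
+ "The cells below instead change the seed directly for a single (non-replicated) split."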
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "([[8, 4, 9, 1, 6, 7, 3, 0]], [[5]], [[2]])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(datapoints)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "([[8, 7, 0, 4, 9, 3, 2, 1]], [[6]], [[5]])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(datapoints, seed=12)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Split fractions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The split sizes can also be changed. Set the middle value to 0 for a two way split. If the data can not be split to exactly the specified proportions, you will get a warning from `astartes` with the actual sizes used. And if the specified sizes don't sum to 1, the sizes will first be rescaled to sum to 1. " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "([[8, 4, 9, 1]], [[6, 7, 3]], [[0, 5, 2]])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(datapoints, sizes=(0.4, 0.3, 0.3))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "([[8, 4, 9, 1, 6, 7]], [[]], [[3, 0, 5, 2]])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(datapoints, sizes=(0.6, 0.0, 0.4))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/astartes/main.py:325: ImperfectSplittingWarning: Actual train/test split differs from requested size. Requested validation size of 0.25, got 0.30. Requested test size of 0.25, got 0.30. \n", + " warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "([[8, 4, 9, 1, 6]], [[7, 3]], [[0, 5, 2]])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(datapoints, sizes=(0.5, 0.25, 0.25))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/astartes/main.py:381: NormalizationWarning: Requested train/val/test split (0.50, 0.50, 0.50) do not sum to 1.0, normalizing to train=0.33, val=0.33, test=0.33.\n", + " warn(\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/astartes/main.py:325: ImperfectSplittingWarning: Actual train/test split differs from requested size. Requested train size of 0.33, got 0.30. Requested test size of 0.33, got 0.20. 
\n", + " warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "([[8, 4, 9]], [[1, 6, 7, 3, 0]], [[5, 2]])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(datapoints, sizes=(0.5, 0.5, 0.5))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Random with repeated molecules" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset has repeated molecules, all duplicate molecules should go in the same split. This split type requires the `rdkit.Chem.mol` objects of the datapoints. It first removes duplicates before using `astartes` to make the random splits and then adds back in the duplicate datapoints." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "smis = [\"O\", \"O\"] + [\"C\" * i for i in range(1, 10)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "repeat_datapoints = [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]\n", + "mols = [d.mol for d in repeat_datapoints]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "([[10, 6, 0, 1, 3, 8, 9, 5, 2]], [[7]], [[4]])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(mols, split=\"random_with_repeated_smiles\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Structure based splits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Including all similar molecules in only one of the datasets can give a more realistic estimate of how a model will perform on unseen chemistry. This uses the `rdkit.Chem.mol` representation of the molecules. See the `astartes` [documentation](https://jacksonburns.github.io/astartes/) for details about Kennard Stone, k-means, and scaffold balanced splitting schemes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "smis = [\n", + " \"Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14\",\n", + " \"COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23\",\n", + " \"COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl\",\n", + " \"OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3\",\n", + " \"Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1\",\n", + " \"OC1(CN2CCC1CC2)C#Cc3ccc(cc3)c4ccccc4\",\n", + " \"COc1cc(OC)c(cc1NC(=O)CCC(=O)O)S(=O)(=O)NCc2ccccc2N3CCCCC3\",\n", + " \"CNc1cccc(CCOc2ccc(C[C@H](NC(=O)c3c(Cl)cccc3Cl)C(=O)O)cc2C)n1\",\n", + " \"COc1ccc(cc1)C2=COc3cc(OC)cc(OC)c3C2=O\",\n", + " \"Oc1ncnc2scc(c3ccsc3)c12\",\n", + " \"CS(=O)(=O)c1ccc(Oc2ccc(cc2)C#C[C@]3(O)CN4CCC3CC4)cc1\",\n", + " \"C[C@H](Nc1nc(Nc2cc(C)[nH]n2)c(C)nc1C#N)c3ccc(F)cn3\",\n", + " \"O=C1CCCCCN1\",\n", + " \"CCCSc1ncccc1C(=O)N2CCCC2c3ccncc3\",\n", + " \"CC1CCCCC1NC(=O)c2cnn(c2NS(=O)(=O)c3ccc(C)cc3)c4ccccc4\",\n", + " \"Nc1ccc(cc1)c2nc3ccc(O)cc3s2\",\n", + " \"COc1ccc(cc1)N2CCN(CC2)C(=O)[C@@H]3CCCC[C@H]3C(=O)NCC#N\",\n", + " \"CCC(COC(=O)c1cc(OC)c(OC)c(OC)c1)(N(C)C)c2ccccc2\",\n", + " \"COc1cc(ccc1N2CC[C@@H](O)C2)N3N=Nc4cc(sc4C3=O)c5ccc(Cl)cc5\",\n", + " \"CO[C@H]1CN(CCN2C(=O)C=Cc3ccc(cc23)C#N)CC[C@H]1NCc4ccc5OCC(=O)Nc5n4\",\n", + "]\n", + "\n", + "ys = np.random.rand(len(smis), 1)\n", + "datapoints = [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]\n", + "mols = [d.mol for d in datapoints]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/astartes/main.py:325: ImperfectSplittingWarning: Actual train/test split differs from requested size. Requested train size of 0.80, got 0.85. Requested test size of 0.10, got 0.05. \n", + " warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "([[0, 1, 2, 3, 4, 6, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19]],\n", + " [[5, 10]],\n", + " [[7]])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "make_split_indices(mols, split=\"kmeans\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/docs/source/tutorial/python/ensembling.ipynb b/chemprop/docs/source/tutorial/python/ensembling.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..dda8effd173e150d897bf95e9a8c4816dd4915b0 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/ensembling.ipynb @@ -0,0 +1,355 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Ensembling" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from lightning import pytorch as pl\n", + "import numpy as np\n", + "import torch\n", + "from chemprop import data, models, nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example [dataloader](./data/dataloaders.ipynb)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "smis = [\"C\" * i for i in range(1, 4)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "dset = data.MoleculeDataset([data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])\n", + "dataloader = data.build_dataloader(dset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model ensembling" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A single model will sometimes give erroneous predictions for some molecules. These erroneous predictions can be mitigated by averaging the predictions of several models trained on the same data. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "ensemble = []\n", + "n_models = 3\n", + "for _ in range(n_models):\n", + " ensemble.append(models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), nn.RegressionFFN()))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | RegressionFFN | 90.6 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.273 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 14.38it/s, train_loss_step=0.234, train_loss_epoch=0.234]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 13.86it/s, train_loss_step=0.234, train_loss_epoch=0.234]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | RegressionFFN | 90.6 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.273 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 46.40it/s, train_loss_step=0.215, train_loss_epoch=0.215]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.79it/s, train_loss_step=0.215, train_loss_epoch=0.215]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity 
| 0 | train\n", + "3 | predictor | RegressionFFN | 90.6 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.273 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 42.51it/s, train_loss_step=0.239, train_loss_epoch=0.239]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 36.88it/s, train_loss_step=0.239, train_loss_epoch=0.239]\n" + ] + } + ], + "source": [ + "for model in ensemble:\n", + " trainer = pl.Trainer(logger=False, enable_checkpointing=False, max_epochs=1)\n", + " trainer.fit(model, dataloader)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 83.86it/s] \n", + "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 82.63it/s]\n", + "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 68.94it/s] \n" + ] + } + ], + "source": [ + "prediction_dataloader = data.build_dataloader(dset, shuffle=False)\n", + "predictions = []\n", + "for model in ensemble:\n", + " predictions.append(torch.concat(trainer.predict(model, prediction_dataloader)))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[tensor([[0.0096],\n", + " [0.0008],\n", + " [0.0082]]),\n", + " tensor([[0.0318],\n", + " [0.0260],\n", + " [0.0254]]),\n", + " tensor([[-0.0054],\n", + " [ 0.0032],\n", + " [-0.0035]])]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[0.0120],\n", + " [0.0100],\n", + " [0.0100]])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch.concat(predictions, axis=1).mean(axis=1, keepdim=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/docs/source/tutorial/python/featurizers/atom_featurizers.ipynb b/chemprop/docs/source/tutorial/python/featurizers/atom_featurizers.ipynb new file mode 100644 index 
0000000000000000000000000000000000000000..7d02bcbf62df15efe46d1ce1e0b8913e06f38883 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/featurizers/atom_featurizers.ipynb @@ -0,0 +1,373 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Atom featurizers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers.atom import MultiHotAtomFeaturizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example atom to featurize." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rdkit import Chem\n", + "\n", + "atom_to_featurize = Chem.MolFromSmiles(\"CC\").GetAtoms()[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Atom features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following atom features are generated by `rdkit` and cast to one-hot vectors (except for mass which is divided by 100). These feature vectors are joined together to a single multi-hot feature vector (with a final float32 bit for mass). All of these features (except aromaticity and mass) are padded with an extra bit for all unknown values.\n", + "\n", + " - atomic number\n", + " - degree\n", + " - formal charge\n", + " - chiral tag\n", + " - number of hydrogens\n", + " - hybridization\n", + " - aromaticity\n", + " - mass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### v2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The v2 atom featurizer is the default. It provides bits in the feature vector for:\n", + "\n", + " - atomic number\n", + " - first four rows of the period table plus iodine\n", + " - degree\n", + " - 0 bonds - 5 bonds\n", + " - formal charge\n", + " - -2, -1, 0, 1, 2\n", + " - chiral tag\n", + " - 0, 1, 2, 3 - See `rdkit.Chem.rdchem.ChiralType` for more details\n", + " - number of hydrogens\n", + " - 0 - 4\n", + " - hybridization\n", + " - S, SP, SP2, SP2D, SP3, SP3D, SP3D2" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0.12011])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "featurizer = MultiHotAtomFeaturizer.v2()\n", + "featurizer(atom_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### v1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The v1 atom featurizer is the same as was used in Chemprop v1. It is the same as the v2 atom featurizer except for:\n", + "\n", + " - atomic number\n", + " - first 100 elements (customizable)\n", + " - hybridization\n", + " - SP, SP2, SP3, SP3D, SP3D2" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. 
, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. , 0.12011])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "featurizer = MultiHotAtomFeaturizer.v1()\n", + "featurizer(atom_to_featurize)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 1. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0.12011])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "featurizer = MultiHotAtomFeaturizer.v1(max_atomic_num=53)\n", + "featurizer(atom_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### organic" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The organic atom featurizer is optimized to reduce feature vector size for organic molecule. It is the same as the v2 atom featurizer except for:\n", + "\n", + " - atomic number\n", + " - H, B, C, N, O, F, Si, P, S, Cl, Br, and I atoms\n", + " - hybridization\n", + " - S, SP, SP2, SP3" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0.12011])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "featurizer = MultiHotAtomFeaturizer.organic()\n", + "featurizer(atom_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Custom atom featurizers can also be created by specifying the choices. Custom choices for atomic number, degree, formal charge, chiral tag, # of hydrogens, and hybridization can be specified to create a custom atom featurizer. Aromaticity featurization is always True/False. " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. 
, 0. , 0. , 1. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0.12011])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from rdkit.Chem.rdchem import HybridizationType\n", + "\n", + "atomic_nums = [1, 6, 7, 8]\n", + "degrees = [0, 1, 2, 3, 4]\n", + "formal_charges = [-2, -1, 0, 1, 2]\n", + "chiral_tags = [0, 1, 2, 3]\n", + "num_Hs = [0, 1, 2, 3, 4]\n", + "hybridizations = [HybridizationType.SP, HybridizationType.SP2, HybridizationType.SP3]\n", + "featurizer = MultiHotAtomFeaturizer(\n", + " atomic_nums, degrees, formal_charges, chiral_tags, num_Hs, hybridizations\n", + ")\n", + "featurizer(atom_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generic" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Any class that has a length and returns a numpy array when given an `rdkit.Chem.rdchem.Atom` can be used as an atom featurizer. " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([6.])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from rdkit.Chem.rdchem import Atom\n", + "import numpy as np\n", + "\n", + "\n", + "class MyAtomFeaturizer:\n", + " def __len__(self):\n", + " return 1\n", + "\n", + " def __call__(self, a: Atom):\n", + " return np.array([a.GetAtomicNum()], dtype=float)\n", + "\n", + "\n", + "featurizer = MyAtomFeaturizer()\n", + "featurizer(atom_to_featurize)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/docs/source/tutorial/python/featurizers/bond_featurizers.ipynb b/chemprop/docs/source/tutorial/python/featurizers/bond_featurizers.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..183d730af509e2a2024761d0a29e2b56edb8704d --- /dev/null +++ b/chemprop/docs/source/tutorial/python/featurizers/bond_featurizers.ipynb @@ -0,0 +1,189 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bond featurizers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers.bond import MultiHotBondFeaturizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example bond to featurize." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rdkit import Chem\n", + "\n", + "bond_to_featurize = Chem.MolFromSmiles(\"CC\").GetBondBetweenAtoms(0, 1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bond features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following bond features are generated by `rdkit` and cast to one-hot vectors (except for the initial null bit which is True/False depending on if the bond is `None`). These feature vectors are joined together to a single multi-hot feature vector. 
Only the stereochemistry vector is padded for unknown values.\n", + "\n", + " - null?\n", + " - bond type\n", + " - conjugated?\n", + " - in ring?\n", + " - stereochemistry" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "featurizer = MultiHotBondFeaturizer()\n", + "featurizer(bond_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The bond types and stereochemistry can be customized. The defaults are:\n", + "\n", + " - bond_type\n", + " - Single, Double, Triple, Aromatic\n", + " - stereos\n", + " - 0, 1, 2, 3, 4, 5 - See `rdkit.Chem.rdchem.BondStereo` for more details" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 0, 0, 1, 0, 0, 0])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from rdkit.Chem.rdchem import BondType\n", + "\n", + "featurizer = MultiHotBondFeaturizer(bond_types=[BondType.SINGLE], stereos=[0, 1, 2])\n", + "featurizer(bond_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generic" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Any class that has a length and returns a numpy array when given an `rdkit.Chem.rdchem.Bond` can be used as a bond featurizer. " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from rdkit.Chem.rdchem import Bond\n", + "import numpy as np\n", + "\n", + "\n", + "class MyBondFeaturizer:\n", + " def __len__(self):\n", + " return 1\n", + "\n", + " def __call__(self, a: Bond):\n", + " return np.array([a.GetIsConjugated()], dtype=float)\n", + "\n", + "\n", + "featurizer = MyBondFeaturizer()\n", + "featurizer(bond_to_featurize)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/docs/source/tutorial/python/featurizers/molecule_featurizers.ipynb b/chemprop/docs/source/tutorial/python/featurizers/molecule_featurizers.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..963e932831b6d4d56ab72432cfce0e9d25d84852 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/featurizers/molecule_featurizers.ipynb @@ -0,0 +1,423 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Molecule featurizers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers.molecule import (\n", + " MorganBinaryFeaturizer,\n", + " MorganCountFeaturizer,\n", + " RDKit2DFeaturizer,\n", + " V1RDKit2DFeaturizer,\n", + " V1RDKit2DNormalizedFeaturizer,\n", + ")" + ] + }, + 
{ + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These are example molecules to featurize." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.utils import make_mol\n", + "\n", + "smis = [\"C\" * i for i in range(1, 11)]\n", + "mols = [make_mol(smi, keep_h=False, add_h=False) for smi in smis]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Molecule vs molgraph featurizers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Both molecule and [molgraph](./molgraph_molecule_featurizer.ipynb) featurizers take `rdkit.Chem.Mol` objects as input. Molgraph featurizers produce a `MolGraph` which is used in message passing. Molecule featurizers produce a 1D numpy array of features that can be used as [extra datapoint descriptors](../data/datapoints.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, ..., 0, 0, 0], dtype=uint8)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.data import MoleculeDatapoint\n", + "\n", + "molecule_featurizer = MorganBinaryFeaturizer()\n", + "\n", + "datapoints = [MoleculeDatapoint(mol, x_d=molecule_featurizer(mol)) for mol in mols]\n", + "\n", + "molecule_featurizer(mols[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Morgan fingerprint featurizers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Morgan fingerprint can either use a binary or count representation of molecular structures. The radius of structures, length of the fingerprint, and whether to include chirality can all be customized. The default radius is 2, the default length is 2048, and chirality is included by default." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((1024,), array([0, 0, 0, ..., 0, 0, 0], dtype=int32))" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mf = MorganCountFeaturizer(radius=3, length=1024, include_chirality=False)\n", + "morgan_fp = mf(mols[0])\n", + "morgan_fp.shape, morgan_fp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### RDKit molecule featurizers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop gives a warning that the RDKit molecule featurers are not well scaled by a `StandardScaler`. Consult the literature for more appropriate scaling methods." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The RDKit 2D features can deviate signifcantly from a normal distribution. 
Consider manually scaling them using an appropriate scaler before creating datapoints, rather than using the scikit-learn `StandardScaler` (the default in Chemprop).\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 0. , 0. , 0. , 0. , 0.35978494,\n", + " 0. , 16.043 , 12.011 , 16.03130013, 8. ,\n", + " 0. , -0.07755789, -0.07755789, 0.07755789, 0.07755789,\n", + " 1. , 1. , 1. , 12.011 , 12.011 ,\n", + " -0.07755789, -0.07755789, 0.1441 , 0.1441 , 2.503 ,\n", + " 2.503 , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 8.73925103, 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 7.42665278, 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 7.42665278, 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 7.42665278, 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 7.42665278, 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0.6361 , 6.731 ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. 
,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "molecule_featurizer = RDKit2DFeaturizer()\n", + "extra_datapoint_descriptors = [molecule_featurizer(mol) for mol in mols]\n", + "extra_datapoint_descriptors[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The rdkit featurizers from v1 are also available. They rely on the `descriptastorus` package which can be found at [https://github.com/bp-kelley/descriptastorus](https://github.com/bp-kelley/descriptastorus). This package doesn't include the following rdkit descriptors: `['AvgIpc', 'BCUT2D_CHGHI', 'BCUT2D_CHGLO', 'BCUT2D_LOGPHI', 'BCUT2D_LOGPLOW', 'BCUT2D_MRHI', 'BCUT2D_MRLOW', 'BCUT2D_MWHI', 'BCUT2D_MWLOW', 'SPS']`. Scaled versions of these descriptors are available, though it is unknown which molecules were used to fit the scaling, so this may be a dataleak depending on the test set used to evaluate model performace. See this [issue](https://github.com/bp-kelley/descriptastorus/issues/31) for more details about the scaling. " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n", + "[16:42:01] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "data": { + "text/plain": [ + "array([1.96075662e-05, 5.77173432e-04, 3.87525506e-15, 2.72296612e-11,\n", + " 1.02515408e-07, 4.10254814e-13, 1.63521389e-11, 1.93930344e-05,\n", + " 1.22824218e-06, 2.20907757e-07, 6.35349909e-07, 3.08677419e-06,\n", + " 1.70338959e-05, 1.34072882e-05, 4.07488775e-10, 2.17523456e-08,\n", + " 6.89356874e-07, 2.63048207e-01, 1.96742684e-02, 2.50993926e-11,\n", + " 9.25841695e-11, 5.85610910e-17, 1.08871430e-06, 2.39145041e-11,\n", + " 7.52245592e-13, 1.23345732e-08, 2.94906350e-01, 9.59992784e-03,\n", + " 2.31947354e-03, 9.99390325e-01, 9.88006922e-01, 1.59186446e-08,\n", + " 4.42180049e-09, 1.00000000e+00, 7.85198619e-13, 4.14332758e-13,\n", + " 6.49617582e-11, 4.45588945e-06, 7.89307465e-03, 2.39990382e-02,\n", + " 7.89307465e-03, 4.59284380e-03, 3.24286613e-10, 1.83192891e-02,\n", + " 7.38491174e-01, 9.73505944e-01, 6.05575320e-02, 3.42737552e-07,\n", + " 1.23284669e-08, 6.13163344e-02, 3.33304127e-02, 9.93858689e-22,\n", + " 1.42492255e-01, 6.29631332e-02, 3.47228888e-02, 4.82992991e-15,\n", + " 1.11775996e-02, 1.89758400e-02, 5.52866693e-02, 5.22997303e-05,\n", + " 5.69516350e-08, 2.15229839e-03, 0.00000000e+00, 1.14242658e-21,\n", + " 2.40245513e-23, 1.31105703e-02, 8.72153349e-03, 5.76142917e-21,\n", + " 3.60875252e-15, 1.45980119e-01, 1.73556718e-22, 1.18093757e-10,\n", + " 5.99833786e-02, 9.05498589e-08, 4.60978367e-10, 1.57072376e-01,\n", + " 1.66847964e-01, 2.37240682e-02, 8.07601514e-02, 2.75008841e-02,\n", + " 4.92845505e-03, 1.24459630e-01, 7.31816496e-02, 1.67096874e-01,\n", + " 7.55810089e-02, 8.78622233e-24, 1.33643046e-01, 3.04494668e-02,\n", + " 2.58369311e-02, 5.30138094e-05, 1.42657565e-16, 3.73160396e-02,\n", + " 6.95272017e-13, 0.00000000e+00, 9.79690873e-13, 2.64281353e-04,\n", + " 1.20493060e-11, 2.86305006e-09, 1.04578852e-01, 3.09944928e-02,\n", + " 2.99487758e-06, 2.77639012e-01, 5.30138094e-05, 6.17138309e-03,\n", + " 5.30138094e-05, 
5.00000000e-01, 3.84710451e-01, 5.30138094e-05,\n", + " 5.30138094e-05, 1.64664515e-01, 5.30138094e-05, 9.98653446e-01,\n", + " 3.99820633e-01, 2.02868342e-02, 5.70867846e-19, 3.32362804e-10,\n", + " 9.64197643e-10, 7.10542736e-15, 5.83707586e-13, 1.19880642e-20,\n", + " 1.65079548e-01, 1.67040631e-01, 1.66498334e-01, 1.66486816e-01,\n", + " 2.02864661e-01, 6.93658809e-02, 7.10542736e-15, 1.68346480e-01,\n", + " 1.67982932e-01, 6.87189958e-10, 1.18157291e-03, 1.64332634e-01,\n", + " 8.37776917e-04, 1.66325734e-01, 1.63034142e-01, 1.65079548e-01,\n", + " 9.56970492e-08, 3.49708922e-08, 1.68206175e-01, 1.65806858e-01,\n", + " 1.67346595e-01, 7.13964619e-07, 2.64115098e-12, 9.99127911e-02,\n", + " 2.86809243e-10, 3.77737848e-01, 4.50616778e-03, 1.33250251e-01,\n", + " 3.47299284e-02, 1.61482916e-09, 1.87517315e-18, 2.09410539e-07,\n", + " 7.10542736e-15, 4.99264281e-01, 1.64929402e-01, 1.31744508e-17,\n", + " 2.11164355e-16, 1.16815875e-09, 3.25923600e-22, 6.24601420e-10,\n", + " 1.68149182e-01, 1.65450729e-01, 1.17110262e-13, 0.00000000e+00,\n", + " 1.64668868e-01, 1.66924728e-01, 0.00000000e+00, 5.10071327e-08,\n", + " 7.10542736e-15, 1.54654108e-01, 2.79420938e-22, 0.00000000e+00,\n", + " 1.67639733e-01, 6.31499266e-25, 1.68186130e-01, 9.08850267e-03,\n", + " 1.68363202e-01, 8.26542313e-11, 1.56346354e-01, 0.00000000e+00,\n", + " 0.00000000e+00, 2.11354236e-02, 2.11354236e-02, 2.38815575e-20,\n", + " 0.00000000e+00, 8.33672450e-25, 5.30138094e-05, 1.56951066e-01,\n", + " 4.03434503e-08, 1.55259196e-23, 1.59306117e-17, 5.76610077e-14,\n", + " 2.95798941e-11, 1.68378369e-01, 1.67380186e-01, 1.48151465e-18,\n", + " 2.32414994e-16, 4.70359809e-08, 1.66633397e-01, 1.87492844e-01])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "molecule_featurizer = V1RDKit2DFeaturizer()\n", + "molecule_featurizer = V1RDKit2DNormalizedFeaturizer()\n", + "molecule_featurizer(mols[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Any class that has a length and returns a 1D numpy array when given an `rdkit.Chem.Mol` can be used as a molecule featurizer. " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from rdkit import Chem\n", + "\n", + "class MyMoleculeFeaturizer:\n", + " def __len__(self) -> int:\n", + " return 1\n", + "\n", + " def __call__(self, mol: Chem.Mol) -> np.ndarray:\n", + " total_atoms = mol.GetNumAtoms()\n", + " return np.array([total_atoms])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mf = MyMoleculeFeaturizer()\n", + "mf(mols[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using molecule features as extra datapoint descriptors" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you only have molecule features for one molecule per datapoint, those features can be used directly as extra datapoint descriptors. If you have multiple molecules with extra features, or other extra datapoint descriptors, they first need to be concatenated into a single numpy array." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "mol1_features = np.random.randn(len(mols), 1)\n", + "mol2_features = np.random.randn(len(mols), 2)\n", + "other_datapoint_descriptors = np.random.randn(len(mols), 3)\n", + "\n", + "extra_datapoint_descriptors = np.hstack([mol1_features, mol2_features, other_datapoint_descriptors])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/docs/source/tutorial/python/featurizers/molgraph_molecule_featurizer.ipynb b/chemprop/docs/source/tutorial/python/featurizers/molgraph_molecule_featurizer.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..cd6b866581491832fabfb2f671620533de89fd2b --- /dev/null +++ b/chemprop/docs/source/tutorial/python/featurizers/molgraph_molecule_featurizer.ipynb @@ -0,0 +1,207 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Molecule MolGraph featurizers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers.molgraph.molecule import SimpleMoleculeMolGraphFeaturizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example molecule to featurize." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rdkit import Chem\n", + "\n", + "mol_to_featurize = Chem.MolFromSmiles(\"CC\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Simple molgraph featurizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A `MolGraph` represents the graph featurization of a molecule. It is made of atom features (`V`), bond features (`E`), and a mapping between atoms and bonds (`edge_index` and `rev_edge_index`). It is created by `SimpleMoleculeMolGraphFeaturizer`. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MolGraph(V=array([[0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0.12011],\n", + " [0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. 
, 0.12011]], dtype=float32), E=array([[0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]]), edge_index=array([[0, 1],\n", + " [1, 0]]), rev_edge_index=array([1, 0]))" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "featurizer = SimpleMoleculeMolGraphFeaturizer()\n", + "featurizer(mol_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The [atom](./atom_featurizers.ipynb) and [bond](./bond_featurizers.ipynb) featurizers used by the molgraph featurizer are customizable." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MolGraph(V=array([[0. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0.12011],\n", + " [0. , 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 1. , 0. , 1. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0.12011]], dtype=float32), E=array([[0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]), edge_index=array([[0, 1],\n", + " [1, 0]]), rev_edge_index=array([1, 0]))" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.featurizers import MultiHotAtomFeaturizer, MultiHotBondFeaturizer\n", + "\n", + "atom_featurizer = MultiHotAtomFeaturizer.organic()\n", + "bond_featurizer = MultiHotBondFeaturizer(stereos=[0, 1, 2, 3, 4])\n", + "featurizer = SimpleMoleculeMolGraphFeaturizer(\n", + " atom_featurizer=atom_featurizer, bond_featurizer=bond_featurizer\n", + ")\n", + "featurizer(mol_to_featurize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extra atom and bond features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your [datapoints](../data/datapoints.ipynb) have extra atom or bond features, the molgraph featurizer needs to know the length of the extra features when it is created so that molecules without heavy atoms (molecular hydrogen) are featurized correctly and so that the bond feature array is the correct shape." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "n_extra_atom_features = 3\n", + "n_extra_bond_features = 4\n", + "featurizer = SimpleMoleculeMolGraphFeaturizer(\n", + " extra_atom_fdim=n_extra_atom_features, extra_bond_fdim=n_extra_bond_features\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The [dataset](../data/datasets.ipynb) is given this custom featurizer and automatically handles the featurization including passing extra atom and bond features for each datapoint. 
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/docs/source/tutorial/python/featurizers/molgraph_reaction_featurizer.ipynb b/chemprop/docs/source/tutorial/python/featurizers/molgraph_reaction_featurizer.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..7f51a10ef224166ce93bcf29b871c704726ac3e3 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/featurizers/molgraph_reaction_featurizer.ipynb @@ -0,0 +1,433 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reaction MolGraph featurizers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers.molgraph.reaction import CondensedGraphOfReactionFeaturizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example reaction to featurize. The sanitizing code is to preserve atom mapped hydrogens in the graph." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from rdkit import Chem\n", + "\n", + "rct = Chem.MolFromSmiles(\"[H:1][C:4]([H:2])([H:3])[F:5]\", sanitize=False)\n", + "pdt = Chem.MolFromSmiles(\"[H:1][C:4]([H:2])([H:3]).[F:5]\", sanitize=False)\n", + "Chem.SanitizeMol(\n", + " rct, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS\n", + ")\n", + "Chem.SanitizeMol(\n", + " pdt, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS\n", + ")\n", + "\n", + "rxn = (rct, pdt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Condensed Graph of Reaction featurizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Like a [molecule](./molgraph_molecule_featurizer.ipynb) MolGraph featurizer, reaction MolGraph featurizers produce a `MolGraph`. The difference between the molecule and reaction versions is that a reaction takes two `rdkit.Chem.Mol` objects and need to know what \"mode\" of featurization to use. Available modes are found in `RxnMode`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "reac_prod\n", + "reac_prod_balance\n", + "reac_diff\n", + "reac_diff_balance\n", + "prod_diff\n", + "prod_diff_balance\n" + ] + } + ], + "source": [ + "from chemprop.featurizers import RxnMode\n", + "\n", + "for mode in RxnMode:\n", + " print(mode)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Briefly, \"reac\" stands for reactant features, \"prod\" stands for product features, and \"diff\" stands for the difference between reactant and product features. The two sets of features are concatenated together. \"balance\" refers to balancing imablanced reactions. See the 2022 [paper](https://doi.org/10.1021/acs.jcim.1c00975) by Heid and Green for more details. \"reac_diff\" is the default." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "reac_diff = CondensedGraphOfReactionFeaturizer()\n", + "reac_prod = CondensedGraphOfReactionFeaturizer(mode_=\"reac_prod\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, -1,\n", + " 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, -1,\n", + " 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0]])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reac_diff(rxn).E" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0],\n", + " [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0]])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reac_prod(rxn).E" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Like molecule MolGraph featurizers, reaction featurizers can use custom atom and bond featurizers." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MolGraph(V=array([[ 1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.01008, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 1. , 0. 
,\n", + " 0. , 0.12011, 0. , 0. , 0. , 1. ,\n", + " -1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.01008, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.01008, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0.18998, 1. , -1. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ]]), E=array([[ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, -1,\n", + " 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, -1,\n", + " 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0]]), edge_index=array([[0, 1, 1, 2, 1, 3, 1, 4],\n", + " [1, 0, 2, 1, 3, 1, 4, 1]]), rev_edge_index=array([1, 0, 3, 2, 5, 4, 7, 6]))" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.featurizers import MultiHotAtomFeaturizer\n", + "\n", + "atom_featurizer = MultiHotAtomFeaturizer.organic()\n", + "rxn_featurizer = CondensedGraphOfReactionFeaturizer(atom_featurizer=atom_featurizer)\n", + "rxn_featurizer(rxn)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extra atom and bond features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extra atom and bond features are not yet supported for reactions." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "'atom_features_extra' is currently unsupported for reactions\n" + ] + }, + { + "data": { + "text/plain": [ + "MolGraph(V=array([[ 1. , 0. , 0. , 0. 
, 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.01008, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0.12011, 0. , 0. , 0. , 1. ,\n", + " -1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.01008, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 1. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.01008, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , 0. , 0. , 1. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 1. , 0. ,\n", + " 0. , 0.18998, 1. , -1. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. 
]]), E=array([[ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, -1,\n", + " 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0],\n", + " [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, -1,\n", + " 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0]]), edge_index=array([[0, 1, 1, 2, 1, 3, 1, 4],\n", + " [1, 0, 2, 1, 3, 1, 4, 1]]), rev_edge_index=array([1, 0, 3, 2, 5, 4, 7, 6]))" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rxn_featurizer(rxn, atom_features_extra=[1.0])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/docs/source/tutorial/python/index.rst b/chemprop/docs/source/tutorial/python/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..e9dade8a737be7fed6fe8857ee81f4f2e096644a --- /dev/null +++ b/chemprop/docs/source/tutorial/python/index.rst @@ -0,0 +1,63 @@ +.. _python usage: + +Python Module Tutorials +======================= + +Chemprop may be used in python scripts, allowing for greater flexibility and control than the CLI. We recommend first looking through some of the worked examples to get an overview of the workflow. Then further details about the creation, customization, and use of Chemprop modules can be found in the following module tutorials: + +Data Modules: + +* :doc:`data/datapoints` +* :doc:`data/datasets` +* :doc:`data/dataloaders` +* :doc:`data/splitting` + +Featurization Modules: + +* :doc:`featurizers/atom_featurizers` +* :doc:`featurizers/bond_featurizers` +* :doc:`featurizers/molgraph_molecule_featurizer` +* :doc:`featurizers/molgraph_reaction_featurizer` +* :doc:`featurizers/molecule_featurizers` + +Model Modules: + +* :doc:`models/basic_mpnn_model` +* :doc:`models/message_passing` +* :doc:`models/aggregation` +* :doc:`models/predictor` +* :doc:`models/multicomponent_mpnn_model` + +Other module and workflow tutorials: + +* :doc:`activation` +* :doc:`loss_functions` +* :doc:`metrics` +* :doc:`saving_and_loading` +* :doc:`ensembling` +* :doc:`scaling` + +.. 
toctree:: + :maxdepth: 1 + :hidden: + + data/datapoints + data/datasets + data/dataloaders + data/splitting + featurizers/atom_featurizers + featurizers/bond_featurizers + featurizers/molgraph_molecule_featurizer + featurizers/molgraph_reaction_featurizer + featurizers/molecule_featurizers + models/basic_mpnn_model + models/message_passing + models/aggregation + models/predictor + models/multicomponent_mpnn_model + activation + loss_functions + metrics + saving_and_loading + ensembling + scaling \ No newline at end of file diff --git a/chemprop/docs/source/tutorial/python/loss_functions.ipynb b/chemprop/docs/source/tutorial/python/loss_functions.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8c83372ce3475f2a342ce8040ce5cf9e8b50af5c --- /dev/null +++ b/chemprop/docs/source/tutorial/python/loss_functions.ipynb @@ -0,0 +1,641 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loss functions" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "\n", + "from lightning import pytorch as pl\n", + "import numpy as np\n", + "from numpy.typing import ArrayLike\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "import torch\n", + "from torch import Tensor\n", + "import torchmetrics\n", + "\n", + "from chemprop import data, models, nn\n", + "from chemprop.nn.metrics import ChempropMetric, LossFunctionRegistry" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Available functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop provides several loss functions. The derivatives of these differentiable functions are used to update the model weights. Users only need to select the loss function to use. The rest of the details are handled by Chemprop and the lightning trainer, which reports the training and validation loss during model fitting.\n", + "\n", + "See also [metrics](./metrics.ipynb) which are the same as loss functions, but potentially non-differentiable and used to measure the performance of a model. " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mse\n", + "mae\n", + "rmse\n", + "bounded-mse\n", + "bounded-mae\n", + "bounded-rmse\n", + "mve\n", + "evidential\n", + "bce\n", + "ce\n", + "binary-mcc\n", + "multiclass-mcc\n", + "dirichlet\n", + "sid\n", + "earthmovers\n", + "wasserstein\n", + "quantile\n", + "pinball\n" + ] + } + ], + "source": [ + "for lossfunction in LossFunctionRegistry:\n", + " print(lossfunction)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task weights" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A model can make predictions of multiple targets/tasks at the same time. For example, a model may predict both solubility and melting point. Task weights can be specified when some of the tasks are more important to get accurate than others. The weight for each task defaults to 1." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MSE(task_weights=[[0.10000000149011612, 0.5, 1.0]])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.nn.metrics import MSE\n", + "\n", + "predictor = nn.RegressionFFN(criterion=MSE(task_weights=[0.1, 0.5, 1.0]))\n", + "model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), predictor)\n", + "predictor.criterion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mean squared error and bounded mean square error" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MSE` is the default loss function for regression tasks." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MSE(task_weights=[[1.0]])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictor = nn.RegressionFFN()\n", + "model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), predictor)\n", + "predictor.criterion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`BoundedMSE` is useful when the target values have \\\"less than\\\" or \\\"greater than\\\" behavior, e.g. the prediction is correct as long as it is below/above a target value. Datapoints have a less than/greater than property that keeps track of bounded targets. Note that, like target values, the less than and greater than masks used to make datapoints are 1-D numpy arrays of bools instead of a single bool. This is because a single datapoint can have multiple target values and the less than/greater than masks are defined for each target value separately." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ True],\n", + " [False],\n", + " [False],\n", + " [False],\n", + " [ True]])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.nn.metrics import BoundedMSE\n", + "\n", + "smis = [\"C\" * i for i in range(1, 6)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "lt_mask = np.array([[True], [False], [False], [False], [True]])\n", + "gt_mask = np.array([[False], [True], [False], [True], [False]])\n", + "datapoints = [\n", + " data.MoleculeDatapoint.from_smi(smi, y, lt_mask=lt, gt_mask=gt)\n", + " for smi, y, lt, gt in zip(smis, ys, lt_mask, gt_mask)\n", + "]\n", + "bounded_dataset = data.MoleculeDataset(datapoints)\n", + "bounded_dataset.lt_mask" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "predictor = nn.RegressionFFN(criterion=BoundedMSE())\n", + "model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), predictor)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Binary cross entropy and cross entropy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`BCELoss` is the default loss function for binary classification and `CrossEntropyLoss` is the default for multiclass classification." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BCELoss(task_weights=[[1.0]])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictor = nn.BinaryClassificationFFN()\n", + "predictor.criterion" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "CrossEntropyLoss(task_weights=[[1.0]])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictor = nn.MulticlassClassificationFFN(n_classes=3)\n", + "predictor.criterion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Matthews correlation coefficient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "MCC loss is useful for imbalanced classification data. An optimal MCC is 1, so the loss function version of MCC returns 1 - MCC." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import BinaryMCCLoss, MulticlassMCCLoss" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Uncertainty" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Various methods for estimating uncertainty in predictions are available. These methods often use specific loss functions." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import MVELoss, EvidentialLoss, DirichletLoss, QuantileLoss" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Spectral loss functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Spectral information divergence and wasserstein (earthmover's distance) are often used for spectral predictions." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import SID, Wasserstein" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom loss functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop loss functions are instances of `chemprop.nn.metrics.ChempropMetric`, which inherits from `torchmetrics.Metric`. Custom loss functions need to follow the interface of both `ChempropMetric` and `Metric`. Start with a `Metric` either by importing an existing one from `torchmetrics` or by creating your own by following the instructions on the `torchmetrics` website. Then make the following changes:\n", + "\n", + "1. Allow for task weights to be passed to the `__init__` method.\n", + "2. Allow for the `update` method to be given `preds, targets, mask, weights, lt_mask, gt_mask` in that order.\n", + "\n", + "* `preds`: A `Tensor` of the model's predictions with dimension 0 being the batch dimension and dimension 1 being the task dimension. 
Dimension 2 exists for uncertainty estimation or multiclass predictions and is either used for uncertainty parameters or multiclass logits.\n", + "* `targets`: A `Tensor` of the target values with dimension 0 being the batch dimension and dimension 1 being the task dimension.\n", + "* `mask`: A `Tensor` of the same shape as `targets` with `True`s where the target value is present and finite and `False` where it is not.\n", + "* `weights`: A `Tensor` of the weights for each data point in the loss function. This is useful when some data points are more important than others.\n", + "* `lt_mask`: A `Tensor` of the same shape as `targets` with `True`s where the target value is a \"less than\" target value and `False` where it is not.\n", + "* `gt_mask`: A `Tensor` of the same shape as `targets` with `True`s where the target value is a \"greater than\" target value and `False` where it is not." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "class ChempropMulticlassHingeLoss(torchmetrics.classification.MulticlassHingeLoss):\n", + " def __init__(self, task_weights: ArrayLike = 1.0, **kwargs):\n", + " super().__init__(**kwargs)\n", + " self.task_weights = torch.as_tensor(task_weights, dtype=torch.float).view(1, -1)\n", + " if (self.task_weights != 1.0).any():\n", + " warnings.warn(\"task_weights were provided but are ignored by metric \"\n", + " f\"{self.__class__.__name__}. Got {task_weights}\")\n", + "\n", + " def update(self, preds: Tensor, targets: Tensor, mask: Tensor | None = None, *args, **kwargs):\n", + " if mask is None:\n", + " mask = torch.ones_like(targets, dtype=torch.bool)\n", + "\n", + " super().update(preds[mask], targets[mask].long())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, if your loss function can return a value for every task for every data point (i.e. not reduced in the task or batch dimension), you can inherit from `chemprop.nn.metrics.ChempropMetric` and just override the `_calc_unreduced_loss` method (and if needed the `__init__` method)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "class BoundedNormalizedMSEPlus1(ChempropMetric):\n", + "    def __init__(self, task_weights = None, norm: float = 1.0):\n", + "        super().__init__(task_weights)\n", + "        norm = torch.as_tensor(norm)\n", + "        self.register_buffer(\"norm\", norm)\n", + "\n", + "    def _calc_unreduced_loss(self, preds, targets, mask, weights, lt_mask, gt_mask) -> Tensor:\n", + "        preds = torch.where((preds < targets) & lt_mask, targets, preds)\n", + "        preds = torch.where((preds > targets) & gt_mask, targets, preds)\n", + "\n", + "        return (preds - targets) ** 2 / self.norm + 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set up the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parents[3]\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"classification\" / \"mol_multiclass.csv\"\n", + "df_input = pd.read_csv(input_path)\n", + "smis = df_input.loc[:, \"smiles\"].values\n", + "ys = df_input.loc[:, [\"activity\"]].values\n", + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]\n", + "train_indices, val_indices, test_indices = data.make_split_indices(all_data, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + "    all_data, train_indices, val_indices, test_indices\n", + ")\n", + "train_dset = data.MoleculeDataset(train_data[0])\n", + "val_dset = data.MoleculeDataset(val_data[0])\n", + "test_dset = data.MoleculeDataset(test_data[0])\n", + "train_loader = data.build_dataloader(train_dset)\n", + "val_loader = data.build_dataloader(val_dset, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a model with a custom loss function" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "n_classes = max(ys).item() + 1\n", + "\n", + "loss_function = ChempropMulticlassHingeLoss(num_classes = n_classes)\n", + "ffn = nn.MulticlassClassificationFFN(n_classes = n_classes, criterion = loss_function)\n", + "\n", + "model = models.MPNN(nn.BondMessagePassing(), nn.NormAggregation(), ffn)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run training" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (7) is smaller than the logging interval Trainer(log_every_n_steps=50). 
Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "\n", + " | Name | Type | Params | Mode \n", + "------------------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | NormAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MulticlassClassificationFFN | 91.2 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "------------------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.276 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/multiclass-mcc 0.0 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/multiclass-mcc \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.0 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "[{'test/multiclass-mcc': 0.0}]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trainer = pl.Trainer(max_epochs=2)\n", + "trainer.fit(model, train_loader, val_loader)\n", + "trainer.test(model, test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/docs/source/tutorial/python/metrics.ipynb b/chemprop/docs/source/tutorial/python/metrics.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..82b6ff16310bcd070d2fd7f0faad81f941a7cd5f --- /dev/null +++ b/chemprop/docs/source/tutorial/python/metrics.ipynb @@ -0,0 +1,650 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from lightning import pytorch as pl\n", + "import numpy as np\n", + "from numpy.typing import ArrayLike\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "import torch\n", + "from torch import Tensor\n", + "import torchmetrics\n", + "import logging\n", + "\n", + "from chemprop import data, models, nn\n", + "from chemprop.nn.metrics import ChempropMetric, MetricRegistry\n", + "\n", + "logger = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "### Available metric functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop provides several metrics. The functions calculate a single value that serves as a measure of model performance. Users only need to select the metric(s) to use. The rest of the details are handled by Chemprop and the lightning trainer, which logs all metric values to the trainer logger (defaults to TensorBoard) for the validation and test sets. Note that the validation metrics are in the scaled space while the test metrics are in the original target space.\n", + "\n", + "See also [loss functions](./loss_functions.ipynb) which are the same as metrics, except used to optimize the model and therefore required to be differentiable." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mse\n", + "mae\n", + "rmse\n", + "bounded-mse\n", + "bounded-mae\n", + "bounded-rmse\n", + "r2\n", + "binary-mcc\n", + "multiclass-mcc\n", + "roc\n", + "prc\n", + "accuracy\n", + "f1\n" + ] + } + ], + "source": [ + "for metric in MetricRegistry:\n", + " print(metric)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Specifying metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each FFN predictor has a default metric. If you want different metrics reported, you can give a list of metrics to the model at creation. Note that the list of metrics is used in place of the default metric and not in addition to the default metric." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import MSE, MAE, RMSE\n", + "\n", + "metrics = [MSE(), MAE(), RMSE()]\n", + "model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), nn.RegressionFFN(), metrics=metrics)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Accumulating metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop metrics are based on `Metric` from `torchmetrics` which stores the information from each batch that is needed to calculate the metric over the whole validation or test set." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing DataLoader 0: 0%| | 0/5 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/mae 0.4941912293434143 │\n", + "│ test/mse 0.3071698546409607 │\n", + "│ test/rmse 0.5542290806770325 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mae \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.4941912293434143 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.3071698546409607 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/rmse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.5542290806770325 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 147.05it/s]\n" + ] + } + ], + "source": [ + "smis = [\"C\" * i for i in range(1, 11)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "dset = data.MoleculeDataset([data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])\n", + "dataloader = data.build_dataloader(dset, shuffle=False, batch_size=2)\n", + "\n", + "trainer = pl.Trainer(logger=False, enable_checkpointing=False, max_epochs=1)\n", + "result_when_batched = trainer.test(model, dataloader)\n", + "preds = trainer.predict(model, dataloader)\n", + "preds = torch.concat(preds)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Batch / Not Batched\n", + "0.5542 / 0.5542\n" + ] + } + ], + "source": [ + "result_when_not_batched = RMSE()(preds, torch.from_numpy(dset.Y), None, None, None, None)\n", + "print(\"Batch / Not Batched\")\n", + "print(f\"{result_when_batched[0]['test/rmse']:.4f} / {result_when_not_batched.item():.4f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Batch normalization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is worth noting that if your model has a batch normalization layer, the computed metric will be different depending on if the model is in training or evaluation mode. 
When a batch normalization layer is training, it uses a biased estimator to calculate the standard deviation, but the value stored and used during evaluation is calculated with the unbiased estimator. Lightning takes care of this if the `Trainer()` is used. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regression" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are several metric options for regression. `MSE` is the default. There are also bounded versions (except for r2), similar to the bounded versions of the [loss functions](./loss_functions.ipynb). " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import MSE, MAE, RMSE, R2Score" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import BoundedMAE, BoundedMSE, BoundedRMSE" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Classification" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are metrics for both binary and multiclass classification." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import (\n", + " BinaryAUROC,\n", + " BinaryAUPRC,\n", + " BinaryAccuracy,\n", + " BinaryF1Score,\n", + " BinaryMCCMetric,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import MulticlassMCCMetric" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Spectra" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Spectral information divergence and Wasserstein distance (earth mover's distance) are often used for spectral predictions." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.metrics import SID, Wasserstein" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Chemprop metrics are instances of `chemprop.nn.metrics.ChempropMetric`, which inherits from `torchmetrics.Metric`. Custom metrics need to follow the interface of both `ChempropMetric` and `Metric`. Start with a `Metric` either by importing an existing one from `torchmetrics` or by creating your own by following the instructions on the `torchmetrics` website. Then make the following changes:\n", + "\n", + "1. Allow for task weights to be passed to the `__init__` method.\n", + "2. Allow for the `update` method to be given `preds, targets, mask, weights, lt_mask, gt_mask` in that order.\n", + "3. 
Provide an alias property, which is used to identify the metric value in the logs.\n", + "\n", + "* `preds`: A `Tensor` of the model's predictions with dimension 0 being the batch dimension and dimension 1 being the task dimension.\n", + "* `targets`: A `Tensor` of the target values with dimension 0 being the batch dimension and dimension 1 being the task dimension.\n", + "* `mask`: A `Tensor` of the same shape as `targets` with `True`s where the target value is present and finite and `False` where it is not.\n", + "* `weights`: Usually ignored in metrics.\n", + "* `lt_mask`: A `Tensor` of the same shape as `targets` with `True`s where the target value is a \"less than\" target value and `False` where it is not.\n", + "* `gt_mask`: A `Tensor` of the same shape as `targets` with `True`s where the target value is a \"greater than\" target value and `False` where it is not." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "class ChempropMulticlassAUROC(torchmetrics.classification.MulticlassAUROC):\n", + " def __init__(self, task_weights: ArrayLike = 1.0, **kwargs):\n", + " super().__init__(**kwargs)\n", + " self.task_weights = torch.as_tensor(task_weights, dtype=torch.float).view(1, -1)\n", + " if (self.task_weights != 1.0).any():\n", + " logger.warn(\"task_weights were provided but are ignored by metric \"\n", + " f\"{self.__class__.__name__}. Got {task_weights}\")\n", + "\n", + " def update(self, preds: Tensor, targets: Tensor, mask: Tensor | None = None, *args, **kwargs):\n", + " if mask is None:\n", + " mask = torch.ones_like(targets, dtype=torch.bool)\n", + "\n", + " super().update(preds[mask], targets[mask].long())\n", + "\n", + " @property\n", + " def alias(self) -> str:\n", + " return \"multiclass_auroc\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, if your metric can return a value for every task for every data point (i.e. not reduced in the task or batch dimension), you can inherit from `chemprop.nn.metrics.ChempropMetric` and just override the `_calc_unreduced_loss` method (and if needed the `__init__` method)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "class BoundedNormalizedMSEPlus1(ChempropMetric):\n", + " def __init__(self, task_weights = None, norm: float = 1.0):\n", + " super().__init__(task_weights)\n", + " norm = torch.as_tensor(norm)\n", + " self.register_buffer(\"norm\", norm)\n", + "\n", + " def _calc_unreduced_loss(self, preds, targets, mask, weights, lt_mask, gt_mask) -> Tensor:\n", + " preds = torch.where((preds < targets) & lt_mask, targets, preds)\n", + " preds = torch.where((preds > targets) & gt_mask, targets, preds)\n", + "\n", + " return torch.sum((preds - targets) ** 2) / self.norm + 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set up the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parents[3]\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"classification\" / \"mol_multiclass.csv\"\n", + "df_input = pd.read_csv(input_path)\n", + "smis = df_input.loc[:, \"smiles\"].values\n", + "ys = df_input.loc[:, [\"activity\"]].values\n", + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]\n", + "train_indices, val_indices, test_indices = data.make_split_indices(all_data, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")\n", + "train_dset = data.MoleculeDataset(train_data[0])\n", + "val_dset = data.MoleculeDataset(val_data[0])\n", + "test_dset = data.MoleculeDataset(test_data[0])\n", + "train_loader = data.build_dataloader(train_dset)\n", + "val_loader = data.build_dataloader(val_dset, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a model with a custom loss function" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "n_classes = max(ys).item() + 1\n", + "\n", + "metrics = [ChempropMulticlassAUROC(num_classes = n_classes)]\n", + "\n", + "model = models.MPNN(\n", + " nn.BondMessagePassing(), \n", + " nn.NormAggregation(), \n", + " nn.MulticlassClassificationFFN(n_classes = n_classes), \n", + " metrics = metrics\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run training" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (7) is smaller than the logging interval Trainer(log_every_n_steps=50). 
Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + "\n", + " | Name | Type | Params | Mode \n", + "------------------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | NormAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MulticlassClassificationFFN | 91.2 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "------------------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.276 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/core/saving.py:363: Skipping 'metrics' parameter because it is not possible to safely dump to YAML.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/multiclass_auroc 0.6266666650772095 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/multiclass_auroc \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.6266666650772095 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "[{'test/multiclass_auroc': 0.6266666650772095}]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trainer = pl.Trainer(max_epochs=2)\n", + "trainer.fit(model, train_loader, val_loader)\n", + "trainer.test(model, test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/docs/source/tutorial/python/models/aggregation.ipynb b/chemprop/docs/source/tutorial/python/models/aggregation.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..f99275ec1ed3db62386473ecf7aa0936730dc772 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/models/aggregation.ipynb @@ -0,0 +1,256 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aggregation" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from chemprop.nn.agg import MeanAggregation, SumAggregation, NormAggregation, AttentiveAggregation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is example output from [message 
passing](./message_passing.ipynb) for input to aggregation." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "n_atoms_in_batch = 7\n", + "hidden_dim = 3\n", + "example_message_passing_output = torch.randn(n_atoms_in_batch, hidden_dim)\n", + "which_atoms_in_which_molecule = torch.tensor([0, 0, 1, 1, 1, 1, 2]).long()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Combine nodes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The aggregation layer combines the node level representations into a graph level representation (usually atoms -> molecule)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mean and sum aggregation " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mean aggregation is recommended when the property to predict does not depend on the number of atoms in the molecules (intensive). Sum aggregation is recommended when the property is extensive, though usually norm aggregation is better." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "mean_agg = MeanAggregation()\n", + "sum_agg = SumAggregation()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.4593, -0.1808, -0.3459],\n", + " [ 0.9343, -0.1746, 0.7430],\n", + " [-0.4747, -0.9394, -0.3877]])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean_agg(H=example_message_passing_output, batch=which_atoms_in_which_molecule)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.9187, -0.3616, -0.6917],\n", + " [ 3.7373, -0.6986, 2.9720],\n", + " [-0.4747, -0.9394, -0.3877]])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum_agg(H=example_message_passing_output, batch=which_atoms_in_which_molecule)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Norm aggregation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Norm aggregation can be better than sum aggregation when the molecules are large as it is best to keep the hidden representation values on the order of 1 (though this is less important when batch normalization is used). The normalization constant can be customized (defaults to 100.0)."
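+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick sanity check of that relationship (a small illustrative cell reusing the example tensors above), norm aggregation with the default constant should simply be sum aggregation divided by 100:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# norm aggregation divides the summed atom representations by the normalization constant (default 100.0)\n",
+ "torch.allclose(\n",
+ "    SumAggregation()(H=example_message_passing_output, batch=which_atoms_in_which_molecule) / 100.0,\n",
+ "    NormAggregation()(H=example_message_passing_output, batch=which_atoms_in_which_molecule),\n",
+ ")"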
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "norm_agg = NormAggregation()\n", + "big_norm = NormAggregation(norm=1000.0)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.0092, -0.0036, -0.0069],\n", + " [ 0.0374, -0.0070, 0.0297],\n", + " [-0.0047, -0.0094, -0.0039]])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "norm_agg(H=example_message_passing_output, batch=which_atoms_in_which_molecule)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.0009, -0.0004, -0.0007],\n", + " [ 0.0037, -0.0007, 0.0030],\n", + " [-0.0005, -0.0009, -0.0004]])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "big_norm(H=example_message_passing_output, batch=which_atoms_in_which_molecule)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Attentive aggregation " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This uses a learned weighted average to combine atom representations within a molecule graph. It needs to be told the size of the hidden dimension as it uses the hidden representation of each atom to calculate the weight of that atom. " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "att_agg = AttentiveAggregation(output_size=hidden_dim)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.4551, -0.1791, -0.3438],\n", + " [ 0.9370, 0.1375, 0.3714],\n", + " [-0.4747, -0.9394, -0.3877]], grad_fn=)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "att_agg(H=example_message_passing_output, batch=which_atoms_in_which_molecule)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/docs/source/tutorial/python/models/basic_mpnn_model.ipynb b/chemprop/docs/source/tutorial/python/models/basic_mpnn_model.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..4a52195f73b7fb145dc9f9888bb1cd6f2d4c6606 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/models/basic_mpnn_model.ipynb @@ -0,0 +1,351 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Chemprop MPNN models" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.models.model import MPNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Composition" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A Chemprop `MPNN` model is made up of several submodules including a [message passing](./message_passing.ipynb) layer, an [aggregation](./aggregation.ipynb) layer, an optional batch normalization layer, and a [predictor](./predictor.ipynb) feed forward 
network layer. `MPNN` defines the training and predicting logic used by `lightning` when using a Chemprop model in their framework. " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): NormAggregation()\n", + " (bn): Identity()\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): Identity()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.nn import BondMessagePassing, NormAggregation, RegressionFFN\n", + "\n", + "mp = BondMessagePassing()\n", + "agg = NormAggregation()\n", + "ffn = RegressionFFN()\n", + "\n", + "basic_model = MPNN(mp, agg, ffn)\n", + "basic_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Batch normalization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Batch normalization can improve training by keeping the inputs to the FFN small and centered around zero. It is off by default, but can be turned on." 
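+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A quick comparison sketch (reusing the `mp`, `agg`, and `ffn` modules defined above): turning batch normalization on swaps the `bn` submodule from `Identity` to `BatchNorm1d`, as the full printout in the next cell also shows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# the default model keeps an Identity placeholder; batch_norm=True inserts a BatchNorm1d over the fingerprint\n",
+ "bn_model = MPNN(mp, agg, ffn, batch_norm=True)\n",
+ "type(basic_model.bn).__name__, type(bn_model.bn).__name__"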
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): NormAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): Identity()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MPNN(mp, agg, ffn, batch_norm=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Optimizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MPNN` also configures the optimizer used by lightning during training. The `torch.optim.Adam` optimizer is used with a Noam learning rate scheduler (defined in `chemprop.scheduler.NoamLR`). The following parameters are customizable:\n", + "\n", + " - number of warmup epochs, defaults to 2\n", + " - the initial learning rate, defaults to $10^{-4}$\n", + " - the max learning rate, defaults to $10^{-3}$\n", + " - the final learning rate, defaults to $10^{-4}$" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "model = MPNN(mp, agg, ffn, warmup_epochs=5, init_lr=1e-3, max_lr=1e-2, final_lr=1e-5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "During the validation and testing loops, lightning will use the metrics stored in `MPNN` to evaluate the current model's performance. The `MPNN` has a default metric defined by the type of predictor used. Other [metrics](../metrics.ipynb) can be given to `MPNN` to use instead." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn import metrics\n", + "\n", + "metrics_list = [metrics.RMSE(), metrics.MAE()]\n", + "model = MPNN(mp, agg, ffn, metrics=metrics_list)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fingerprinting and encoding" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MPNN` has two helper functions to get the hidden representations at different parts of the model. The fingerprint is the learned representation of the message passing layer after aggregation and batch normalization. The encoding is the hidden representation after a number of layers of the predictor. See the predictor notebook for more details. Note that the 0th encoding is equivalent to the fingerprint." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Example batch for the model. See the [data notebooks](../data/dataloaders.ipynb) for more details." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from chemprop.data import MoleculeDatapoint, MoleculeDataset\n", + "from chemprop.data import build_dataloader\n", + "\n", + "smis = [\"C\" * i for i in range(1, 4)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "dataset = MoleculeDataset([MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])\n", + "dataloader = build_dataloader(dataset)\n", + "batch = next(iter(dataloader))\n", + "bmg, V_d, X_d, *_ = batch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[0.0333],\n", + " [0.0331],\n", + " [0.0332]], grad_fn=)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "basic_model(bmg, V_d, X_d)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([3, 300])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "basic_model.fingerprint(bmg, V_d, X_d).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([3, 300])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "basic_model.encoding(bmg, V_d, X_d, i=1).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor(True)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(basic_model.fingerprint(bmg, V_d, X_d) == basic_model.encoding(bmg, V_d, X_d, i=0)).all()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/docs/source/tutorial/python/models/message_passing.ipynb b/chemprop/docs/source/tutorial/python/models/message_passing.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..d834341e24577056aa5030495ca4f61da80b2d95 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/models/message_passing.ipynb @@ -0,0 +1,232 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Message passing" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.message_passing.base import BondMessagePassing, AtomMessagePassing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example [dataloader](../data/dataloaders.ipynb) to make inputs for the message passing layer." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from chemprop.data import MoleculeDatapoint, MoleculeDataset, build_dataloader\n", + "\n", + "smis = [\"C\" * i for i in range(1, 4)]\n", + "ys = np.random.rand(len(smis), 1)\n", + "dataset = MoleculeDataset([MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])\n", + "dataloader = build_dataloader(dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Message passing schemes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are two message passing schemes. Chemprop prefers a D-MPNN scheme (`BondMessagePassing`) where messages are passed between directed edges (bonds) rather than between nodes (atoms) as would be done in a traditional MPNN (`AtomMessagePassing`)." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "mp = AtomMessagePassing()\n", + "mp = BondMessagePassing()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Input dimensions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By default, the bond message passing layer's input dimension is the sum of atom and bond features from the default [atom](../featurizers/atom_featurizers.ipynb) and [bond](../featurizers/bond_featurizers.ipynb) featurizers. If you use a custom featurizer, the message passing layer needs to be told when it is created.\n", + "\n", + "Also note that an atom message passing's default input dimension is the length of the atom features from the default atom featurizer." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from chemprop.featurizers import SimpleMoleculeMolGraphFeaturizer\n", + "\n", + "n_atom_features, n_bond_features = SimpleMoleculeMolGraphFeaturizer().shape\n", + "(n_atom_features + n_bond_features) == mp.W_i.in_features" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers import MultiHotAtomFeaturizer\n", + "\n", + "n_extra_bond_features = 12\n", + "featurizer = SimpleMoleculeMolGraphFeaturizer(\n", + " atom_featurizer=MultiHotAtomFeaturizer.organic(), extra_bond_fdim=n_extra_bond_features\n", + ")\n", + "\n", + "mp = BondMessagePassing(d_v=featurizer.atom_fdim, d_e=featurizer.bond_fdim)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If extra atom descriptors are used, the message passing layer also needs to be told. A separate weight matrix is created and applied to the concatenated hidden representation and extra descriptors after message passing is complete. The output dimension of the message passing layer is the sum of the hidden size and number of extra atom descriptors." 
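+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For example, with the default 300-dimensional hidden representation and the 28 extra atom descriptors used in the next cell, the output dimension should work out to 300 + 28 = 328. A small check of that arithmetic:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# default hidden size (300) plus the number of extra atom descriptors (28)\n",
+ "BondMessagePassing(d_vd=28).output_dim == 300 + 28"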
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "328" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n_extra_atom_descriptors = 28\n", + "mp = BondMessagePassing(d_vd=n_extra_atom_descriptors)\n", + "mp.output_dim" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Customization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following hyperparameters of the message passing layer are customizable:\n", + "\n", + " - the hidden dimension during message passing, default: 300\n", + " - whether a bias term used, default: False\n", + " - the number of message passing iterations, default: 3\n", + " - whether to pass messages on undirected edges, default: False\n", + " - the dropout probability, default: 0.0 (i.e. no dropout)\n", + " - which activation function, default: ReLU" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "mp = BondMessagePassing(\n", + " d_h=600, bias=True, depth=5, undirected=True, dropout=0.5, activation=\"tanh\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The output of message passing is a torch tensor of shape # of atoms in batch x length of hidden representation." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([6, 600])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "batch_molgraph, extra_atom_descriptors, *_ = next(iter(dataloader))\n", + "hidden_atom_representations = mp(batch_molgraph, extra_atom_descriptors)\n", + "hidden_atom_representations.shape" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/docs/source/tutorial/python/models/multicomponent_mpnn_model.ipynb b/chemprop/docs/source/tutorial/python/models/multicomponent_mpnn_model.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..2f142a975b184951bece7db7939ed45f41513fd4 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/models/multicomponent_mpnn_model.ipynb @@ -0,0 +1,208 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multicomponent models" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.message_passing import MulticomponentMessagePassing\n", + "from chemprop.models import MulticomponentMPNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Overview" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The basic Chemprop model is designed for a single molecule or reaction as input. A multicomponent Chemprop model organizes these basic building blocks to take multiple molecules/reactions as input. This is useful for properties that depend on multiple components like properties in solvents." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Message passing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MulticomponentMessagePassing` organizes the single component [message passing](./message_passing.ipynb) modules for each component in the multicomponent dataset. The individual message passing modules can be unique for each component, shared between some components, or shared between all components. If all components share the same message passing module, the `shared` flag can be set to `True`. Note that it doesn't make sense for components that use different featurizers (e.g. molecules and reactions) to use the same message passing module." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn import BondMessagePassing\n", + "\n", + "mp1 = BondMessagePassing(d_h=100)\n", + "mp2 = BondMessagePassing(d_h=600)\n", + "blocks = [mp1, mp2]\n", + "mcmp = MulticomponentMessagePassing(blocks=blocks, n_components=len(blocks))\n", + "\n", + "mp = BondMessagePassing()\n", + "mcmp = MulticomponentMessagePassing(blocks=[mp], n_components=2, shared=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "During the forward pass of the model, the output of each message passing block is concatenated after aggregation as input to the predictor." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Aggregation\n", + "\n", + "A single [aggregation](./aggregation.ipynb) module is used on all message passing outputs." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn import MeanAggregation\n", + "\n", + "agg = MeanAggregation()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Predictor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The [predictor](./predictor.ipynb) needs to be told the output dimension of the message passing layer." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn import RegressionFFN\n", + "\n", + "ffn = RegressionFFN(input_dim=mcmp.output_dim)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multicomponent MPNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The submodules are composed together in a `MulticomponentMPNN` model."
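+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Before looking at the composed model, a quick dimension check (an illustrative aside): because the two components share one block with the default hidden size, the concatenated message passing output, and therefore the FFN input dimension used above, should be twice the output size of the shared block."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# two components sharing one block, so the concatenated output is 2 x the block's output size\n",
+ "mcmp.output_dim == 2 * mp.output_dim"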
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MulticomponentMPNN(\n", + " (message_passing): MulticomponentMessagePassing(\n", + " (blocks): ModuleList(\n", + " (0-1): 2 x BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): Identity()\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=600, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): Identity()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mc_model = MulticomponentMPNN(mcmp, agg, ffn)\n", + "mc_model" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/docs/source/tutorial/python/models/predictor.ipynb b/chemprop/docs/source/tutorial/python/models/predictor.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0f09019bd64271a59b5c6a0cb900cff3b6aef756 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/models/predictor.ipynb @@ -0,0 +1,444 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Predictors" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from chemprop.nn.predictors import (\n", + " RegressionFFN,\n", + " BinaryClassificationFFN,\n", + " MulticlassClassificationFFN,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is example output of [aggregation](./aggregation.ipynb) for input to the predictor." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "n_datapoints_in_batch = 2\n", + "hidden_dim = 300\n", + "example_aggregation_output = torch.randn(n_datapoints_in_batch, hidden_dim)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Feed forward network" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The learned representation from message passing and aggregation is a vector like that of fixed representations. While other predictors like random forest could be used to make final predictions from this representation, Chemprop prefers and implements using a feed forward network as that allows for end-to-end training. 
Three basic Chemprop FFNs differ in the prediction task they are used for. Note that multiclass classification needs to know the number of classes." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "regression_ffn = RegressionFFN()\n", + "binary_class_ffn = BinaryClassificationFFN()\n", + "multi_class_ffn = MulticlassClassificationFFN(n_classes=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Input dimension" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The default input dimension of the predictor is the same as the default dimension of the message passing hidden representation. If your message passing hidden dimension is different, or if you have addition atom or datapoint descriptors, you need to change the predictor's input dimension." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[0.2080],\n", + " [0.2787]], grad_fn=)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ffn = RegressionFFN()\n", + "ffn(example_aggregation_output)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.0877],\n", + " [-0.2629]], grad_fn=)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mp_hidden_dim = 2\n", + "n_atom_descriptors = 1\n", + "mp_output = torch.randn(n_datapoints_in_batch, mp_hidden_dim + n_atom_descriptors)\n", + "example_datapoint_descriptors = torch.randn(n_datapoints_in_batch, 12)\n", + "\n", + "input_dim = mp_output.shape[1] + example_datapoint_descriptors.shape[1]\n", + "\n", + "ffn = RegressionFFN(input_dim=input_dim)\n", + "ffn(torch.cat([mp_output, example_datapoint_descriptors], dim=1))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Output dimension" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The number of tasks defaults to 1 but can be adjusted. Predictors that need to predict multiple values per task, like multiclass classification, will automatically adjust the output dimension." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([2, 4])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ffn = RegressionFFN(n_tasks=4)\n", + "ffn(example_aggregation_output).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([2, 4, 3])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ffn = MulticlassClassificationFFN(n_tasks=4, n_classes=3)\n", + "ffn(example_aggregation_output).shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Customization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following hyperparameters of the predictor are customizable:\n", + "\n", + " - the hidden dimension between layer, default: 300\n", + " - the number of layer, default 1\n", + " - the dropout probability, default: 0.0 (i.e. 
no dropout)\n", + " - which activation function, default: ReLU" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ 0.0121],\n", + " [-0.0760]], grad_fn=)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "custom_ffn = RegressionFFN(hidden_dim=600, n_layers=3, dropout=0.1, activation=\"tanh\")\n", + "custom_ffn(example_aggregation_output)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Intermediate hidden representations can also be extracted. Note that each predictor layer consists of an activation layer, followed by dropout, followed by a linear layer. The first predictor layer only has the linear layer." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([2, 600])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "layer = 2\n", + "custom_ffn.encode(example_aggregation_output, i=layer).shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=600, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): Tanh()\n", + " (1): Dropout(p=0.1, inplace=False)\n", + " (2): Linear(in_features=600, out_features=600, bias=True)\n", + " )\n", + " (2): Sequential(\n", + " (0): Tanh()\n", + " (1): Dropout(p=0.1, inplace=False)\n", + " (2): Linear(in_features=600, out_features=600, bias=True)\n", + " )\n", + " (3): Sequential(\n", + " (0): Tanh()\n", + " (1): Dropout(p=0.1, inplace=False)\n", + " (2): Linear(in_features=600, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): Identity()\n", + ")" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "custom_ffn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Criterion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each predictor has a criterion that is used as the [loss function](../loss_functions.ipynb) during training. The default criterion for a predictor is defined in the predictor class." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(RegressionFFN._T_default_criterion)\n", + "print(BinaryClassificationFFN._T_default_criterion)\n", + "print(MulticlassClassificationFFN._T_default_criterion)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A custom criterion can be given to the predictor." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn import MSE\n", + "\n", + "criterion = MSE(task_weights=torch.tensor([0.5, 1.0]))\n", + "ffn = RegressionFFN(n_tasks=2, criterion=criterion)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regression vs. 
classification" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In addition to using different loss functions, regression and classification predictors also differ in their tranforms of the model outputs during inference. \n", + "\n", + "Regression should use a [scaler transform](../scaling.ipynb) if target normalization is used during training.\n", + "\n", + "Classification uses a sigmoid (for binary classification) or a softmax (for multiclass) transform to keep class probability predictions between 0 and 1. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor(True)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "probs = binary_class_ffn(example_aggregation_output)\n", + "(0 < probs).all() and (probs < 1).all()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Other predictors coming soon" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Beta versions of predictors for uncertainty and spectral tasks will be finalized in v2.1." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.nn.predictors import (\n", + " MveFFN,\n", + " EvidentialFFN,\n", + " BinaryDirichletFFN,\n", + " MulticlassDirichletFFN,\n", + " SpectralFFN,\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/docs/source/tutorial/python/saving_and_loading.ipynb b/chemprop/docs/source/tutorial/python/saving_and_loading.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..9d1d8aa518bc5616a3964db75a737930542e00ff --- /dev/null +++ b/chemprop/docs/source/tutorial/python/saving_and_loading.ipynb @@ -0,0 +1,161 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Saving and loading models" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from chemprop.models.utils import save_model, load_model\n", + "from chemprop.models.model import MPNN\n", + "from chemprop.models.multi import MulticomponentMPNN\n", + "from chemprop import nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example buffer to save to and load from, to avoid creating new files when running this notebook. A real use case would probably save to and read from a file like `model.pt`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import io\n", + "\n", + "saved_model = io.BytesIO()\n", + "\n", + "# from pathlib import Path\n", + "# saved_model = Path(\"model.pt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Saving models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A valid model save file is a dictionary containing the hyper parameters and state dict of the model. `torch` is used to pickle the dictionary." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "model = MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), nn.RegressionFFN())\n", + "\n", + "save_model(saved_model, model)\n", + "\n", + "# model_dict = {\"hyper_parameters\": model.hparams, \"state_dict\": model.state_dict()}\n", + "# torch.save(model_dict, saved_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`lightning` will also automatically create checkpoint files during training. These `.ckpt` files are like `.pt` model files, but also contain information about training and can be used to restart training. See the `lightning` documentation for more details." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "from lightning.pytorch.callbacks import ModelCheckpoint\n", + "from lightning.pytorch import Trainer\n", + "\n", + "checkpointing = ModelCheckpoint(\n", + " dirpath=\"mycheckpoints\",\n", + " filename=\"best-{epoch}-{val_loss:.2f}\",\n", + " monitor=\"val_loss\",\n", + " mode=\"min\",\n", + " save_last=True,\n", + ")\n", + "trainer = Trainer(callbacks=[checkpointing])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loading models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MPNN` and `MulticomponentMPNN` each have a class method to load a model from either a model file `.pt` or a checkpoint file `.ckpt`. The method to load from a file works for either model files or checkpoint files, but won't load the saved training information from a checkpoint file." 
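+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you want to check what `save_model` wrote before using the class methods below, you can also load the raw dictionary back with `torch.load` (an optional sketch; `weights_only=False` is passed because the file stores hyperparameters, not just tensors). The keys should match the dictionary shown in the saving cell above."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# rewind the buffer first; not needed when loading from a real file path\n",
+ "saved_model.seek(0)\n",
+ "raw = torch.load(saved_model, weights_only=False)\n",
+ "sorted(raw.keys())"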
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Need to set the buffer stream position to the beginning, not necessary if using a file\n", + "saved_model.seek(0)\n", + "\n", + "model = MPNN.load_from_file(saved_model)\n", + "\n", + "# Other options\n", + "# model = MPNN.load_from_checkpoint(saved_model)\n", + "# model = MulticomponentMPNN.load_from_file(saved_model)\n", + "# model = MulticomponentMPNN.load_from_checkpoint(saved_model)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/docs/source/tutorial/python/scaling.ipynb b/chemprop/docs/source/tutorial/python/scaling.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..2ad64091c1602e5b4eb7a5dc589fead17a4175d5 --- /dev/null +++ b/chemprop/docs/source/tutorial/python/scaling.ipynb @@ -0,0 +1,687 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Scaling inputs and outputs" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from chemprop.models import MPNN\n", + "from chemprop.nn import BondMessagePassing, NormAggregation, RegressionFFN\n", + "from chemprop.nn.transforms import ScaleTransform, UnscaleTransform, GraphTransform" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an example [dataset](./data/datasets.ipynb) with extra atom and bond features, extra atom descriptors, and extra [datapoint](./data/datapoints.ipynb) descriptors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from chemprop.data import MoleculeDatapoint, MoleculeDataset\n", + "\n", + "smis = [\"CC\", \"CN\", \"CO\", \"CF\", \"CP\", \"CS\", \"CI\"]\n", + "ys = np.random.rand(len(smis), 1) * 100\n", + "\n", + "n_datapoints = len(smis)\n", + "n_atoms = 2\n", + "n_bonds = 1\n", + "n_extra_atom_features = 3\n", + "n_extra_bond_features = 4\n", + "n_extra_atom_descriptors = 5\n", + "n_extra_datapoint_descriptors = 6\n", + "\n", + "extra_atom_features = np.random.rand(n_datapoints, n_atoms, n_extra_atom_features)\n", + "extra_bond_features = np.random.rand(n_datapoints, n_bonds, n_extra_bond_features)\n", + "extra_atom_descriptors = np.random.rand(n_datapoints, n_atoms, n_extra_atom_descriptors)\n", + "extra_datapoint_descriptors = np.random.rand(n_datapoints, n_extra_datapoint_descriptors)\n", + "\n", + "datapoints = [\n", + " MoleculeDatapoint.from_smi(smi, y, x_d=x_d, V_f=V_f, E_f=E_f, V_d=V_d)\n", + " for smi, y, x_d, V_f, E_f, V_d in zip(\n", + " smis,\n", + " ys,\n", + " extra_datapoint_descriptors,\n", + " extra_atom_features,\n", + " extra_bond_features,\n", + " extra_atom_descriptors,\n", + " )\n", + "]\n", + "train_dset = MoleculeDataset(datapoints[:3])\n", + "val_dset = MoleculeDataset(datapoints[3:5])\n", + "test_dset = MoleculeDataset(datapoints[5:])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scaling targets - FFN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Scaling the target values before training can improve model performance and make training faster. The scaler for the targets should be fit to the training dataset and then applied to the validation dataset. This scaler is *not* applied to the test dataset. Instead the scaler is used to make an `UnscaleTransform` which is given to the predictor (FFN) layer and used automatically during inference. \n", + "\n", + "Note that currently the output_transform is saved both in the model's state_dict and in the model's hyperparameters. This may be changed in the future to align with `lightning`'s recommendations. You can ignore any messages about this." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "output_scaler = train_dset.normalize_targets()\n", + "val_dset.normalize_targets(output_scaler)\n", + "# test_dset targets not scaled\n", + "\n", + "output_transform = UnscaleTransform.from_standard_scaler(output_scaler)\n", + "\n", + "ffn = RegressionFFN(output_transform=output_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scaling extra atom and bond features - Message Passing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The atom and bond features generated by Chemprop [featurizers](./featurizers/molgraph_molecule_featurizer.ipynb) are either multi-hot or on the order of 1. We recommend scaling extra atom and bond features to also be on the order of 1. Like the target scaler, these scalers are fit to the training data, applied to the validation data, and then saved to the model (in this case the message passing layer) so that they are applied automatically to the test dataset during inference." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
StandardScaler()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "StandardScaler()" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "V_f_scaler = train_dset.normalize_inputs(\"V_f\")\n", + "E_f_scaler = train_dset.normalize_inputs(\"E_f\")\n", + "\n", + "val_dset.normalize_inputs(\"V_f\", V_f_scaler)\n", + "val_dset.normalize_inputs(\"E_f\", E_f_scaler)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The scalers are used to make `ScaleTransform`s. These are combined into a `GraphTransform` which is given to the message passing module. Note that `ScaleTransform` acts on the whole feature vector, not just the extra features. The `ScaleTransform`'s mean and scale arrays are padded with enough zeros and ones so that only the extra features are actually scaled. The amount of padding required is the length of the default features of the featurizer." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from chemprop.featurizers import SimpleMoleculeMolGraphFeaturizer\n", + "\n", + "featurizer = SimpleMoleculeMolGraphFeaturizer(\n", + " extra_atom_fdim=n_extra_atom_features, extra_bond_fdim=n_extra_bond_features\n", + ")\n", + "n_V_features = featurizer.atom_fdim - featurizer.extra_atom_fdim\n", + "n_E_features = featurizer.bond_fdim - featurizer.extra_bond_fdim\n", + "\n", + "V_f_transform = ScaleTransform.from_standard_scaler(V_f_scaler, pad=n_V_features)\n", + "E_f_transform = ScaleTransform.from_standard_scaler(E_f_scaler, pad=n_E_features)\n", + "\n", + "graph_transform = GraphTransform(V_f_transform, E_f_transform)\n", + "\n", + "mp = BondMessagePassing(graph_transform=graph_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you only have one of extra atom features or extra bond features, you can set the transform for the unused option to `torch.nn.Identity`." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "graph_transform = GraphTransform(V_transform=torch.nn.Identity(), E_transform=E_f_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scaling extra atom descriptors - Message Passing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The atom descriptors from message passing (before aggregation) are also likely to be on the order of 1 so extra atom descriptors should also be scaled. No padding is needed (unlike above) as this scaling is only applied to the extra atom descriptors. The `ScaleTransform` is given to the message passing module for use during inference." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "V_d_scaler = train_dset.normalize_inputs(\"V_d\")\n", + "val_dset.normalize_inputs(\"V_d\", V_d_scaler)\n", + "\n", + "V_d_transform = ScaleTransform.from_standard_scaler(V_d_scaler)\n", + "\n", + "mp = BondMessagePassing(V_d_transform=V_d_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A `GraphTransform` and `ScaleTransform` can both be given to the message passing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "mp = BondMessagePassing(graph_transform=graph_transform, V_d_transform=V_d_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scaling extra datapoint descriptors - MPNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The molecule/reaction descriptors from message passing (after aggregation) are batch normalized by default to be on the order of 1 (can be turned off, see the [model notebook](./models/basic_mpnn_model.ipynb)). Therefore we also recommend scaling the extra datapoint-level descriptors. The `ScaleTransform` for this is given to the `MPNN` or `MulticomponentMPNN` module." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "X_d_scaler = train_dset.normalize_inputs(\"X_d\")\n", + "val_dset.normalize_inputs(\"X_d\", X_d_scaler)\n", + "\n", + "X_d_transform = ScaleTransform.from_standard_scaler(X_d_scaler)\n", + "\n", + "chemprop_model = MPNN(\n", + " BondMessagePassing(), NormAggregation(), RegressionFFN(), X_d_transform=X_d_transform\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/docs/source/uncertainty.nblink b/chemprop/docs/source/uncertainty.nblink new file mode 100644 index 0000000000000000000000000000000000000000..dc48565d43c209b26725082a41097cb8cfd075e6 --- /dev/null +++ b/chemprop/docs/source/uncertainty.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/uncertainty.ipynb" +} diff --git a/chemprop/docs/source/use_featurizer_with_other_libraries.nblink b/chemprop/docs/source/use_featurizer_with_other_libraries.nblink new file mode 100644 index 0000000000000000000000000000000000000000..5112dd211f8a2693b821a4df548d2e8d3d750165 --- /dev/null +++ b/chemprop/docs/source/use_featurizer_with_other_libraries.nblink @@ -0,0 +1,3 @@ +{ +"path": "../../examples/use_featurizer_with_other_libraries.ipynb" +} diff --git a/chemprop/environment.yml b/chemprop/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..fd1fb6ca6355730c2c7ec83889ade7f67e5956d4 --- /dev/null +++ b/chemprop/environment.yml @@ -0,0 +1,17 @@ +name: chemprop +channels: + - conda-forge +dependencies: + - python>=3.11 + - pytorch>=2.1 + - astartes + - aimsim + - configargparse + - lightning>=2.0 + - numpy + - pandas + - rdkit + - scikit-learn + - scipy + - rich + - descriptastorus diff --git a/chemprop/examples/active_learning.ipynb b/chemprop/examples/active_learning.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..4200f0fa0fa1fca7cc85bfc575af2a6d864a0d6d --- /dev/null +++ b/chemprop/examples/active_learning.ipynb @@ -0,0 +1,843 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Active Learning\n", + "Active learning is an iterative process where a model actively selects the most informative data points to be labeled by an oracle (e.g. a human expert), optimizing the model's performance with fewer labeled samples. 
Active learning can be implemented with Chemprop through Python as demonstrated by this notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/active_learning.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import random\n", + "from typing import Tuple\n", + "\n", + "from lightning import pytorch as pl\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import torch\n", + "from torch.utils.data import DataLoader\n", + "\n", + "from chemprop import data, featurizers, models, nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load some data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = (\n", + " chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n", + ") # path to your data .csv file\n", + "df_input = pd.read_csv(input_path)\n", + "smis = df_input.loc[:, \"smiles\"].values\n", + "ys = df_input.loc[:, [\"lipo\"]].values\n", + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook we use three sets of data: a starting set of training data, a set of data to select additional training data from, and a set of data to test the model on. The set of data to select additional training data from could be unlabeled, but for this example all the data already has labels." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.mol for d in all_data] # RDKit Mol objects are used for structure-based splits\n", + "splitting_indices = data.make_split_indices(mols, \"random\", (0.1, 0.8, 0.1))\n", + "starting_data, additional_data, test_data = data.split_data_by_indices(all_data, *splitting_indices)\n", + "starting_data, additional_data, test_data = starting_data[0], additional_data[0], test_data[0]\n", + "test_loader = data.build_dataloader(data.MoleculeDataset(test_data), shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "During each iteration of active learning, the training data will be split into training and validation sets and packaged into data loaders, so we make a helper function to do this."
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def get_dataloaders(trainval_data) -> Tuple[DataLoader]:\n", + " trainval_mols = [d.mol for d in trainval_data]\n", + " train_indices, _, val_indices = data.make_split_indices(\n", + " trainval_mols, \"random\", (0.9, 0.0, 0.1)\n", + " )\n", + " train_data, val_data, _ = data.split_data_by_indices(\n", + " trainval_data, train_indices, val_indices, None\n", + " )\n", + "\n", + " train_dset = data.MoleculeDataset(train_data[0])\n", + " scaler = train_dset.normalize_targets()\n", + "\n", + " val_dset = data.MoleculeDataset(val_data[0])\n", + " val_dset.normalize_targets(scaler)\n", + "\n", + " train_loader = data.build_dataloader(train_dset)\n", + " val_loader = data.build_dataloader(val_dset, shuffle=False)\n", + " return train_loader, val_loader, scaler" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We also define a helper function to construct a chemprop model. Because this is a regression task, the targets of the training data are normalized, and the model needs the scaler that was used so that it can unnormalize its predictions." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def get_mpnn(scaler):\n", + " output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)\n", + " ffn = nn.MveFFN(output_transform=output_transform)\n", + " mpnn = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), ffn, batch_norm=False)\n", + " return mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We also need a lightning trainer to run the model." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False, enable_progress_bar=False, accelerator=\"cpu\", devices=1, max_epochs=20\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change active learning parameters here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A priority function (or acquisition function) guides the active learning process by selecting the most informative data points to label next. A good choice for such a function is the uncertainty of a model's output on each data point." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# rank datapoints based on priority, priority determined by variance\n", + "def priority_function(mpnn, datapoint):\n", + " dataset = data.MoleculeDataset([datapoint])\n", + " loader = data.build_dataloader(dataset, batch_size=1)\n", + " output = trainer.predict(mpnn, loader)\n", + " output = torch.concat(output, dim=0)\n", + " return output[..., 1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If our additional data were unlabeled, we would need a way to get the labels for the selected data points. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# the oracle takes a list of potentially unlabeled datapoints to be labeled for the next active learning iteration.\n", + "def request_labels(new_data):\n", + " # adding new data labels:\n", + " # for datapoint in new_data:\n", + " # datapoint.y = {label}\n", + " return" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Lastly, we also need to decide how many data points to add to our training set in each iteration." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# number of new datapoints added to trainval pool each iteration.\n", + "query_size = len(additional_data) // 8" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start training" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by training a model on the initial training data." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Dropping last batch of size 1 to avoid issues with batch normalization (dataset size = 1, batch_size = 64)\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/utilities/data.py:105: Total length of `DataLoader` across ranks is zero. Please make sure this was your intention.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n" + ] + } + ], + "source": [ + "train_loader, val_loader, scaler = get_dataloaders(starting_data)\n", + "mpnn = get_mpnn(scaler)\n", + "trainer.fit(mpnn, train_loader, val_loader)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can start the active learning loop. 
In each iteration, we train a model on the current training data, use the model to select the most informative data points (the ones where the model is least certain), add them to the training data, and repeat." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              1.2045652866363525     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.2045652866363525 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              0.9172996282577515     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9172996282577515 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              1.0593369007110596     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.0593369007110596 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse               1.151768445968628     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.151768445968628 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              1.2037131786346436     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.2037131786346436 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              1.1304174661636353     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.1304174661636353 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              1.0078696012496948     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.0078696012496948 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│         test/mse              0.9942679405212402     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9942679405212402 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "trainval_data = starting_data\n", + "results = []\n", + "\n", + "for _ in range(len(additional_data) // query_size):\n", + " # sort new datapoints by priority using priority function\n", + " priority_remaining_data = [\n", + " (priority_function(mpnn, datapoint), datapoint) for datapoint in additional_data\n", + " ]\n", + " sorted_remaining_data = [\n", + " datapoint\n", + " for unc, datapoint in sorted(priority_remaining_data, key=lambda d: d[0], reverse=True)\n", + " ]\n", + "\n", + " new_data = sorted_remaining_data[:query_size]\n", + " additional_data = additional_data[query_size:]\n", + "\n", + " request_labels(new_data)\n", + " trainval_data.extend(new_data)\n", + "\n", + " train_loader, val_loader, scaler = get_dataloaders(trainval_data)\n", + "\n", + " mpnn = get_mpnn(scaler)\n", + " trainer.fit(mpnn, train_loader, val_loader)\n", + "\n", + " result = trainer.test(mpnn, test_loader)\n", + " results.append((len(trainval_data), result[0][\"test/mse\"]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally we can view the results. The model's performance will hopefully improve with each iteration of active learning. Though this notebook is just an example. We didn't train the model for many epochs, and we used a very small dataset, so we don't expect to see the model improve. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(20, 1.2045652866363525),\n", + " (30, 0.9172996282577515),\n", + " (40, 1.0593369007110596),\n", + " (50, 1.151768445968628),\n", + " (60, 1.2037131786346436),\n", + " (70, 1.1304174661636353),\n", + " (80, 1.0078696012496948),\n", + " (90, 0.9942679405212402)]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAArwAAAK7CAYAAAAQv1z7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACTKklEQVR4nOzdd3iV9f3/8dd9TvYke0BIwt6IbBAFB8qqfq3VasGB1lrbb7W0vyraVm1t1bZWa5114SqOr1tAQGUpiDLC3oSZhJC957l/f5ycA5FhAknuM56P68p1lXPuk/POXUxefPL+vD+GaZqmAAAAAB9ls7oAAAAAoD0ReAEAAODTCLwAAADwaQReAAAA+DQCLwAAAHwagRcAAAA+jcALAAAAn0bgBQAAgE8j8AIAAMCnEXgBeIQnnnhChmFowIABZ/w5cnJydP/99ysrK+uE5+6//34ZhnEWFZ6ZOXPmyDAM7du3r8Pfu7UMw9D9999vdRke4bv3YuvWrbr//vu94v9HACci8ALwCC+99JIkacuWLVq9evUZfY6cnBw98MADJw28t9xyi1atWnU2Jfq8VatW6ZZbbrG6DI+0detWPfDAAwRewEsReAFYbs2aNdqwYYOmTJkiSXrxxRfb/D26dOmiUaNGtfnn9VT19fVqaGho1WtGjRqlLl26tFNFrdPY2Kja2lqrywDgIwi8ACznCrgPP/ywxowZozfffFNVVVUnXHf48GHdeuutSktLU1BQkFJTU3XVVVfpyJEjWrp0qYYPHy5Juummm2QYRrNfS3+3peGKK65Qenq6HA7HCe8zcuRInXvuue4/m6app59+Wuecc45CQ0MVExOjq666Snv37j3jr/mzzz7TRRddpKioKIWFhWns2LH6/PPPm12ze/du3XTTTerZs6fCwsLUuXNnTZs2TZs2bWp23dKlS2UYhl577TX95je/UefOnRUcHKzdu3frxhtvVEREhHbv3q3JkycrIiJCaWlp+s1vfnNCoPzur/Fd7RhLlizRz3/+c8XHxysuLk5XXnmlcnJymr22trZWv/nNb5ScnKywsDCdf/75Wrt2rTIyMnTjjTee9l7s27dPhmHob3/7mx588EFlZmYqODhYS5YskeT8B9EPfvADxcbGKiQkREOGDNHbb7/d7HNUVVXpt7/9rTIzMxUSEqLY2FgNGzZMc+fOdV8zfvx4jR8//oT3v/HGG5WRkXHK+ubMmaMf/ehHkqQJEya4/27NmTNHkrR+/XpNnTpViYmJCg4OVmpqqqZMmaJDhw6d9usG0HEIvAAsVV1drblz52r48OEaMGCAZs6cqfLycr3zzjvNrjt8+LCGDx+u999/X7NmzdKCBQv0+OOPKzo6WsXFxTr33HP18ssvS5J+//vfa9WqVaf9Ff3MmTN14MABffHFF80e3759u7755hvddNNN7sd+9rOf6c4779TFF1+sDz74QE8//bS2bNmiMWPG6MiRI63+ml9//XVNnDhRUVFReuWVV/T2228rNjZWl156abPQm5OTo7i4OD388MP69NNP9dRTTykgIEAjR47Ujh07Tvi8s2fP1oEDB/Tss8/q448/VmJioiTnau8PfvADXXTRRfrwww81c+ZMPfbYY3rkkUdaVO8tt9yiwMBA/fe//9Xf/vY3LV26VNOnT292zU033aTHH39cN910kz788EP98Ic/1P/8z/+opKSkxffliSee0BdffKF//OMfWrBggfr06aMlS5Zo7NixKikp0bPPPqsPP/xQ55xzjq655hp34JSkWbNm6ZlnntGvfvUrffrpp3rttdf0ox/9SIWFhS1+/1OZMmWK/vrXv0qSnnrqKfffrSlTpqiyslKXXHKJjhw5oqeeekqLFy/W448/rq5du6q8vPys3xtAGzEBwEKvvvqqKcl89tlnTdM0zfLycjMiIsIcN25cs+tmzpxpBgYGmlu3bj3l5/r2229NSebLL798wnP33Xefefy3vPr6ejMpKcm87rrrml33u9/9zgwKCjILCgpM0zTNVatWmZLMRx99tNl1Bw8eNENDQ83f/e53p/36Xn75ZVOSmZ2dbZqmaVZWVpqxsbHmtGnTml3X2NhoDh482BwxYsQpP1dDQ4NZV1dn9uzZ0/z1r3/tfnzJkiWmJPP8888/4TU33HCDKcl8++23mz0+efJks3fv3s0ek2Ted999J9R+++23N7vub3/7mynJzM3NNU3TNLds2WJKMu+6665m182dO9eUZN5www2n/JpM0zSzs7NNSWb37t3Nurq6Zs/16dPHHDJkiFlfX9/s8alTp5opKSlmY2OjaZqmOWDAAPOKK6447ftccMEF5gUXXHDC4zfccIOZnp7e7LHv3ot33nnHlGQuWbKk2XVr1qwxJZkffPDBad8bgLVY4QVgqRdffFGhoaH68Y9/LEmKiIjQj370I61YsUK7du1yX7dgwQJNmDBBffv2bZP3DQgI0PTp0/Xee++ptLRUkrNv9LXXXtPll1+uuLg4SdInn3wiwzA0ffp0NTQ0uD+Sk5M1ePBgLV26tFXvu3LlShUVFemGG25o9vkcDocuu+wyffvtt6qsrJQkNTQ06K9//av69eunoKAgBQQEKCgoSLt27dK2bdtO+Nw//OEPT/qehmFo2rRpzR4bNGiQ9u/f36Kaf/CDH5zwWknu1y9btkySdPXVVze77qqrrlJAQECL3sP1PoGBge4/7969W9u3b9dPfvITSWp2vyZPnqzc3Fz3SveIESO0YMEC3X333Vq6dKmqq6tb/L5no0ePHoqJidFdd92lZ599Vlu3bu2Q9wXQOgReAJbZvXu3li9frilTpsg0TZWUlKikpERXXXWVpGOTGyTp6NGjbb6haubMmaqpqdGbb74pSVq4cKFyc3ObtTMcOXJEpmkqKSlJgY
[base64-encoded PNG image data omitted: notebook output plot "Active learning results" (x-axis: Train/Validation pool size, y-axis: MSE)]
+      "text/plain": [
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = plt.figure(figsize=(8, 8))\n", + "ax = fig.add_subplot(1, 1, 1)\n", + "ax.set_title(\"Active learning results\")\n", + "ax.set_xlabel(\"Train/Validation pool size\")\n", + "ax.set_ylabel(\"MSE\")\n", + "\n", + "ax.plot([a[0] for a in results], [a[1] for a in results])\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/examples/convert_v1_to_v2.ipynb b/chemprop/examples/convert_v1_to_v2.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..227cc49efa93b4e530321013cea7121ba830f2e0 --- /dev/null +++ b/chemprop/examples/convert_v1_to_v2.ipynb @@ -0,0 +1,495 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convert v1 to v2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/convert_v1_to_v2.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from pprint import pprint\n", + "from pathlib import Path\n", + "\n", + "from chemprop.utils.v1_to_v2 import convert_model_dict_v1_to_v2\n", + "from chemprop.models.model import MPNN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change model paths here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "model_v1_input_path = chemprop_dir / \"tests/data/example_model_v1_regression_mol.pt\" # path to v1 model .pt file\n", + "model_v2_output_path = Path.cwd() / \"converted_model.ckpt\" # path to save the converted model .ckpt file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load v1 model .pt file" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "model_v1_dict = torch.load(model_v1_input_path, weights_only=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['args',\n", + " 'state_dict',\n", + " 'data_scaler',\n", + " 'features_scaler',\n", + " 'atom_descriptor_scaler',\n", + " 'bond_descriptor_scaler',\n", + " 'atom_bond_scaler']\n" + ] + } + ], + "source": [ + "# Here are all the keys that is stored in v1 model\n", + "pprint(list(model_v1_dict.keys()))" + ] + }, + { + "cell_type": 
"code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'activation': 'ReLU',\n", + " 'adding_bond_types': True,\n", + " 'adding_h': False,\n", + " 'aggregation': 'mean',\n", + " 'aggregation_norm': 100,\n", + " 'atom_constraints': [],\n", + " 'atom_descriptor_scaling': True,\n", + " 'atom_descriptors': None,\n", + " 'atom_descriptors_path': None,\n", + " 'atom_descriptors_size': 0,\n", + " 'atom_features_size': 0,\n", + " 'atom_messages': False,\n", + " 'atom_targets': [],\n", + " 'batch_size': 50,\n", + " 'bias': False,\n", + " 'bias_solvent': False,\n", + " 'bond_constraints': [],\n", + " 'bond_descriptor_scaling': True,\n", + " 'bond_descriptors': None,\n", + " 'bond_descriptors_path': None,\n", + " 'bond_descriptors_size': 0,\n", + " 'bond_features_size': 0,\n", + " 'bond_targets': [],\n", + " 'cache_cutoff': 10000,\n", + " 'checkpoint_dir': None,\n", + " 'checkpoint_frzn': None,\n", + " 'checkpoint_path': None,\n", + " 'checkpoint_paths': None,\n", + " 'class_balance': False,\n", + " 'config_path': None,\n", + " 'constraints_path': None,\n", + " 'crossval_index_dir': None,\n", + " 'crossval_index_file': None,\n", + " 'crossval_index_sets': None,\n", + " 'cuda': False,\n", + " 'data_path': '/Users/hwpang/Software/chemprop/tests/data/regression.csv',\n", + " 'data_weights_path': None,\n", + " 'dataset_type': 'regression',\n", + " 'depth': 3,\n", + " 'depth_solvent': 3,\n", + " 'device': device(type='cpu'),\n", + " 'dropout': 0.0,\n", + " 'empty_cache': False,\n", + " 'ensemble_size': 1,\n", + " 'epochs': 1,\n", + " 'evidential_regularization': 0,\n", + " 'explicit_h': False,\n", + " 'extra_metrics': [],\n", + " 'features_generator': None,\n", + " 'features_only': False,\n", + " 'features_path': None,\n", + " 'features_scaling': True,\n", + " 'features_size': None,\n", + " 'ffn_hidden_size': 300,\n", + " 'ffn_num_layers': 2,\n", + " 'final_lr': 0.0001,\n", + " 'folds_file': None,\n", + " 'freeze_first_only': False,\n", + " 'frzn_ffn_layers': 0,\n", + " 'gpu': None,\n", + " 'grad_clip': None,\n", + " 'hidden_size': 300,\n", + " 'hidden_size_solvent': 300,\n", + " 'ignore_columns': None,\n", + " 'init_lr': 0.0001,\n", + " 'is_atom_bond_targets': False,\n", + " 'keeping_atom_map': False,\n", + " 'log_frequency': 10,\n", + " 'loss_function': 'mse',\n", + " 'max_data_size': None,\n", + " 'max_lr': 0.001,\n", + " 'metric': 'rmse',\n", + " 'metrics': ['rmse'],\n", + " 'minimize_score': True,\n", + " 'mpn_shared': False,\n", + " 'multiclass_num_classes': 3,\n", + " 'no_adding_bond_types': False,\n", + " 'no_atom_descriptor_scaling': False,\n", + " 'no_bond_descriptor_scaling': False,\n", + " 'no_cache_mol': False,\n", + " 'no_cuda': False,\n", + " 'no_features_scaling': False,\n", + " 'no_shared_atom_bond_ffn': False,\n", + " 'num_folds': 1,\n", + " 'num_lrs': 1,\n", + " 'num_tasks': 1,\n", + " 'num_workers': 8,\n", + " 'number_of_molecules': 1,\n", + " 'overwrite_default_atom_features': False,\n", + " 'overwrite_default_bond_features': False,\n", + " 'phase_features_path': None,\n", + " 'pytorch_seed': 0,\n", + " 'quiet': False,\n", + " 'reaction': False,\n", + " 'reaction_mode': 'reac_diff',\n", + " 'reaction_solvent': False,\n", + " 'resume_experiment': False,\n", + " 'save_dir': '/Users/hwpang/Software/test_chemprop_v1_to_v2/fold_0',\n", + " 'save_preds': False,\n", + " 'save_smiles_splits': True,\n", + " 'seed': 0,\n", + " 'separate_test_atom_descriptors_path': None,\n", + " 'separate_test_bond_descriptors_path': 
None,\n", + " 'separate_test_constraints_path': None,\n", + " 'separate_test_features_path': None,\n", + " 'separate_test_path': None,\n", + " 'separate_test_phase_features_path': None,\n", + " 'separate_val_atom_descriptors_path': None,\n", + " 'separate_val_bond_descriptors_path': None,\n", + " 'separate_val_constraints_path': None,\n", + " 'separate_val_features_path': None,\n", + " 'separate_val_path': None,\n", + " 'separate_val_phase_features_path': None,\n", + " 'shared_atom_bond_ffn': True,\n", + " 'show_individual_scores': False,\n", + " 'smiles_columns': ['smiles'],\n", + " 'spectra_activation': 'exp',\n", + " 'spectra_phase_mask': None,\n", + " 'spectra_phase_mask_path': None,\n", + " 'spectra_target_floor': 1e-08,\n", + " 'split_key_molecule': 0,\n", + " 'split_sizes': [0.8, 0.1, 0.1],\n", + " 'split_type': 'random',\n", + " 'target_columns': None,\n", + " 'target_weights': None,\n", + " 'task_names': ['logSolubility'],\n", + " 'test': False,\n", + " 'test_fold_index': None,\n", + " 'train_data_size': 400,\n", + " 'undirected': False,\n", + " 'use_input_features': False,\n", + " 'val_fold_index': None,\n", + " 'warmup_epochs': 2.0,\n", + " 'weights_ffn_num_layers': 2}\n" + ] + } + ], + "source": [ + "# Here are the input arguments that is stored in v1 model\n", + "pprint(model_v1_dict['args'].__dict__)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['encoder.encoder.0.cached_zero_vector',\n", + " 'encoder.encoder.0.W_i.weight',\n", + " 'encoder.encoder.0.W_h.weight',\n", + " 'encoder.encoder.0.W_o.weight',\n", + " 'encoder.encoder.0.W_o.bias',\n", + " 'readout.1.weight',\n", + " 'readout.1.bias',\n", + " 'readout.4.weight',\n", + " 'readout.4.bias']\n" + ] + } + ], + "source": [ + "# Here are the state_dict that is stored in v1 model\n", + "pprint(list(model_v1_dict['state_dict'].keys()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convert loaded v1 model dictionary into v2 model dictionary" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "model_v2_dict = convert_model_dict_v1_to_v2(model_v1_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['epoch',\n", + " 'global_step',\n", + " 'pytorch-lightning_version',\n", + " 'state_dict',\n", + " 'loops',\n", + " 'callbacks',\n", + " 'optimizer_states',\n", + " 'lr_schedulers',\n", + " 'hparams_name',\n", + " 'hyper_parameters']\n" + ] + } + ], + "source": [ + "# Here are all the keys in the converted model\n", + "pprint(list(model_v2_dict.keys()))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['message_passing.W_i.weight',\n", + " 'message_passing.W_h.weight',\n", + " 'message_passing.W_o.weight',\n", + " 'message_passing.W_o.bias',\n", + " 'predictor.ffn.0.0.weight',\n", + " 'predictor.ffn.0.0.bias',\n", + " 'predictor.ffn.1.2.weight',\n", + " 'predictor.ffn.1.2.bias',\n", + " 'predictor.output_transform.mean',\n", + " 'predictor.output_transform.scale',\n", + " 'predictor.criterion.task_weights']\n" + ] + } + ], + "source": [ + "# Here are all the keys in the converted state_dict\n", + "pprint(list(model_v2_dict['state_dict'].keys()))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['batch_norm',\n", + " 'metrics',\n", + " 'warmup_epochs',\n", + " 'init_lr',\n", + " 'max_lr',\n", + " 'final_lr',\n", + " 'message_passing',\n", + " 'agg',\n", + " 'predictor']\n" + ] + } + ], + "source": [ + "# Here are all the keys in the converted hyper_parameters\n", + "pprint(list(model_v2_dict['hyper_parameters'].keys()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Save" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(model_v2_dict, model_v2_output_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load converted model" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "mpnn = MPNN.load_from_checkpoint(model_v2_output_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=147, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=433, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): Identity()\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0): RMSE(task_weights=[[1.0]])\n", + " (1): MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# now visually check the converted model is what is expected\n", + "mpnn" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/examples/extra_features_descriptors.ipynb b/chemprop/examples/extra_features_descriptors.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..1dcdfaf4af8c59446b932f1dddd8f6edae5f8955 --- /dev/null +++ b/chemprop/examples/extra_features_descriptors.ipynb @@ -0,0 +1,1101 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using extra features and descriptors\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook demonstrates how to use extra features and descriptors in addition to the default Chemprop featurizers.\n", + "\n", + "* Extra atom and bond features are used in addition to those calculated by Chemprop internally. 
\n", + "* Extra atom descriptors get incorporated into the atom descriptors from message passing via a learned linear transformation. \n", + "* Extra bond descriptors are not currently supported because the bond descriptors from message passing are not used for molecular property prediction. \n", + "* Extra molecule features can be used as extra datapoint descriptors, which are concatenated to the output of the aggregation layer before the final prediction layer." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/extra_features_descriptors.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loading packages and data" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "\n", + "from lightning import pytorch as pl\n", + "from rdkit import Chem\n", + "\n", + "from chemprop import data, featurizers, models, nn, utils" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n", + "smiles_column = \"smiles\"\n", + "target_columns = [\"lipo\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting extra features and descriptors" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `rdkit.Chem.Mol` representation of molecules is needed as input to many featurizers. Chemprop provides a helpful wrapper to rdkit to make these from SMILES." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [utils.make_mol(smi, keep_h=False, add_h=False) for smi in smis]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extra atom features, atom descriptors, bond features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extra atom and bond features frequently come from QM calculations. The calculation results can be saved to a file and then loaded in a notebook using pandas or numpy. The loaded atom or bond features can be a list of numpy arrays where each numpy array of features corresponds to a single molecule in the dataset. Each row in an array corresponds to a different atom or bond in the same order of atoms or bonds in the `rdkit.Chem.Mol` objects. \n", + "\n", + "The atom features could also be used as extra atom descriptors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# This code is just a placeholder for the actual QM calculation\n", + "\n", + "\n", + "def QM_calculation(mol):\n", + " n_extra_atom_feats = 10\n", + " n_extra_bond_feats = 4\n", + " extra_atom_features = np.array([np.random.randn(n_extra_atom_feats) for a in mol.GetAtoms()])\n", + " extra_bond_features = np.array([np.random.randn(n_extra_bond_feats) for a in mol.GetBonds()])\n", + " return extra_atom_features, extra_bond_features\n", + "\n", + "\n", + "extra_atom_featuress = []\n", + "extra_bond_featuress = []\n", + "\n", + "for mol in mols:\n", + " extra_atom_features, extra_bond_features = QM_calculation(mol)\n", + " extra_atom_featuress.append(extra_atom_features)\n", + " extra_bond_featuress.append(extra_bond_features)\n", + "\n", + "# Save to a file\n", + "np.savez(\"atom_features.npz\", *extra_atom_featuress)\n", + "np.savez(\"bond_features.npz\", *extra_bond_featuress)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "extra_atom_featuress = np.load(\"atom_features.npz\")\n", + "extra_atom_featuress = [extra_atom_featuress[f\"arr_{i}\"] for i in range(len(extra_atom_featuress))]\n", + "\n", + "extra_atom_descriptorss = extra_atom_featuress\n", + "\n", + "extra_bond_featuress = np.load(\"bond_features.npz\")\n", + "extra_bond_featuress = [extra_bond_featuress[f\"arr_{i}\"] for i in range(len(extra_bond_featuress))]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also get extra atom and bond features from other sources." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "atom_radii = {1: 0.79, 5: 1.2, 6: 0.91, 7: 0.75, 8: 0.65, 9: 0.57, 16: 1.1, 17: 0.97, 35: 1.1}\n", + "\n", + "extra_atom_featuress = [\n", + " np.vstack([np.array([[atom_radii[a.GetAtomicNum()]] for a in mol.GetAtoms()])]) for mol in mols\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extra molecule features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A QM calculation could also be used to get extra molecule features. Extra molecule features are different from extra atom and bond features in that they are stored in a single numpy array where each row corresponds to a single molecule in the dataset, instead of a list of numpy arrays." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def QM_calculation(mol):\n", + " n_extra_mol_feats = 7\n", + " return np.random.randn(n_extra_mol_feats)\n", + "\n", + "\n", + "extra_mol_features = np.array([QM_calculation(mol) for mol in mols])\n", + "\n", + "np.savez(\"mol_features.npz\", extra_mol_features)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "extra_mol_features = np.load(\"mol_features.npz\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The extra molecule features can also be calculated using built-in Chemprop featurizers or featurizers from other packages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "molecule_featurizer = featurizers.MorganBinaryFeaturizer()\n", + "\n", + "extra_mol_features = np.array([molecule_featurizer(mol) for mol in mols])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# First install other package\n", + "# !pip install descriptastorus\n", + "\n", + "# from descriptastorus.descriptors import rdNormalizedDescriptors\n", + "# generator = rdNormalizedDescriptors.RDKit2DNormalized()\n", + "# extra_mol_features = np.array([generator.process(smi)[1:] for smi in smis])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The molecule featurizers available in Chemprop are registered in `MoleculeFeaturizerRegistry`." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "morgan_binary\n", + "morgan_count\n", + "rdkit_2d\n", + "v1_rdkit_2d\n", + "v1_rdkit_2d_normalized\n" + ] + } + ], + "source": [ + "for MoleculeFeaturizer in featurizers.MoleculeFeaturizerRegistry.keys():\n", + "    print(MoleculeFeaturizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your model takes multiple components as input, you can use extra molecule features for each component as extra datapoint descriptors. Simply concatenate the extra molecule features together before passing them to the datapoints." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "extra_mol_features_comp1 = np.random.rand(len(mols), 5)\n", + "extra_mol_features_comp2 = np.random.rand(len(mols), 5)\n", + "\n", + "extra_datapoint_descriptors = np.concatenate(\n", + "    [extra_mol_features_comp1, extra_mol_features_comp2], axis=1\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Making datapoints, datasets, and dataloaders" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you have all the extra features and descriptors your model will use, you can make the datapoints." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "datapoints = [\n", + "    data.MoleculeDatapoint(mol, y, V_f=V_f, E_f=E_f, V_d=V_d, x_d=X_d)\n", + "    for mol, y, V_f, E_f, V_d, X_d in zip(\n", + "        mols,\n", + "        ys,\n", + "        extra_atom_featuress,\n", + "        extra_bond_featuress,\n", + "        extra_atom_descriptorss,\n", + "        extra_datapoint_descriptors,\n", + "    )\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After splitting the data, the datasets are made. To make a dataset, you need a `MolGraph` featurizer, which needs to be told the size of extra atom and bond features. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "n_extra_atom_feats = extra_atom_featuress[0].shape[1]\n", + "n_extra_bond_feats = extra_bond_featuress[0].shape[1]\n", + "\n", + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer(\n", + " extra_atom_fdim=n_extra_atom_feats, extra_bond_fdim=n_extra_bond_feats\n", + ")\n", + "\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " datapoints, train_indices, val_indices, test_indices\n", + ")\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0], featurizer)\n", + "val_dset = data.MoleculeDataset(val_data[0], featurizer)\n", + "test_dset = data.MoleculeDataset(test_data[0], featurizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Often scaling the extra features and descriptors improves model performance. The scalers for the extra features and descriptors should be fit to the training dataset, applied to the validation dataset, and then given to the model to apply to the test dataset at prediction time. This is the same as for scaling target values to improve model performance." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
StandardScaler()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "StandardScaler()" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "targets_scaler = train_dset.normalize_targets()\n", + "extra_atom_features_scaler = train_dset.normalize_inputs(\"V_f\")\n", + "extra_bond_features_scaler = train_dset.normalize_inputs(\"E_f\")\n", + "extra_atom_descriptors_scaler = train_dset.normalize_inputs(\"V_d\")\n", + "extra_datapoint_descriptors_scaler = train_dset.normalize_inputs(\"X_d\")\n", + "\n", + "val_dset.normalize_targets(targets_scaler)\n", + "val_dset.normalize_inputs(\"V_f\", extra_atom_features_scaler)\n", + "val_dset.normalize_inputs(\"E_f\", extra_bond_features_scaler)\n", + "val_dset.normalize_inputs(\"V_d\", extra_atom_descriptors_scaler)\n", + "val_dset.normalize_inputs(\"X_d\", extra_datapoint_descriptors_scaler)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Featurize the train and val datasets to save computation time.\n", + "train_dset.cache = True\n", + "val_dset.cache = True\n", + "\n", + "train_loader = data.build_dataloader(train_dset)\n", + "val_loader = data.build_dataloader(val_dset, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Making the model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The message passing layer needs to know the total size of atom and bond features (i.e. the sum of the sizes of the Chemprop atom and bond features and the extra atom and bond features). The `MolGraph` featurizer collects this information. The message passing layer also needs to know the number of extra atom descriptors.\n", + "\n", + "The extra atom and bond features scalers are combined into a graph transform which is given to the message passing layer to use at prediction time. To avoid scaling the atom and bond features from the internal Chemprop featurizers, the graph transform uses a pad equal to the length of features from the Chemprop internal atom and bond featurizers. This information is stored in the `MolGraph` featurizer.\n", + "\n", + "The extra atom descriptor scaler are also converted to a transform and given to the message passing layer to use at prediction time." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "n_V_features = featurizer.atom_fdim - featurizer.extra_atom_fdim\n", + "n_E_features = featurizer.bond_fdim - featurizer.extra_bond_fdim\n", + "\n", + "V_f_transform = nn.ScaleTransform.from_standard_scaler(extra_atom_features_scaler, pad=n_V_features)\n", + "E_f_transform = nn.ScaleTransform.from_standard_scaler(extra_bond_features_scaler, pad=n_E_features)\n", + "\n", + "graph_transform = nn.GraphTransform(V_f_transform, E_f_transform)\n", + "\n", + "V_d_transform = nn.ScaleTransform.from_standard_scaler(extra_atom_descriptors_scaler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_extra_atom_descs = extra_atom_descriptorss[0].shape[1]\n", + "\n", + "mp = nn.BondMessagePassing(\n", + "    d_v=featurizer.atom_fdim,\n", + "    d_e=featurizer.bond_fdim,\n", + "    d_vd=n_extra_atom_descs,\n", + "    graph_transform=graph_transform,\n", + "    V_d_transform=V_d_transform,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The predictor layer needs to know the size of its input, including any extra datapoint descriptors. " + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "ffn_input_dim = mp.output_dim + extra_datapoint_descriptors.shape[1]\n", + "\n", + "output_transform = nn.UnscaleTransform.from_standard_scaler(targets_scaler)\n", + "ffn = nn.RegressionFFN(input_dim=ffn_input_dim, output_transform=output_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The overall model is given the transform from the extra datapoint descriptors scaler." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X_d_transform = nn.ScaleTransform.from_standard_scaler(extra_datapoint_descriptors_scaler)\n", + "\n", + "chemprop_model = models.MPNN(mp, nn.NormAggregation(), ffn, X_d_transform=X_d_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training and prediction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The rest of the training and prediction is the same as in other Chemprop workflows." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + "    logger=False, enable_checkpointing=False, enable_progress_bar=True, max_epochs=5\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 325 K | train\n", + "1 | agg | NormAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | RegressionFFN | 96.6 K | train\n", + "4 | X_d_transform | ScaleTransform | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "422 K Trainable params\n", + "0 Non-trainable params\n", + "422 K Total params\n", + "1.690 Total estimated model params size (MB)\n", + "27 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/mse 0.9464761018753052 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9464761018753052 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(chemprop_model, test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/examples/hpopting.ipynb b/chemprop/examples/hpopting.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..768b48b5bad6e051cb9ce7cfafb2d7fd8728e67f --- /dev/null +++ b/chemprop/examples/hpopting.ipynb @@ -0,0 +1,1467 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Running hyperparameter optimization on Chemprop model using RayTune" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/hpopting.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install \".[hpopt]\"\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import packages" + ] + }, + { + "cell_type": "code", + 
"execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2024-10-22 09:03:28,414\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", + "2024-10-22 09:03:28,801\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", + "2024-10-22 09:03:29,333\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "from lightning import pytorch as pl\n", + "import ray\n", + "from ray import tune\n", + "from ray.train import CheckpointConfig, RunConfig, ScalingConfig\n", + "from ray.train.lightning import (RayDDPStrategy, RayLightningEnvironment,\n", + " RayTrainReportCallback, prepare_trainer)\n", + "from ray.train.torch import TorchTrainer\n", + "from ray.tune.search.hyperopt import HyperOptSearch\n", + "from ray.tune.search.optuna import OptunaSearch\n", + "from ray.tune.schedulers import FIFOScheduler\n", + "\n", + "from chemprop import data, featurizers, models, nn" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\" # path to your data .csv file\n", + "num_workers = 0 # number of workers for dataloader. 0 means using main process for data loading\n", + "smiles_column = 'smiles' # name of the column containing SMILES strings\n", + "target_columns = ['lipo'] # list of names of the columns containing targets\n", + "\n", + "hpopt_save_dir = Path.cwd() / \"hpopt\" # directory to save hyperopt results\n", + "hpopt_save_dir.mkdir(exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smileslipo
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc143.54
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...-1.18
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl3.69
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...3.37
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...3.10
.........
95CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...2.20
96CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...2.04
97CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...4.49
98COc1ccc(Cc2c(N)n[nH]c2N)cc10.20
99CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...2.00
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Make data points, splits, and datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.mol for d in all_data] # RDkit Mol objects are use for structure based splits\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0], featurizer)\n", + "scaler = train_dset.normalize_targets()\n", + "\n", + "val_dset = data.MoleculeDataset(val_data[0], featurizer)\n", + "val_dset.normalize_targets(scaler)\n", + "\n", + "test_dset = data.MoleculeDataset(test_data[0], featurizer)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Define helper function to train the model" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def train_model(config, train_dset, val_dset, num_workers, scaler):\n", + "\n", + " # config is a dictionary containing hyperparameters used for the trial\n", + " depth = int(config[\"depth\"])\n", + " ffn_hidden_dim = int(config[\"ffn_hidden_dim\"])\n", + " ffn_num_layers = int(config[\"ffn_num_layers\"])\n", + " message_hidden_dim = int(config[\"message_hidden_dim\"])\n", + "\n", + " train_loader = data.build_dataloader(train_dset, num_workers=num_workers, shuffle=True)\n", + " val_loader = data.build_dataloader(val_dset, num_workers=num_workers, shuffle=False)\n", + "\n", + " mp = nn.BondMessagePassing(d_h=message_hidden_dim, depth=depth)\n", + " agg = nn.MeanAggregation()\n", + " output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)\n", + " ffn = nn.RegressionFFN(output_transform=output_transform, input_dim=message_hidden_dim, hidden_dim=ffn_hidden_dim, n_layers=ffn_num_layers)\n", + " batch_norm = True\n", + " metric_list = [nn.metrics.RMSE(), nn.metrics.MAE()]\n", + " 
model = models.MPNN(mp, agg, ffn, batch_norm, metric_list)\n", + "\n", + " trainer = pl.Trainer(\n", + " accelerator=\"auto\",\n", + " devices=1,\n", + " max_epochs=20, # number of epochs to train for\n", + " # below are needed for Ray and Lightning integration\n", + " strategy=RayDDPStrategy(),\n", + " callbacks=[RayTrainReportCallback()],\n", + " plugins=[RayLightningEnvironment()],\n", + " )\n", + "\n", + " trainer = prepare_trainer(trainer)\n", + " trainer.fit(model, train_loader, val_loader)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define parameter search space" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "search_space = {\n", + " \"depth\": tune.qrandint(lower=2, upper=6, q=1),\n", + " \"ffn_hidden_dim\": tune.qrandint(lower=300, upper=2400, q=100),\n", + " \"ffn_num_layers\": tune.qrandint(lower=1, upper=3, q=1),\n", + " \"message_hidden_dim\": tune.qrandint(lower=300, upper=2400, q=100),\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
\n", + "

Tune Status

\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Current time:2024-10-22 09:05:01
Running for: 00:01:23.70
Memory: 10.9/15.3 GiB
\n", + "
\n", + "
\n", + "
\n", + "

System Info

\n", + " Using FIFO scheduling algorithm.
Logical resource usage: 2.0/12 CPUs, 0/0 GPUs\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "

Trial Status

\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc train_loop_config/de\n", + "pth train_loop_config/ff\n", + "n_hidden_dim train_loop_config/ff\n", + "n_num_layers train_loop_config/me\n", + "ssage_hidden_dim iter total time (s) train_loss train_loss_step val/rmse
TorchTrainer_f1a6e41aTERMINATED172.31.231.162:24873220002500 20 49.8815 0.0990423 0.168217 0.861368
TorchTrainer_d775c15dTERMINATED172.31.231.162:24953222002400 20 56.6533 0.069695 0.119898 0.90258
\n", + "
\n", + "
\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Setting up process group for: env:// [rank=0, world_size=1]\n", + "\u001b[36m(TorchTrainer pid=24873)\u001b[0m Started distributed worker processes: \n", + "\u001b[36m(TorchTrainer pid=24873)\u001b[0m - (ip=172.31.231.162, pid=24952) world_rank=0, local_rank=0, node_rank=0\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m GPU available: False, used: False\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m TPU available: False, using: 0 TPU cores\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000001)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1: 100%|██████████| 2/2 [00:01<00:00, 1.90it/s, v_num=0, train_loss_step=0.406, val_loss=0.904, train_loss_epoch=0.869]\n", + "Epoch 2: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000002)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 2: 100%|██████████| 2/2 [00:01<00:00, 1.29it/s, v_num=0, train_loss_step=1.290, val_loss=0.842, train_loss_epoch=1.210]\n", + "Epoch 3: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 3: 100%|██████████| 2/2 [00:01<00:00, 1.62it/s, v_num=0, train_loss_step=0.749, val_loss=0.912, train_loss_epoch=0.861]\n", + "Epoch 4: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 
Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000004)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 4: 100%|██████████| 2/2 [00:01<00:00, 1.31it/s, v_num=0, train_loss_step=0.578, val_loss=0.912, train_loss_epoch=0.792]\n", + "Epoch 5: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000005)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 5: 100%|██████████| 2/2 [00:01<00:00, 1.59it/s, v_num=0, train_loss_step=0.751, val_loss=0.887, train_loss_epoch=0.618]\n", + "Epoch 6: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 6: 100%|██████████| 2/2 [00:01<00:00, 1.53it/s, v_num=0, train_loss_step=0.569, val_loss=0.876, train_loss_epoch=0.450]\n", + "Epoch 7: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:15,207\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 7: 50%|█████ | 1/2 [00:00<00:00, 2.28it/s, v_num=0, train_loss_step=0.339, val_loss=0.876, train_loss_epoch=0.450]\u001b[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)\u001b[0m\n", + "Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 3.75it/s, v_num=0, train_loss_step=0.335, val_loss=0.854, train_loss_epoch=1.010]\u001b[32m [repeated 3x across cluster]\u001b[0m\n", + "Validation: | | 0/? 
[00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 2.01it/s, v_num=0, train_loss_step=0.335, val_loss=0.893, train_loss_epoch=0.703]\u001b[32m [repeated 3x across cluster]\u001b[0m\n", + "Epoch 2: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:17,399\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000009)\u001b[32m [repeated 6x across cluster]\u001b[0m\n", + "2024-10-22 09:04:17,944\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:18,760\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:19,250\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. 
A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:20,250\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 11: 50%|█████ | 1/2 [00:00<00:00, 1.25it/s, v_num=0, train_loss_step=0.175, val_loss=0.897, train_loss_epoch=0.258]\u001b[32m [repeated 8x across cluster]\u001b[0m\n", + "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.79it/s, v_num=0, train_loss_step=0.312, val_loss=0.897, train_loss_epoch=0.258]\u001b[32m [repeated 7x across cluster]\u001b[0m\n", + "Validation: | | 0/? [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m \u001b[32m [repeated 11x across cluster]\u001b[0m\n", + "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 7.84it/s]\u001b[A\u001b[32m [repeated 7x across cluster]\u001b[0m\n", + "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.56it/s, v_num=0, train_loss_step=0.312, val_loss=0.869, train_loss_epoch=0.258]\u001b[32m [repeated 7x across cluster]\u001b[0m\n", + "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.27it/s, v_num=0, train_loss_step=0.312, val_loss=0.869, train_loss_epoch=0.203]\u001b[32m [repeated 7x across cluster]\u001b[0m\n", + "Epoch 12: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:22,323\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. 
A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:22,766\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000013)\u001b[32m [repeated 8x across cluster]\u001b[0m\n", + "2024-10-22 09:04:24,404\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:25,524\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 14: 50%|█████ | 1/2 [00:01<00:01, 0.88it/s, v_num=0, train_loss_step=0.131, val_loss=0.841, train_loss_epoch=0.141] \u001b[32m [repeated 6x across cluster]\u001b[0m\n", + "Epoch 7: 100%|██████████| 2/2 [00:01<00:00, 1.13it/s, v_num=0, train_loss_step=0.368, val_loss=0.836, train_loss_epoch=0.399]\u001b[32m [repeated 5x across cluster]\u001b[0m\n", + "Validation: | | 0/? [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:28,260\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. 
A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000015)\u001b[32m [repeated 4x across cluster]\u001b[0m\n", + "2024-10-22 09:04:30,172\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 9: 50%|█████ | 1/2 [00:01<00:01, 0.72it/s, v_num=0, train_loss_step=0.216, val_loss=0.889, train_loss_epoch=0.254]\u001b[32m [repeated 3x across cluster]\u001b[0m\n", + "Epoch 9: 100%|██████████| 2/2 [00:01<00:00, 1.04it/s, v_num=0, train_loss_step=0.322, val_loss=0.889, train_loss_epoch=0.254]\u001b[32m [repeated 4x across cluster]\u001b[0m\n", + "Validation: | | 0/? [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m \u001b[32m [repeated 9x across cluster]\u001b[0m\n", + "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 4.73it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n", + "Epoch 9: 100%|██████████| 2/2 [00:02<00:00, 0.90it/s, v_num=0, train_loss_step=0.322, val_loss=0.910, train_loss_epoch=0.254]\u001b[32m [repeated 4x across cluster]\u001b[0m\n", + "Epoch 9: 100%|██████████| 2/2 [00:02<00:00, 0.70it/s, v_num=0, train_loss_step=0.322, val_loss=0.910, train_loss_epoch=0.237]\u001b[32m [repeated 4x across cluster]\u001b[0m\n", + "Epoch 16: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:33,534\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. 
A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:34,844\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/d775c15d/checkpoint_000011)\u001b[32m [repeated 5x across cluster]\u001b[0m\n", + "2024-10-22 09:04:35,472\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 18: 50%|█████ | 1/2 [00:01<00:01, 0.98it/s, v_num=0, train_loss_step=0.0962, val_loss=0.781, train_loss_epoch=0.116]\u001b[32m [repeated 5x across cluster]\u001b[0m\n", + "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.91it/s, v_num=0, train_loss_step=0.263, val_loss=0.889, train_loss_epoch=0.219]\u001b[32m [repeated 3x across cluster]\u001b[0m\n", + "Validation: | | 0/? [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:38,006\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. 
A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000019)\u001b[32m [repeated 3x across cluster]\u001b[0m\n", + "2024-10-22 09:04:40,708\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:04:41,380\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m `Trainer.fit` stopped: `max_epochs=20` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 13: 50%|█████ | 1/2 [00:00<00:00, 1.17it/s, v_num=0, train_loss_step=0.118, val_loss=0.849, train_loss_epoch=0.122]\u001b[32m [repeated 3x across cluster]\u001b[0m\n", + "Epoch 13: 100%|██████████| 2/2 [00:01<00:00, 1.62it/s, v_num=0, train_loss_step=0.0846, val_loss=0.849, train_loss_epoch=0.122]\u001b[32m [repeated 4x across cluster]\u001b[0m\n", + "Validation: | | 0/? [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 15: 50%|█████ | 1/2 [00:01<00:01, 0.64it/s, v_num=0, train_loss_step=0.0923, val_loss=0.839, train_loss_epoch=0.0974]\u001b[32m [repeated 2x across cluster]\u001b[0m\n", + "Epoch 15: 100%|██████████| 2/2 [00:02<00:00, 0.94it/s, v_num=0, train_loss_step=0.0867, val_loss=0.839, train_loss_epoch=0.0974]\u001b[32m [repeated 2x across cluster]\u001b[0m\n", + "Validation: | | 0/? 
[00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 15: 100%|██████████| 2/2 [00:03<00:00, 0.54it/s, v_num=0, train_loss_step=0.0867, val_loss=0.837, train_loss_epoch=0.0912]\n", + "Epoch 16: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 16: 100%|██████████| 2/2 [00:04<00:00, 0.41it/s, v_num=0, train_loss_step=0.0703, val_loss=0.837, train_loss_epoch=0.0774]\n", + "Epoch 17: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 17: 100%|██████████| 2/2 [00:01<00:00, 1.01it/s, v_num=0, train_loss_step=0.156, val_loss=0.836, train_loss_epoch=0.0882]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 18: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 18: 100%|██████████| 2/2 [00:01<00:00, 1.32it/s, v_num=0, train_loss_step=0.064, val_loss=0.830, train_loss_epoch=0.0675]\n", + "Epoch 19: 0%| | 0/2 [00:00= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 19: 100%|██████████| 2/2 [00:01<00:00, 1.55it/s, v_num=0, train_loss_step=0.120, val_loss=0.815, train_loss_epoch=0.0697]\n", + "Epoch 19: 100%|██████████| 2/2 [00:01<00:00, 1.13it/s, v_num=0, train_loss_step=0.120, val_loss=0.815, train_loss_epoch=0.0697]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m `Trainer.fit` stopped: `max_epochs=20` reached.\n", + "2024-10-22 09:05:01,809\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple 
times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n", + "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n", + "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n", + "2024-10-22 09:05:01,823\tINFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37' in 0.0305s.\n", + "2024-10-22 09:05:01,873\tINFO tune.py:1048 -- Total run time: 83.87 seconds (83.66 seconds for the tuning loop).\n" + ] + } + ], + "source": [ + "ray.init()\n", + "\n", + "scheduler = FIFOScheduler()\n", + "\n", + "# Scaling config controls the resources used by Ray\n", + "scaling_config = ScalingConfig(\n", + " num_workers=1,\n", + " use_gpu=False, # change to True if you want to use GPU\n", + ")\n", + "\n", + "# Checkpoint config controls the checkpointing behavior of Ray\n", + "checkpoint_config = CheckpointConfig(\n", + " num_to_keep=1, # number of checkpoints to keep\n", + " checkpoint_score_attribute=\"val_loss\", # Save the checkpoint based on this metric\n", + " checkpoint_score_order=\"min\", # Save the checkpoint with the lowest metric value\n", + ")\n", + "\n", + "run_config = RunConfig(\n", + " checkpoint_config=checkpoint_config,\n", + " storage_path=hpopt_save_dir / \"ray_results\", # directory to save the results\n", + ")\n", + "\n", + "ray_trainer = TorchTrainer(\n", + " lambda config: train_model(\n", + " config, train_dset, val_dset, num_workers, scaler\n", + " ),\n", + " scaling_config=scaling_config,\n", + " run_config=run_config,\n", + ")\n", + "\n", + "search_alg = HyperOptSearch(\n", + " n_initial_points=1, # number of random evaluations before tree parzen estimators\n", + " random_state_seed=42,\n", + ")\n", + "\n", + "# OptunaSearch is another search algorithm that can be used\n", + "# search_alg = OptunaSearch() \n", + "\n", + "tune_config = tune.TuneConfig(\n", + " metric=\"val_loss\",\n", + " mode=\"min\",\n", + " num_samples=2, # number of trials to run\n", + " scheduler=scheduler,\n", + " search_alg=search_alg,\n", + " trial_dirname_creator=lambda trial: str(trial.trial_id), # shorten filepaths\n", + " \n", + ")\n", + "\n", + "tuner = tune.Tuner(\n", + " ray_trainer,\n", + " param_space={\n", + " \"train_loop_config\": search_space,\n", + " },\n", + " tune_config=tune_config,\n", + ")\n", + "\n", + "# Start the hyperparameter search\n", + "results = tuner.fit()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hyperparameter optimization results" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ResultGrid<[\n", + " Result(\n", + " metrics={'train_loss': 0.09904231131076813, 'train_loss_step': 0.16821686923503876, 'val/rmse': 0.8613682389259338, 'val/mae': 0.7006751298904419, 'val_loss': 0.7419552206993103, 'train_loss_epoch': 0.09904231131076813, 'epoch': 19, 'step': 40},\n", + " path='/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a',\n", + " filesystem='local',\n", + " checkpoint=Checkpoint(filesystem=local, 
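The repeated `experiment_state.py` warnings above are informational: with `CheckpointConfig(num_to_keep=1)` and a checkpoint written every epoch on a small dataset, Ray Tune snapshots the experiment state more often than its 5-second threshold. As the log message itself suggests, keeping more checkpoints or checkpointing less frequently reduces the noise, and the environment variable named in the warning can be set before `ray.init()`. A minimal sketch, illustrative only since the exact semantics of that variable may differ between Ray versions:

```python
# Illustrative sketch, not part of the original notebook: quiet the excessive
# experiment-state snapshot warning by following the advice printed in the log.
# This must be set before ray.init() is called.
import os

os.environ["TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S"] = "0"
```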
path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000019)\n", + " ),\n", + " Result(\n", + " metrics={'train_loss': 0.06969495117664337, 'train_loss_step': 0.11989812552928925, 'val/rmse': 0.902579665184021, 'val/mae': 0.7176367044448853, 'val_loss': 0.8146500587463379, 'train_loss_epoch': 0.06969495117664337, 'epoch': 19, 'step': 40},\n", + " path='/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/d775c15d',\n", + " filesystem='local',\n", + " checkpoint=Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/d775c15d/checkpoint_000019)\n", + " )\n", + "]>" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
train_losstrain_loss_stepval/rmseval/maeval_losstrain_loss_epochepochsteptimestampcheckpoint_dir_name...pidhostnamenode_iptime_since_restoreiterations_since_restoreconfig/train_loop_config/depthconfig/train_loop_config/ffn_hidden_dimconfig/train_loop_config/ffn_num_layersconfig/train_loop_config/message_hidden_dimlogdir
00.0990420.1682170.8613680.7006750.7419550.09904219401729602279checkpoint_000019...24873Knathan-Laptop172.31.231.16249.88151620220002500f1a6e41a
10.0696950.1198980.9025800.7176370.8146500.06969519401729602299checkpoint_000019...24953Knathan-Laptop172.31.231.16256.65333620222002400d775c15d
\n", + "

2 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " train_loss train_loss_step val/rmse val/mae val_loss \\\n", + "0 0.099042 0.168217 0.861368 0.700675 0.741955 \n", + "1 0.069695 0.119898 0.902580 0.717637 0.814650 \n", + "\n", + " train_loss_epoch epoch step timestamp checkpoint_dir_name ... pid \\\n", + "0 0.099042 19 40 1729602279 checkpoint_000019 ... 24873 \n", + "1 0.069695 19 40 1729602299 checkpoint_000019 ... 24953 \n", + "\n", + " hostname node_ip time_since_restore iterations_since_restore \\\n", + "0 Knathan-Laptop 172.31.231.162 49.881516 20 \n", + "1 Knathan-Laptop 172.31.231.162 56.653336 20 \n", + "\n", + " config/train_loop_config/depth config/train_loop_config/ffn_hidden_dim \\\n", + "0 2 2000 \n", + "1 2 2200 \n", + "\n", + " config/train_loop_config/ffn_num_layers \\\n", + "0 2 \n", + "1 2 \n", + "\n", + " config/train_loop_config/message_hidden_dim logdir \n", + "0 500 f1a6e41a \n", + "1 400 d775c15d \n", + "\n", + "[2 rows x 27 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# results of all trials\n", + "result_df = results.get_dataframe()\n", + "result_df" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'depth': 2,\n", + " 'ffn_hidden_dim': 2000,\n", + " 'ffn_num_layers': 2,\n", + " 'message_hidden_dim': 500}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# best configuration\n", + "best_result = results.get_best_result()\n", + "best_config = best_result.config\n", + "best_config['train_loop_config']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best model checkpoint path: /home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000019/checkpoint.ckpt\n" + ] + } + ], + "source": [ + "# best model checkpoint path\n", + "best_result = results.get_best_result()\n", + "best_checkpoint_path = Path(best_result.checkpoint.path) / \"checkpoint.ckpt\"\n", + "print(f\"Best model checkpoint path: {best_checkpoint_path}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "ray.shutdown()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/examples/interpreting_monte_carlo_tree_search.ipynb b/chemprop/examples/interpreting_monte_carlo_tree_search.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..6bc828b536a17a43e5b69e3d9506a0191d0015fe --- /dev/null +++ b/chemprop/examples/interpreting_monte_carlo_tree_search.ipynb @@ -0,0 +1,1116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Interpretability with Monte Carlo Tree search\n", + "\n", + "Based on the paper Jin et al., [Multi-Objective Molecule Generation using Interpretable Substructures](https://arxiv.org/abs/2002.03244) and modified from Chemprop v1 [interpret.py](https://github.com/chemprop/chemprop/blob/master/chemprop/interpret.py)\n", + "\n", + 
"Please scroll to after the helper functions to change the model and data input and run the interpretation algorithm\n", + "\n", + "Note: \n", + "- The interpret function does not yet work with additional atom or bond features, as the substructure extracted doesn't necessarily have the corresponding additional atom or bond features readily available.\n", + "- It currently only works with single molecule model\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/interpreting_monte_carlo_tree_search.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import dataclass, field\n", + "import math\n", + "from pathlib import Path\n", + "import time\n", + "from typing import Callable, Union, Iterable\n", + "\n", + "from lightning import pytorch as pl\n", + "import numpy as np\n", + "import pandas as pd\n", + "from rdkit import Chem\n", + "import torch\n", + "\n", + "from chemprop import data, featurizers, models\n", + "from chemprop.models import MPNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define helper function to make model predictions from SMILES" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def make_prediction(\n", + " models: list[MPNN],\n", + " trainer: pl.Trainer,\n", + " smiles: list[str],\n", + ") -> np.ndarray:\n", + " \"\"\"Makes predictions on a list of SMILES.\n", + "\n", + " Parameters\n", + " ----------\n", + " models : list\n", + " A list of models to make predictions with.\n", + " smiles : list\n", + " A list of SMILES to make predictions on.\n", + "\n", + " Returns\n", + " -------\n", + " list[list[float]]\n", + " A list of lists containing the predicted values.\n", + " \"\"\"\n", + "\n", + " test_data = [data.MoleculeDatapoint.from_smi(smi) for smi in smiles]\n", + " test_dset = data.MoleculeDataset(test_data)\n", + " test_loader = data.build_dataloader(\n", + " test_dset, batch_size=1, num_workers=0, shuffle=False\n", + " )\n", + "\n", + " with torch.inference_mode():\n", + " sum_preds = []\n", + " for model in models:\n", + " predss = trainer.predict(model, test_loader)\n", + " preds = torch.cat(predss, 0)\n", + " preds = preds.cpu().numpy()\n", + " sum_preds.append(preds)\n", + "\n", + " # Ensemble predictions\n", + " sum_preds = sum(sum_preds)\n", + " avg_preds = sum_preds / len(models)\n", + "\n", + " return avg_preds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Classes/functions relevant to Monte Carlo Tree Search\n", + "\n", + "Mostly similar to the scripts from Chemprop v1 [interpret.py](https://github.com/chemprop/chemprop/blob/master/chemprop/interpret.py) with additional documentation" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + 
"metadata": {}, + "outputs": [], + "source": [ + "@dataclass\n", + "class MCTSNode:\n", + " \"\"\"Represents a node in a Monte Carlo Tree Search.\n", + "\n", + " Parameters\n", + " ----------\n", + " smiles : str\n", + " The SMILES for the substructure at this node.\n", + " atoms : list\n", + " A list of atom indices in the substructure at this node.\n", + " W : float\n", + " The total action value, which indicates how likely the deletion will lead to a good rationale.\n", + " N : int\n", + " The visit count, which indicates how many times this node has been visited. It is used to balance exploration and exploitation.\n", + " P : float\n", + " The predicted property score of the new subgraphs' after the deletion, shown as R in the original paper.\n", + " \"\"\"\n", + "\n", + " smiles: str\n", + " atoms: Iterable[int]\n", + " W: float = 0\n", + " N: int = 0\n", + " P: float = 0\n", + " children: list[...] = field(default_factory=list)\n", + "\n", + " def __post_init__(self):\n", + " self.atoms = set(self.atoms)\n", + "\n", + " def Q(self) -> float:\n", + " \"\"\"\n", + " Returns\n", + " -------\n", + " float\n", + " The mean action value of the node.\n", + " \"\"\"\n", + " return self.W / self.N if self.N > 0 else 0\n", + "\n", + " def U(self, n: int, c_puct: float = 10.0) -> float:\n", + " \"\"\"\n", + " Parameters\n", + " ----------\n", + " n : int\n", + " The sum of the visit count of this node's siblings.\n", + " c_puct : float\n", + " A constant that controls the level of exploration.\n", + " \n", + " Returns\n", + " -------\n", + " float\n", + " The exploration value of the node.\n", + " \"\"\"\n", + " return c_puct * self.P * math.sqrt(n) / (1 + self.N)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def find_clusters(mol: Chem.Mol) -> tuple[list[tuple[int, ...]], list[list[int]]]:\n", + " \"\"\"Finds clusters within the molecule. Jin et al. from [1]_ only allows deletion of one peripheral non-aromatic bond or one peripheral ring from each state,\n", + " so the clusters here are defined as non-ring bonds and the smallest set of smallest rings.\n", + "\n", + " Parameters\n", + " ----------\n", + " mol : RDKit molecule\n", + " The molecule to find clusters in.\n", + "\n", + " Returns\n", + " -------\n", + " tuple\n", + " A tuple containing:\n", + " - list of tuples: Each tuple contains atoms in a cluster.\n", + " - list of int: Each atom's cluster index.\n", + " \n", + " References\n", + " ----------\n", + " .. [1] Jin, Wengong, Regina Barzilay, and Tommi Jaakkola. \"Multi-objective molecule generation using interpretable substructures.\" International conference on machine learning. PMLR, 2020. 
https://arxiv.org/abs/2002.03244\n", + " \"\"\"\n", + "\n", + " n_atoms = mol.GetNumAtoms()\n", + " if n_atoms == 1: # special case\n", + " return [(0,)], [[0]]\n", + "\n", + " clusters = []\n", + " for bond in mol.GetBonds():\n", + " a1 = bond.GetBeginAtom().GetIdx()\n", + " a2 = bond.GetEndAtom().GetIdx()\n", + " if not bond.IsInRing():\n", + " clusters.append((a1, a2))\n", + "\n", + " ssr = [tuple(x) for x in Chem.GetSymmSSSR(mol)]\n", + " clusters.extend(ssr)\n", + "\n", + " atom_cls = [[] for _ in range(n_atoms)]\n", + " for i in range(len(clusters)):\n", + " for atom in clusters[i]:\n", + " atom_cls[atom].append(i)\n", + "\n", + " return clusters, atom_cls" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_subgraph_from_mol(mol: Chem.Mol, selected_atoms: set[int]) -> tuple[Chem.Mol, list[int]]:\n", + " \"\"\"Extracts a subgraph from an RDKit molecule given a set of atom indices.\n", + "\n", + " Parameters\n", + " ----------\n", + " mol : RDKit molecule\n", + " The molecule from which to extract a subgraph.\n", + " selected_atoms : list of int\n", + " The indices of atoms which form the subgraph to be extracted.\n", + "\n", + " Returns\n", + " -------\n", + " tuple\n", + " A tuple containing:\n", + " - RDKit molecule: The subgraph.\n", + " - list of int: Root atom indices from the selected indices.\n", + " \"\"\"\n", + "\n", + " selected_atoms = set(selected_atoms)\n", + " roots = []\n", + " for idx in selected_atoms:\n", + " atom = mol.GetAtomWithIdx(idx)\n", + " bad_neis = [y for y in atom.GetNeighbors() if y.GetIdx() not in selected_atoms]\n", + " if len(bad_neis) > 0:\n", + " roots.append(idx)\n", + "\n", + " new_mol = Chem.RWMol(mol)\n", + "\n", + " for atom_idx in roots:\n", + " atom = new_mol.GetAtomWithIdx(atom_idx)\n", + " atom.SetAtomMapNum(1)\n", + " aroma_bonds = [\n", + " bond for bond in atom.GetBonds() if bond.GetBondType() == Chem.rdchem.BondType.AROMATIC\n", + " ]\n", + " aroma_bonds = [\n", + " bond\n", + " for bond in aroma_bonds\n", + " if bond.GetBeginAtom().GetIdx() in selected_atoms\n", + " and bond.GetEndAtom().GetIdx() in selected_atoms\n", + " ]\n", + " if len(aroma_bonds) == 0:\n", + " atom.SetIsAromatic(False)\n", + "\n", + " remove_atoms = [\n", + " atom.GetIdx() for atom in new_mol.GetAtoms() if atom.GetIdx() not in selected_atoms\n", + " ]\n", + " remove_atoms = sorted(remove_atoms, reverse=True)\n", + " for atom in remove_atoms:\n", + " new_mol.RemoveAtom(atom)\n", + "\n", + " return new_mol.GetMol(), roots" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_subgraph(smiles: str, selected_atoms: set[int]) -> tuple[str, list[int]]:\n", + " \"\"\"Extracts a subgraph from a SMILES given a set of atom indices.\n", + "\n", + " Parameters\n", + " ----------\n", + " smiles : str\n", + " The SMILES string from which to extract a subgraph.\n", + " selected_atoms : list of int\n", + " The indices of atoms which form the subgraph to be extracted.\n", + "\n", + " Returns\n", + " -------\n", + " tuple\n", + " A tuple containing:\n", + " - str: SMILES representing the subgraph.\n", + " - list of int: Root atom indices from the selected indices.\n", + " \"\"\"\n", + " # try with kekulization\n", + " mol = Chem.MolFromSmiles(smiles)\n", + " Chem.Kekulize(mol)\n", + " subgraph, roots = extract_subgraph_from_mol(mol, selected_atoms)\n", + " try:\n", + " subgraph = Chem.MolToSmiles(subgraph, kekuleSmiles=True)\n", + " subgraph 
= Chem.MolFromSmiles(subgraph)\n", + " except Exception:\n", + " subgraph = None\n", + "\n", + " mol = Chem.MolFromSmiles(smiles) # de-kekulize\n", + " if subgraph is not None and mol.HasSubstructMatch(subgraph):\n", + " return Chem.MolToSmiles(subgraph), roots\n", + "\n", + " # If fails, try without kekulization\n", + " subgraph, roots = extract_subgraph_from_mol(mol, selected_atoms)\n", + " subgraph = Chem.MolToSmiles(subgraph)\n", + " subgraph = Chem.MolFromSmiles(subgraph)\n", + "\n", + " if subgraph is not None:\n", + " return Chem.MolToSmiles(subgraph), roots\n", + " else:\n", + " return None, None" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def mcts_rollout(\n", + " node: MCTSNode,\n", + " state_map: dict[str, MCTSNode],\n", + " orig_smiles: str,\n", + " clusters: list[set[int]],\n", + " atom_cls: list[set[int]],\n", + " nei_cls: list[set[int]],\n", + " scoring_function: Callable[[list[str]], list[float]],\n", + " min_atoms: int = 15,\n", + " c_puct: float = 10.0,\n", + ") -> float:\n", + " \"\"\"A Monte Carlo Tree Search rollout from a given MCTSNode.\n", + "\n", + " Parameters\n", + " ----------\n", + " node : MCTSNode\n", + " The MCTSNode from which to begin the rollout.\n", + " state_map : dict\n", + " A mapping from SMILES to MCTSNode.\n", + " orig_smiles : str\n", + " The original SMILES of the molecule.\n", + " clusters : list\n", + " Clusters of atoms.\n", + " atom_cls : list\n", + " Atom indices in the clusters.\n", + " nei_cls : list\n", + " Neighboring cluster indices.\n", + " scoring_function : function\n", + " A function for scoring subgraph SMILES using a Chemprop model.\n", + " min_atoms : int\n", + " The minimum number of atoms in a subgraph.\n", + " c_puct : float\n", + " The constant controlling the level of exploration.\n", + "\n", + " Returns\n", + " -------\n", + " float\n", + " The score of this MCTS rollout.\n", + " \"\"\"\n", + " # Return if the number of atoms is less than the minimum\n", + " cur_atoms = node.atoms\n", + " if len(cur_atoms) <= min_atoms:\n", + " return node.P\n", + "\n", + " # Expand if this node has never been visited\n", + " if len(node.children) == 0:\n", + " # Cluster indices whose all atoms are present in current subgraph\n", + " cur_cls = set([i for i, x in enumerate(clusters) if x <= cur_atoms])\n", + "\n", + " for i in cur_cls:\n", + " # Leaf atoms are atoms that are only involved in one cluster.\n", + " leaf_atoms = [a for a in clusters[i] if len(atom_cls[a] & cur_cls) == 1]\n", + "\n", + " # This checks\n", + " # 1. If there is only one neighbor cluster in the current subgraph (so that we don't produce unconnected graphs), or\n", + " # 2. 
If the cluster has only two atoms and the current subgraph has only one leaf atom.\n", + " # If either of the conditions is met, remove the leaf atoms in the current cluster.\n", + " if len(nei_cls[i] & cur_cls) == 1 or len(clusters[i]) == 2 and len(leaf_atoms) == 1:\n", + " new_atoms = cur_atoms - set(leaf_atoms)\n", + " new_smiles, _ = extract_subgraph(orig_smiles, new_atoms)\n", + " if new_smiles in state_map:\n", + " new_node = state_map[new_smiles] # merge identical states\n", + " else:\n", + " new_node = MCTSNode(new_smiles, new_atoms)\n", + " if new_smiles:\n", + " node.children.append(new_node)\n", + "\n", + " state_map[node.smiles] = node\n", + " if len(node.children) == 0:\n", + " return node.P # cannot find leaves\n", + "\n", + " scores = scoring_function([x.smiles for x in node.children])\n", + " for child, score in zip(node.children, scores):\n", + " child.P = score\n", + "\n", + " sum_count = sum(c.N for c in node.children)\n", + " selected_node = max(node.children, key=lambda x: x.Q() + x.U(sum_count, c_puct=c_puct))\n", + " v = mcts_rollout(\n", + " selected_node,\n", + " state_map,\n", + " orig_smiles,\n", + " clusters,\n", + " atom_cls,\n", + " nei_cls,\n", + " scoring_function,\n", + " min_atoms=min_atoms,\n", + " c_puct=c_puct,\n", + " )\n", + " selected_node.W += v\n", + " selected_node.N += 1\n", + "\n", + " return v" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def mcts(\n", + " smiles: str,\n", + " scoring_function: Callable[[list[str]], list[float]],\n", + " n_rollout: int,\n", + " max_atoms: int,\n", + " prop_delta: float,\n", + " min_atoms: int = 15,\n", + " c_puct: int = 10,\n", + ") -> list[MCTSNode]:\n", + " \"\"\"Runs the Monte Carlo Tree Search algorithm.\n", + "\n", + " Parameters\n", + " ----------\n", + " smiles : str\n", + " The SMILES of the molecule to perform the search on.\n", + " scoring_function : function\n", + " A function for scoring subgraph SMILES using a Chemprop model.\n", + " n_rollout : int\n", + " The number of MCTS rollouts to perform.\n", + " max_atoms : int\n", + " The maximum number of atoms allowed in an extracted rationale.\n", + " prop_delta : float\n", + " The minimum required property value for a satisfactory rationale.\n", + " min_atoms : int\n", + " The minimum number of atoms in a subgraph.\n", + " c_puct : float\n", + " The constant controlling the level of exploration.\n", + "\n", + " Returns\n", + " -------\n", + " list\n", + " A list of rationales each represented by a MCTSNode.\n", + " \"\"\"\n", + "\n", + " mol = Chem.MolFromSmiles(smiles)\n", + "\n", + " clusters, atom_cls = find_clusters(mol)\n", + " nei_cls = [0] * len(clusters)\n", + " for i, cls in enumerate(clusters):\n", + " nei_cls[i] = [nei for atom in cls for nei in atom_cls[atom]]\n", + " nei_cls[i] = set(nei_cls[i]) - {i}\n", + " clusters[i] = set(list(cls))\n", + " for a in range(len(atom_cls)):\n", + " atom_cls[a] = set(atom_cls[a])\n", + "\n", + " root = MCTSNode(smiles, set(range(mol.GetNumAtoms())))\n", + " state_map = {smiles: root}\n", + " for _ in range(n_rollout):\n", + " mcts_rollout(\n", + " root,\n", + " state_map,\n", + " smiles,\n", + " clusters,\n", + " atom_cls,\n", + " nei_cls,\n", + " scoring_function,\n", + " min_atoms=min_atoms,\n", + " c_puct=c_puct,\n", + " )\n", + "\n", + " rationales = [\n", + " node\n", + " for _, node in state_map.items()\n", + " if len(node.atoms) <= max_atoms and node.P >= prop_delta\n", + " ]\n", + "\n", + " return rationales" + ] + }, + { + 
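To make the selection step inside `mcts_rollout` concrete: a child is chosen by maximizing `Q() + U(n)`, the sum of its mean action value (exploitation) and an exploration bonus that grows with its prior score `P` and the total visit count of its siblings, and shrinks as the child itself is visited. A small sketch with made-up numbers, not taken from any molecule in this notebook:

```python
# Illustrative only: how one child node is scored during MCTS selection.
node = MCTSNode(smiles="c1ccccc1", atoms=range(6), W=1.2, N=3, P=0.8)
sibling_visits = 10  # sum of visit counts over all children of the current node

exploit = node.Q()                             # W / N
explore = node.U(sibling_visits, c_puct=10.0)  # c_puct * P * sqrt(n) / (1 + N)
print(exploit, explore, exploit + explore)
```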
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "model_path = (\n", + " chemprop_dir / \"tests\" / \"data\" / \"example_model_v2_regression_mol.pt\"\n", + ") # path to model checkpoint (.ckpt) or model file (.pt)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): GraphTransform(\n", + " (V_transform): Identity()\n", + " (E_transform): Identity()\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn = models.MPNN.load_from_file(model_path) # this is a dummy model for testing purposes\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data to run interpretation for" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "test_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n", + "smiles_column = \"smiles\"" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smileslipo
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc143.54
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...-1.18
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl3.69
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...3.37
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...3.10
.........
95CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...2.20
96CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...2.04
97CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...4.49
98COc1ccc(Cc2c(N)n[nH]c2N)cc10.20
99CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...2.00
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_test = pd.read_csv(test_path)\n", + "df_test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(logger=None, enable_progress_bar=False, accelerator=\"cpu\", devices=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Running interpretation" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# MCTS options\n", + "rollout = 10 # number of MCTS rollouts to perform. If mol.GetNumAtoms() > 50, consider setting n_rollout = 1 to avoid long computation time\n", + "\n", + "c_puct = 10.0 # constant that controls the level of exploration\n", + "\n", + "max_atoms = 20 # maximum number of atoms allowed in an extracted rationale\n", + "\n", + "min_atoms = 8 # minimum number of atoms in an extracted rationale\n", + "\n", + "prop_delta = 0.5 # Minimum score to count as positive.\n", + "# In this algorithm, if the predicted property from the substructure if larger than prop_delta, the substructure is considered satisfactory.\n", + "# This value depends on the property you want to interpret. 0.5 is a dummy value for demonstration purposes\n", + "\n", + "num_rationales_to_keep = 5 # number of rationales to keep for each molecule" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the scoring function. 
\"Score\" for a substructure is the predicted property value of the substructure.\n", + "\n", + "models = [mpnn]\n", + "\n", + "property_for_interpretation = \"lipo\"\n", + "\n", + "property_id = (\n", + " df_test.columns.get_loc(property_for_interpretation) - 1\n", + ") # property index in the dataset; -1 for the SMILES column\n", + "\n", + "\n", + "def scoring_function(smiles: list[str]) -> list[float]:\n", + " return make_prediction(\n", + " models=models,\n", + " trainer=trainer,\n", + " smiles=smiles,\n", + " )[:, property_id]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14',\n", + " 'COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23',\n", + " 'COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl',\n", + " 'OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3',\n", + " 'Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# only use the first 5 SMILES for demonstration purposes\n", + "all_smiles = df_test[smiles_column].tolist()[:5]\n", + "all_smiles" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 10 11 12 13 14\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 11 12 13 14 15\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 8 9 10 11 12\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 7 8 9 10 11\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 1 2 3 4 5\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 0 1 3 4 5\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 11 12 13 14 15\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 8 9 10 11 12\n", + "[19:04:06] Can't kekulize mol. Unkekulized atoms: 7 8 9 10 11\n", + "[19:04:06] Can't kekulize mol. 
Unkekulized atoms: 10 11 12 13 14\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 13 s, sys: 1.38 s, total: 14.4 s\n", + "Wall time: 3.67 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "results_df = {\"smiles\": [], property_for_interpretation: []}\n", + "\n", + "for i in range(num_rationales_to_keep):\n", + " results_df[f\"rationale_{i}\"] = []\n", + " results_df[f\"rationale_{i}_score\"] = []\n", + "\n", + "for smiles in all_smiles:\n", + " score = scoring_function([smiles])[0]\n", + " if score > prop_delta:\n", + " rationales = mcts(\n", + " smiles=smiles,\n", + " scoring_function=scoring_function,\n", + " n_rollout=rollout,\n", + " max_atoms=max_atoms,\n", + " prop_delta=prop_delta,\n", + " min_atoms=min_atoms,\n", + " c_puct=c_puct,\n", + " )\n", + " else:\n", + " rationales = []\n", + "\n", + " results_df[\"smiles\"].append(smiles)\n", + " results_df[property_for_interpretation].append(score)\n", + "\n", + " if len(rationales) == 0:\n", + " for i in range(num_rationales_to_keep):\n", + " results_df[f\"rationale_{i}\"].append(None)\n", + " results_df[f\"rationale_{i}_score\"].append(None)\n", + " else:\n", + " min_size = min(len(x.atoms) for x in rationales)\n", + " min_rationales = [x for x in rationales if len(x.atoms) == min_size]\n", + " rats = sorted(min_rationales, key=lambda x: x.P, reverse=True)\n", + "\n", + " for i in range(num_rationales_to_keep):\n", + " if i < len(rats):\n", + " results_df[f\"rationale_{i}\"].append(rats[i].smiles)\n", + " results_df[f\"rationale_{i}_score\"].append(rats[i].P)\n", + " else:\n", + " results_df[f\"rationale_{i}\"].append(None)\n", + " results_df[f\"rationale_{i}_score\"].append(None)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
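Each extracted rationale is just a SMILES string, with atom-map number 1 marking the attachment points labeled by `extract_subgraph_from_mol`, so the substructures can be inspected with standard RDKit drawing utilities. A small illustrative follow-up (not part of the original notebook) that grids the top-ranked rationale found for each molecule collected in the loop above:

```python
# Illustrative only: draw the top-ranked rationale for each molecule, where one was found.
from rdkit.Chem import Draw

top_rationales = [smi for smi in results_df["rationale_0"] if smi is not None]
mols = [Chem.MolFromSmiles(smi) for smi in top_rationales]
Draw.MolsToGridImage(mols, legends=top_rationales, molsPerRow=3)
```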
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilesliporationale_0rationale_0_scorerationale_1rationale_1_scorerationale_2rationale_2_scorerationale_3rationale_3_scorerationale_4rationale_4_score
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc142.253542c1ccc2c(c1)n[cH:1][nH:1]22.275024NoneNaNNoneNaNNoneNaNNoneNaN
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...2.235016O=[SH:1]c1c[cH:1][cH:1]cc1[OH:1]2.252582c1c([OH:1])c([S:1][NH2:1])c[cH:1][cH:1]12.252185c1c(N[CH3:1])[cH:1]c[cH:1]c1[SH:1]2.251068c1c([S:1][NH2:1])[cH:1]cc([OH:1])[cH:1]12.250288c1c([NH2:1])[cH:1]c[cH:1]c1[S:1][NH2:1]2.249267
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl2.245891c1cc[cH:1]c([CH2:1][CH2:1][OH:1])c12.249289O=[CH:1][CH2:1]c1cccc[cH:1]12.249207c1cc[cH:1]c([C@@H]([CH3:1])[NH2:1])c12.247827Clc1ccccc1[CH2:1][NH2:1]2.245391Clc1ccccc1[CH2:1][CH3:1]2.243280
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...2.249847c1c([CH3:1])[nH]c2s[cH:1]cc122.267990Clc1cc2c[cH:1][nH]c2s12.267004O=C1N(C[CH3:1])[CH:1]=[CH:1]C[CH2:1]12.211323NoneNaNNoneNaN
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...2.228097c1cc(C[CH2:1][NH2:1])c[cH:1]c12.247070c1cc(C[CH2:1][CH3:1])c[cH:1]c12.245314Cn1nc([CH3:1])cc1[CH2:1][NH2:1]2.225729C[CH2:1]c1cc([CH2:1][NH2:1])[nH:1]n12.223793c1c([CH3:1])n[nH:1]c1[CH2:1]N[CH3:1]2.223478
\n", + "
" + ], + "text/plain": [ + " smiles lipo \\\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 2.253542 \n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... 2.235016 \n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 2.245891 \n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 2.249847 \n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 2.228097 \n", + "\n", + " rationale_0 rationale_0_score \\\n", + "0 c1ccc2c(c1)n[cH:1][nH:1]2 2.275024 \n", + "1 O=[SH:1]c1c[cH:1][cH:1]cc1[OH:1] 2.252582 \n", + "2 c1cc[cH:1]c([CH2:1][CH2:1][OH:1])c1 2.249289 \n", + "3 c1c([CH3:1])[nH]c2s[cH:1]cc12 2.267990 \n", + "4 c1cc(C[CH2:1][NH2:1])c[cH:1]c1 2.247070 \n", + "\n", + " rationale_1 rationale_1_score \\\n", + "0 None NaN \n", + "1 c1c([OH:1])c([S:1][NH2:1])c[cH:1][cH:1]1 2.252185 \n", + "2 O=[CH:1][CH2:1]c1cccc[cH:1]1 2.249207 \n", + "3 Clc1cc2c[cH:1][nH]c2s1 2.267004 \n", + "4 c1cc(C[CH2:1][CH3:1])c[cH:1]c1 2.245314 \n", + "\n", + " rationale_2 rationale_2_score \\\n", + "0 None NaN \n", + "1 c1c(N[CH3:1])[cH:1]c[cH:1]c1[SH:1] 2.251068 \n", + "2 c1cc[cH:1]c([C@@H]([CH3:1])[NH2:1])c1 2.247827 \n", + "3 O=C1N(C[CH3:1])[CH:1]=[CH:1]C[CH2:1]1 2.211323 \n", + "4 Cn1nc([CH3:1])cc1[CH2:1][NH2:1] 2.225729 \n", + "\n", + " rationale_3 rationale_3_score \\\n", + "0 None NaN \n", + "1 c1c([S:1][NH2:1])[cH:1]cc([OH:1])[cH:1]1 2.250288 \n", + "2 Clc1ccccc1[CH2:1][NH2:1] 2.245391 \n", + "3 None NaN \n", + "4 C[CH2:1]c1cc([CH2:1][NH2:1])[nH:1]n1 2.223793 \n", + "\n", + " rationale_4 rationale_4_score \n", + "0 None NaN \n", + "1 c1c([NH2:1])[cH:1]c[cH:1]c1[S:1][NH2:1] 2.249267 \n", + "2 Clc1ccccc1[CH2:1][CH3:1] 2.243280 \n", + "3 None NaN \n", + "4 c1c([CH3:1])n[nH:1]c1[CH2:1]N[CH3:1] 2.223478 " + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results_df = pd.DataFrame(results_df)\n", + "results_df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/examples/mpnn_fingerprints.ipynb b/chemprop/examples/mpnn_fingerprints.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..559f3bfc8bfc9aa71b7678a7a8d9b72fd7a7edf9 --- /dev/null +++ b/chemprop/examples/mpnn_fingerprints.ipynb @@ -0,0 +1,395 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Encoding fingerprint latent representation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/mpnn_fingerprints.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + 
"execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import torch\n", + "from sklearn.decomposition import PCA\n", + "from pathlib import Path\n", + "\n", + "from chemprop import data, featurizers, models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change model input here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "checkpoint_path = chemprop_dir / \"tests/data/example_model_v2_regression_mol.ckpt\" # path to the checkpoint file.\n", + "# If the checkpoint file is generated using the training notebook,\n", + "# it will be in the `checkpoints` folder with name similar to `checkpoints/epoch=19-step=180.ckpt`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load model" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): GraphTransform(\n", + " (V_transform): Identity()\n", + " (E_transform): Identity()\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn = models.MPNN.load_from_checkpoint(checkpoint_path)\n", + "mpnn.eval()\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change data input here" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "test_path = '../tests/data/smis.csv'\n", + "smiles_column = 'smiles'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, 
weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1', V_f=None, E_f=None, V_d=None)]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_test = pd.read_csv(test_path)\n", + "\n", + "smis = df_test[smiles_column]\n", + "\n", + "test_data = [data.MoleculeDatapoint.from_smi(smi) for smi in smis]\n", + "test_data[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Get featurizer" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Get datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "test_dset = data.MoleculeDataset(test_data, featurizer=featurizer)\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Calculate fingerprints" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`models.MPNN.encoding(inputs : BatchMolGraph, i : int)` calculate the i-th hidden representation.\n", + "\n", + "`i` ia the stop index of slice of the MLP used to encode the input. That is, use all\n", + "layers in the MLP _up to_ :attr:`i` (i.e., ``MLP[:i]``). This can be any integer\n", + "value, and the behavior of this function is dependent on the underlying list\n", + "slicing behavior. 
For example:\n", + "\n", + "* ``i=0``: use a 0-layer MLP (i.e., a no-op)\n", + "* ``i=1``: use only the first block\n", + "* ``i=-1``: use _up to_ the second-to-last block" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([100, 300])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with torch.no_grad():\n", + " fingerprints = [\n", + " mpnn.encoding(batch.bmg, batch.V_d, batch.X_d, i=0)\n", + " for batch in test_loader\n", + " ]\n", + " fingerprints = torch.cat(fingerprints, 0)\n", + "\n", + "fingerprints.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([100, 300])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with torch.no_grad():\n", + " encodings = [\n", + " mpnn.encoding(batch.bmg, batch.V_d, batch.X_d, i=1)\n", + " for batch in test_loader\n", + " ]\n", + " encodings = torch.cat(encodings, 0)\n", + "\n", + "encodings.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using fingerprints" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAsAAAAK7CAYAAAD8yjntAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABXqklEQVR4nO3de3xU5b3v8e8kQKKQjAYKCRcxUq2EiAg0EPDaCgYtXrY9chHUVluherygVSnagLUbObu2WsvFWtStKLJVVGJtND1Yt0iQQqBKw1bEKAqJSJAJXhJhZp0/ciZlkkkyazIz6/Z5v155vczKmuSZ5SR851m/5/f4DMMwBAAAAHhEmtUDAAAAAFKJAAwAAABPIQADAADAUwjAAAAA8BQCMAAAADyFAAwAAABPIQADAADAUwjAAAAA8BQCMAAAADyFAAwAcXjsscfk8/mifvzwhz+Uz+fTY489ZvUwU+bss8/W2WefHddjX375Zc2fPz+h4wGAjnSzegAA4GSPPvqoTj755IhjeXl5uvXWWzVkyBCLRpV6S5YsifuxL7/8shYvXkwIBpAyBGAA6ILCwkKNHj26zfHBgwdbMJrYHTp0SD6fT926de2fga+++kpHH320CgoKEjQyAEg+SiAAIME+/PDDNiUQ8+fPl8/n0z//+U9NmzZNfr9f/fr1049//GMFAoGIxx84cEBXX321cnJy1KtXL11wwQX64IMP5PP52syS7tixQ9OnT1ffvn2VkZGhoUOHavHixRHn/O1vf5PP59MTTzyhW265RQMGDFBGRobef//9llKOiooK/ehHP1JOTo569uypyZMn64MPPoj4PmeffbYKCwv13//93xo3bpyOPvpo/fjHP2752pElEOFr8Jvf/Ea//e1vlZ+fr169eqm4uFgbNmxoOe+qq65qGe+RZSQffvihJOmZZ57RmDFj5Pf7dfTRR+uEE05o+ZkAEC9mgAGgC4LBoA4fPhzz+ZdeeqmmTJmiq6++Wu+8847mzp0rSXrkkUckSaFQSJMnT9amTZs0f/58jRw5UpWVlSopKWnzvaqrqzVu3Dgdd9xxuu+++5Sbm6tXXnlFN9xwg/bt26fS0tKI8+fOnavi4mItW7ZMaWlp6tu3b8vXrr76ak2YMEFPPfWUPv74Y9155506++yz9fbbb+uYY45pOa+2tlYzZszQbbfdpn//939XWlrH8yiLFy/WySefrPvvv1+SdNddd+n8889XTU2N/H6/7rrrLn355Zd69tlnVVlZ2fK4vLw8VVZWasqUKZoyZYrmz5+vzMxMffTRR1q7dm3M1xsAoiEAA0AXjB07ts2xHTt2tHv+1VdfrZ///OeSpHPPPVfvv/++HnnkES1fvlw+n0/l5eVat26dli5dqlmzZkmSJkyYoB49erSE5bA5c+YoKytL69atU3Z2dsu5TU1Nuvfee3XDDTfo2GOPbTl/yJAheuaZZ6KOa/To0Vq+fHnL58OGDdP48eO1ePFizZs3r+X4/v379cwzz+h73/teZ5dGkpSVlaWXXnpJ6enpkqT+/furqKhIf/nLXzR16lQNGTJE/fr1k9T2Wq5fv16GYWjZsmXy+/0tx6+66qqYfjYAtIcSCADogscff1x///vfIz46qqu98MILIz4fPny4GhsbtXfvXknS66+/Lkm67LLLIs6bNm1axOeNjY36v//3/+qSSy7R0UcfrcOHD7d8nH/++WpsbIwoNZCaZ5/bc/nll0d8Pm7cOA0ePFivvfZaxPFjjz025vArSRdccEFL+JWan68kffTRR50+9rvf/a6k5mvxX//1X9q9e3fMPxcAOkIABoAuGDp0qEaPHh3x0ZHevXtHfJ6RkSFJ+vrrryVJ9fX16tatm3JyciLOC8+ShtXX1+vw4cN68MEH1b1794iP888/X5K0b9++iMfk5eW1O67c3Nyox+rr62P+HtF09nw7cuaZZ+qFF17Q4cOHdcUVV2jgwIEqLCzUypUrTY0BAFqjBAIAbKR37946fPiw9u/fHxGC6+rqIs479thjlZ6erpkzZ+q6666L+r3y8/MjPvf5fO3+3NbfP3zs29/+dszfIxkuuugiXXTRRWpqatKGDRu0cOFCTZ8+Xccff7yKi4tTOhYA7sEMMADYyFlnnSVJWrVqVcTxp59+OuLzo48+Wuecc46
2bNmi4cOHt5mFHj16dJvZ1448+eSTEZ+vX79eH330UdybW5gRy6xwRkaGzjrrLC1atEiStGXLlqSPC4B7MQMMADZSUlKi8ePH65ZbblFDQ4NGjRqlyspKPf7445IU0XXhgQce0Omnn64zzjhDs2fP1vHHH6+DBw/q/fffV1lZmaluCZs2bdI111yj//W//pc+/vhjzZs3TwMGDNDPfvazhD/H1k455RRJ0qJFizRp0iSlp6dr+PDhuueee/TJJ5/o+9//vgYOHKgDBw7ogQceUPfu3VveKABAPAjAAGAjaWlpKisr0y233KJ7771X33zzjcaPH68VK1Zo7NixES3JCgoKVFVVpV/96le68847tXfvXh1zzDE68cQTW+qAY7V8+XI98cQTmjp1qpqamnTOOefogQceaFOLnAzTp0/Xm2++qSVLlujuu++WYRiqqanRmDFjtGnTJt1+++367LPPdMwxx2j06NFau3athg0blvRxAXAvn2EYhtWDAAB07KmnntLll1+uN998U+PGjUvY933sscf0ox/9SH//+987XcAHAG7BDDAA2MzKlSu1e/dunXLKKUpLS9OGDRv0H//xHzrzzDMTGn4BwKsIwABgM1lZWXr66ad1zz336Msvv1ReXp6uuuoq3XPPPVYPDQBcgRIIAAAAeApt0AAAAOApBGAAAAB4CgEYAAAAnsIiuE6EQiHt2bNHWVlZKd8CFAAAAJ0zDEMHDx5U//79IzYMag8BuBN79uzRoEGDrB4GAAAAOvHxxx9r4MCBnZ5HAO5EVlaWpOYLmp2dbfFoAAAA0FpDQ4MGDRrUkts6QwDuRLjsITs7mwAMAABgY7GWq7IIDgAAAJ5CAAYAAICnEIABAADgKQRgAAAAeAoBGAAAAJ5CAAYAAICnEIABAADgKQRgAAAAeAoBGAAAAJ5CAAYAAICnEIABAADgKQRgAAAAeAoBGAAAAJ5CAAYAAICnEIABAADgKQRgAAAAeAoBGAAAAJ5CAAYAAICnEIABAADgKQRgAAAAeEo3qwcAAHCuYMjQxpr92nuwUX2zMlWUn6P0NJ/VwwKADhGAAQBxKd9WqwVl1aoNNLYcy/NnqnRygUoK8ywcGQB0jBIIAIBp5dtqNXtFVUT4laS6QKNmr6hS+bZai0YGAJ0jAAMATAmGDC0oq5YR5WvhYwvKqhUMRTsDAKxHAAYAmLKxZn+bmd8jGZJqA43aWLM/dYMCABMIwAAAU/YebD/8xnMeAKQaARgAYErfrMyEngcAqUYABgCYUpSfozx/ptprduZTczeIovycVA4LAGJGAAYAmJKe5lPp5AJJahOCw5+XTi6gHzAA2yIAAwBMKynM09IZI5XrjyxzyPVnaumMkfQBBmBrbIQBAIhLSWGeJhTkshMcAMchAAMA4pae5lPxkN5WDwMATKEEAgAAAJ5CAAYAAICnEIABAADgKQRgAAAAeAoBGAAAAJ5CAAYAAICnEIABAADgKQRgAAAAeAoBGAAAAJ5CAAYAAICnEIABAADgKQRgAAAAeAoBGAAAAJ5CAAYAAICnEIABAADgKQRgAAAAeAoBGAAAAJ5CAAYAAICnEIABAADgKQRgAAAAeAoBGAAAAJ5CAAYAAICnEIABAADgKQRgAAAAeAoBGAAAAJ5CAAYAAICnEIABAADgKQRgAAAAeIrjAvCSJUuUn5+vzMxMjRo1Sm+88Ua7565bt07jx49X7969ddRRR+nkk0/W7373uxSOFgAAAHbTzeoBmLFq1SrddNNNWrJkicaPH6+HHnpIkyZNUnV1tY477rg25/fs2VPXX3+9hg8frp49e2rdunW69tpr1bNnT/30pz+14BkASJZgyNDGmv3ae7BRfbMyVZSfo/Q0n9XDAgDYkM8wDMPqQcRqzJgxGjlypJYuXdpybOjQobr44ou1cOHCmL7Hv/3bv6lnz5564oknYjq/oaFBfr9fgUBA2dnZcY0bQHKVb6vVgrJq1QYaW47l+TNVOrlAJYV5Fo4MAJAKZvOaY0ogvvnmG23evFkTJ06MOD5x4kStX78+pu+xZcsWrV+/XmeddVa75zQ1NamhoSHiA4B9lW+r1ewVVRHhV5LqAo2avaJK5dtqLRoZAMCuHBOA9+3bp2AwqH79+kUc79evn+rq6jp87MCBA5WRkaHRo0fruuuu0zXXXNPuuQsXLpTf72/5GDRoUELGDyDxgiFDC8qqFe02VvjYgrJqBUOOudEFAEgBxwTgMJ8vsqbPMIw2x1p74403tGnTJi1btkz333+/Vq5c2e65c+fOVSAQaPn4+OOPEzJuAIm3sWZ/m5nfIxmSagON2lizP3WDAgDYnmMWwfXp00fp6eltZnv37t3bZla4tfz8fEnSKaecok8//VTz58/XtGnTop6bkZGhjIyMxAwaQFLtPdh++I3nPACANzhmBrhHjx4aNWqUKioqIo5XVFRo3LhxMX8fwzDU1NSU6OEBsEDfrMyEngcA8AbHzABL0pw5czRz5kyNHj1axcXF+uMf/6hdu3Zp1qxZkprLF3bv3q3HH39ckrR48WIdd9xxOvnkkyU19wX+zW9+o//9v/+3Zc8BQOIU5ecoz5+pukBj1Dpgn6Rcf3NLNAAAwhwVgKdMmaL6+nrdfffdqq2tVWFhoV5++WUNHjxYklRbW6tdu3a1nB8KhTR37lzV1NSoW7duGjJkiO69915de+21Vj0FAAmUnuZT6eQCzV5RJZ8UEYLDKwNKJxfQDxgAEMFRfYCtQB9gwP7oAwwA3mY2rzlqBhgAoikpzNOEglx2ggMAxIQADMAV0tN8Kh7S2+phAAAcwDFdIAAAAIBEIAADAADAUwjAAAAA8BQCMAAAADyFAAwAAABPIQADAADAUwjAAAAA8BQCMAAAADyFjTBsJBgy2MkqwbimAACgNQKwTZRvq9WCsmrVBhpbjuX5M1U6uUAlhXkWjsy5uKYAACAaSiBsoHxbrWavqIoIapJUF2jU7BVVKt9Wa9HInItrCgAA2kMAtlgwZGhBWbWMKF8LH1tQVq1gKNoZiIZrCgAAOkIAttjGmv1tZimPZEiqDTRqY83+1A3K4bimAACgIwRgi+092H5Qi+c8cE0BAEDHCMAW65uVmdDzwDUFAAAdIwBbrCg/R3n+TLXXmMun5s4FRfk5qRyWo3FNAQBARwjAFktP86l0coEktQls4c9LJxfQu9YErikAAOgIAdgGSgrztHTGSOX6I2/J5/oztXTGSHrWxoFrCgAA2uMzDINeUB1oaGiQ3+9XIBBQdnZ2Un8Wu5YlHtcUAAD3M5vX2AnORtLTfCoe0tvqYbgK1xQAALRGCQQAAAA8hQAMAAAATyEAAwAAwFMIwAAAAPAUAjAAAAA8hQAMAAAATyEAAwAAwFMIwAAAAPAUAjAAAAA8hQAMAAAATyEAAwAAwFMIwAAAAPAUAjAAAAA8hQAMAAAATyEAAwAAwFMIwAAAAPAUAjAAAAA8hQAMAAAATyEAAwAAwFMIwAAAAPAUAjAAAAA8hQAMAAAATyEAAwAAwFMIwAAAAPAUAjAAAAA8hQAMAA
AATyEAAwAAwFMIwAAAAPAUAjAAAAA8hQAMAAAATyEAAwAAwFMIwAAAAPAUAjAAAAA8pZvVAwAApEYwZGhjzX7tPdiovlmZKsrPUXqaz+phAUDKEYABwAPKt9VqQVm1agONLcfy/JkqnVygksI8C0cGAKlHCQQAuFz5tlrNXlEVEX4lqS7QqNkrqlS+rdaikQGANQjAAOBiwZChBWXVMqJ8LXxsQVm1gqFoZwCAOxGAAcDFNtbsbzPzeyRDUm2gURtr9qduUABgMQIwALjY3oPth994zgMAN2ARHDyFVfDwmr5ZmQk9DwDcgAAMz2AVPLyoKD9Hef5M1QUao9YB+yTl+pvfDAKAV1ACAU9gFTy8Kj3Np9LJBZKaw+6Rwp+XTi7gTggATyEAw/VYBQ+vKynM09IZI5XrjyxzyPVnaumMkdwBAeA5lEDA9cysgi8e0jt1AwNSqKQwTxMKcqmBBwARgOEBrIIHmqWn+XiTBwCiBAIewCp4AABwJAIwXC+8Cr69G70+NXeDYBU8AADeQACG67EKHgAAHIkADE9gFTwAAAhjERw8g1XwAABAIgDDY1gFDwAAKIEAAACApxCAAQAA4CkEYAAAAHgKARgAAACeQgAGAACApxCAAQAA4CkEYAAAAHgKARgAAACeQgAGAACApxCAAQAA4CkEYAAAAHgKARgAAACeQgAGAACApzguAC9ZskT5+fnKzMzUqFGj9MYbb7R77urVqzVhwgR961vfUnZ2toqLi/XKK6+kcLQAAACwG0cF4FWrVummm27SvHnztGXLFp1xxhmaNGmSdu3aFfX8//7v/9aECRP08ssva/PmzTrnnHM0efJkbdmyJcUjBwAAgF34DMMwrB5ErMaMGaORI0dq6dKlLceGDh2qiy++WAsXLozpewwbNkxTpkzRL3/5y5jOb2hokN/vVyAQUHZ2dlzjBgAAQPKYzWuOmQH+5ptvtHnzZk2cODHi+MSJE7V+/fqYvkcoFNLBgweVk5PT7jlNTU1qaGiI+AAAAIB7OCYA79u3T8FgUP369Ys43q9fP9XV1cX0Pe677z59+eWXuuyyy9o9Z+HChfL7/S0fgwYN6tK4AQAAYC+OCcBhPp8v4nPDMNoci2blypWaP3++Vq1apb59+7Z73ty5cxUIBFo+Pv744y6PGQAAAPbRzeoBxKpPnz5KT09vM9u7d+/eNrPCra1atUpXX321nnnmGZ177rkdnpuRkaGMjIwujxcAAAD25JgZ4B49emjUqFGqqKiIOF5RUaFx48a1+7iVK1fqqquu0lNPPaULLrgg2cMEAACAzTlmBliS5syZo5kzZ2r06NEqLi7WH//4R+3atUuzZs2S1Fy+sHv3bj3++OOSmsPvFVdcoQceeEBjx45tmT0+6qij5Pf7LXseAAAAsI6jAvCUKVNUX1+vu+++W7W1tSosLNTLL7+swYMHS5Jqa2sjegI/9NBDOnz4sK677jpdd911LcevvPJKPfbYY6kePgCkVDBkaGPNfu092Ki+WZkqys9RelrnayYAwO0c1QfYCvQBBuBE5dtqtaCsWrWBxpZjef5MlU4uUElhnoUjA4DEc20fYABAbMq31Wr2iqqI8CtJdYFGzV5RpfJttRaNDADsgQAMAC4SDBlaUFataLf2wscWlFUrGOLmH5wtGDJUubNeL27drcqd9bymYYqjaoABAB3bWLO/zczvkQxJtYFGbazZr+IhvVM3MCCBKPFBVzEDDAAusvdg++E3nvMAu6HEB4lAAAYAF+mblZnQ8wA7ocQHiUIABgAXKcrPUZ4/U+01O/Op+VZxUX5OKocFJISZEh+gIwRgAHCR9DSfSicXSFKbEBz+vHRyAf2A4UiU+CBRCMAA4DIlhXlaOmOkcv2RZQ65/kwtnTGSRUJwLEp8kCh0gQAAFyopzNOEglx2goOrhEt86gKNUeuAfWp+o0eJDzpDAAYAl0pP89HqDK4SLvGZvaJKPikiBFPiAzMogQAAAI5BiQ8SgRlgAADgKJT4oKsIwABgkWDI4B9wIE6U+KArCMAAYAEzW7kSlAEgsQjAAJBi4a1cW69iD2/lemQdo5mgDACIDYvgACCFzGzlGg7KrXe+Cgfl8m21SR8vALgRARgAUijWrVw37KyPOSgDAMwhAANACsW6RWvlB/tiCsoba/YnaGQA4B0EYABIodi3aI1tkVusgRoA8C8EYABIofBWru3FW5+aF7nF2t4p9kANAAgjAANACoW3cpXazvEeuZXr2BN6xxSUi/JzkjRSAHAvAjAApFgsW7nGGpTpBwwA5vkMw2AJcQcaGhrk9/sVCASUnZ1t9XAAuEgsG1zQBxgAOmc2rxGAO0EABmA1doIDgI6ZzWvsBAcANpee5ot5URwAoHPUAAMAAMBTCMAAAADwFAIwAAAAPIUADAAAAE8hAAMAAMBTCMAAAADwFAIwAAAAPIUADAAAAE8hAAMAAMBTCMAAAADwFAIwAAAAPIUADAAAAE/pZvUAAAAAkHjBkKGNNfu192Cj+mZlqig/R+lpvnaPewkBGAAAwGXKt9VqQVm1agONLcfy/Jm68NQ8rflHbZvjpZMLVFKYZ8VQLeEzDMOwehB21tDQIL/fr0AgoOzsbKuHAwAA0KHybbWavaJKsQa88Nzv0hkjHRuCzeY1aoABAABcIhgytKCsOubwK6nl3AVl1QqGvDEvSgAGAABwiY01+yPKG2JlSKoNNGpjzf7ED8qGqAEGAABwib0HzYffRD5ean/xnZ0QgAEAAFyib1ZmSh/fOux+/uU3+tWf2y6+s9siOwIwAACASxTl5yjPn6m6QKOpOmCfpFx/82xtrKJ1moimLtCo2SuqbLXIjhpgAAAAl0hP86l0coGkf3V36Ez4vNLJBTGXKoQ7TcRSb2zHRXYEYADwgGDIUOXOer24dbcqd9bb5h8hwC3s9DtWUpinpTNGKtcfWc6Q58/UtWfmK6/V8Vx/pqnZ2Xg7TdhpkR0lEADgcu01xLdbTR7a54RFRV5mx9+xksI8TSjIjfq6ua1kaJdeT/F2mpASs8guEQjAAOBi7TXEt2NNHqKzY7jCv9j5dyw9zafiIb1jPh6rroTYri7SSxRKIADApTq6TWnHmjy01V6dZThclW+rtWhkkLz7OxZPiPWp+Y2bmUV2yUQABgCX6uw2pd1q8hDJq+HKSbz6OxbuNJHMRXbJRgAGAJeK9TalXWryEMmr4cpJvPo7ZrbThNlFdqlADTAAuFSstyntUpOHSF4NV07i5d+xcKeJaPXpd10wVMf2zLD1ok0CMAC4VGcN8eNpfI/U8XK4cgqv/4511GnC7iiBAACX6ug2pR1r8hCpszpLuy0q8iJ+x/7VUeKiEQNUPKS3Y54rARgAXKy9hvh2rMlDJMKVM/A75kw+wzBYPtqBhoYG+f1+BQIBZWdnWz0cAIgLGyk4F32AnYHfMWuZzWsE4E4QgAEAViNcAR0zm9dYBAcAgM11decuAJGoAQYAAICnEIABAADgKQRgAAAAeAoBGAAAAJ5CAAYAAICnEIABAADgKQRgAAAAeAoBGAAAA
J5CAAYAAICnEIABAADgKQRgAAAAeAoBGAAAAJ5CAAYAAICnEIABAADgKQRgAAAAeAoBGAAAAJ5CAAYAAICndLN6AAAAOF0wZGhjzX7tPdiovlmZKsrPUXqaz+phAWgHARgAgC4o31arBWXVqg00thzL82eqdHKBSgrzLBwZgPZQAgEAQJzKt9Vq9oqqiPArSXWBRs1eUaXybbUWjQxARwjAAADEIRgytKCsWkaUr4WPLSirVjAU7QwAViIAAwAQh401+9vM/B7JkFQbaNTGmv2pGxSAmBCAAQCIw96D7YffeM4DkDoEYAAA4tA3KzOh5wFIHQIwAABxKMrPUZ4/U+01O/OpuRtEUX5OKocFIAYEYAAA4pCe5lPp5AJJahOCw5+XTi6gHzBgQwRgAADiVFKYp6UzRirXH1nmkOvP1NIZI+kDDE8KhgxV7qzXi1t3q3JnvS07oThuI4wlS5boP/7jP1RbW6thw4bp/vvv1xlnnBH13NraWt1yyy3avHmzduzYoRtuuEH3339/agcMAHC1ksI8TSjIZSc4QM7ZGMZRM8CrVq3STTfdpHnz5mnLli0644wzNGnSJO3atSvq+U1NTfrWt76lefPm6dRTT03xaAEAXpGe5lPxkN66aMQAFQ/pTfiFJzlpYxifYRj2m5dux5gxYzRy5EgtXbq05djQoUN18cUXa+HChR0+9uyzz9aIESNMzwA3NDTI7/crEAgoOzs7nmEDAAC4WjBk6PRFa9vtje1Tc2nQutu/l5Q3iGbzmmNmgL/55htt3rxZEydOjDg+ceJErV+/PmE/p6mpSQ0NDREfAAAAaJ/TNoZxTADet2+fgsGg+vXrF3G8X79+qqurS9jPWbhwofx+f8vHoEGDEva9AQCAMzlhYZeVnLYxjOMWwfl8kdPmhmG0OdYVc+fO1Zw5c1o+b2hoIAQDAOBhTlnYZSWnbQzjmBngPn36KD09vc1s7969e9vMCndFRkaGsrOzIz4AAIA3OWlhl5WctjGMYwJwjx49NGrUKFVUVEQcr6io0Lhx4ywaFQAAcKtgyNCCsmpFK3YIH1tQVk05hJy3MYxjArAkzZkzR3/605/0yCOPaPv27br55pu1a9cuzZo1S1Jz+cIVV1wR8ZitW7dq69at+uKLL/TZZ59p69atqq6utmL4gG1QywYAnXPawi6rOWljGEfVAE+ZMkX19fW6++67VVtbq8LCQr388ssaPHiwpOaNL1r3BD7ttNNa/nvz5s166qmnNHjwYH344YepHDpgG9SyAUCzYMjocAMTpy3ssgOnbAzjqD7AVqAPMNwkXMvW+pc+/GfJbu/QASBZYpkMqNxZr2kPb+j0e638yVgVD+mdtLEmU2dvApzCbF5z1AwwgPh1VsvmU3Mt24SCXEf+8QOAWLU3GRBe2BaeDAgv7KoLNEb92xne3MEuC7vM8vIdQUfVAAOIH7VsAGBuYZvTFnaZ4fXuFgRgwCOoZQNSjwWn9mN2MsBJC7tiRXcLSiAAz3Bak3LA6bx8e9nO4pkMcMrCrliZeRPg1NrmzhCAAY9wey0bYCex1phayS2Ln8yKdzIgPc3nmjDIHUECMOAZ4Vq22Suq5JMi/mF2ei0bYCdOWHDq5dlpJgO4IyhRAwx4ihtr2QC7sfuCU68vfnLzwrZYOW3b4mRgBhjwGLfVsgF2Y+fby06YnU6F8GRA61nwXI/MgnNHkAAMeJKbatkAu7Hz7WUWP/2L1ycDvP4mgAAMAEAC2bnG1M6z01bw+mSAl98EEIABAEggO99etvPsNKzh1TcBLIIDACDB7LrglMVPQDNmgAEASAI73l628+w0kEo+wzDcu89dAjQ0NMjv9ysQCCg7O9vq4QAA0GVe7gMMdzKb15gBBgDAY5I1O+3V3eXgPARgAAA8KNGLn5hVhpOwCA4AAHSJ13eXg/MQgAEAQNw6211Oat5dLhhiyZEZwZChyp31enHrblXurOf6JRglEAAAIG7sLpd4lJMkHzPAAAAgbuwul1iUk6QGARgAAMSN3eUSh3KS1CEAAwCAuLG7XOKYKSdB1xCAAQBA3MK7y0lqE4LZXc4cyklShwAMAAC6pKQwT0tnjFSuP7LMIdefqaUzRrJwK0aUk6QOXSAAAECXJWt3OS8Jl5PUBRqj1gH71PymgnKSriMAAwCAhEj07nJeEy4nmb2iSj4pIgRTTpJYcZVAhEKhdo/v2rWrSwMCAAD2wGYMqUc5SWqYmgFuaGjQNddco7KyMmVnZ2vWrFn65S9/qfT0dEnSZ599pvz8fAWDwaQMFgAApIYbNmMIhgxHlmRQTpJ8pgLwXXfdpX/84x964okndODAAd1zzz3avHmzVq9erR49ekiSDIN3hwAAOFl4M4bW/6KHN2Nwwkyk0wM85STJZaoE4oUXXtBDDz2kH/7wh7rmmmu0efNm7du3T5MnT1ZTU5Mkyefj3QkAAE7lhs0Y2E0NnTEVgPft26fBgwe3fN67d29VVFTo4MGDOv/88/XVV18lfIAAACB1nL4ZgxsCPJLPVAAeNGiQtm/fHnEsKytLr776qr7++mtdcsklCR0cAABILadvxuD0AI/UMBWAJ06cqEcffbTN8V69eumVV15RZiaNmQEAcDKnb8bg9ACP1DC1CG7BggXas2dP1K9lZWXpr3/9qzZv3pyQgQEAgNQLb8bQ0Sxqno03Y3B6gEdqmJoBPvbYYzVs2LB2v97U1KQtW7Z0eVAAADiJm/rlpqf5dOGpHXdJuPDUPNu25AoH+PZG55O9AzxSo8s7wRmGoVdffVXLly/Xiy++qOzsbN10000JGBoAAPbnlHZbsfbEDYYMrflHx10S1vyjVreVDLVlCGY3NcQirp3gJOnDDz/UL3/5Sw0ePFjnn3++MjMz9ec//1l1dXWJHB8AALbllHZb5dtqdfqitZr28Abd+PRWTXt4g05ftDbq+DpbRCbZfxEZu6mhM6ZmgJuamrR69Wr96U9/0vr16zVp0iT99re/1bRp03THHXeooKAgWeMEANdx6i5VaNZZuy2fmtttTSjItfT/q9lNLdyyiIzd1NARUwF4wIABKigo0IwZM/Tss8/q2GOPlSRNmzYtKYMDALdyym1ztM9Muy2rdvSKJ6S7aREZu6mhPaZKIILBoHw+n3w+n9LT05M1JgBwNafcNkfHnDBTGk9PXBaRwQtMBeDa2lr99Kc/1cqVK5Wbm6tLL71Uzz//PNsfA0CM2KXKPZwwUxpPSA8vIpPUJgSziAxuYSoAZ2Zm6vLLL9fatWv1zjvvaOjQobrhhht0+PBh/frXv1ZFRYWCwWCyxgoAjscuVe7hhJnSeEM6i8jgdnG3QRsyZIjuuece3X333XrllVe0fPly/eAHP1CvXr1UX1+fyDECgGs44bY5YuOEdlvhkF4XaIx618Gn5lAbLaSziAxuFncbtJZvkJamSZMm6dlnn9Xu3bs1b968RIwLAFzJCbfNETu7z5R2tZwhvIjsohEDVDykt6vCr5s2L4F5pmaAP//8c61YsUJXXnmlsrOzI74WCAS0cuVKXXPNNQkdIAC4SVdm
5GBPdp8pDYf01l1Hcj3cdYQuLPAZhhHzW55f/epXevvtt/XMM89E/fpll12mESNG6Be/+EXCBmi1hoYG+f1+BQKBNqEfAOIR7gIhRb9tboeZQ7gPfaebtdcXmd8/ZzOb10yVQDz33HOaNWtWu1+/9tpr2w3HAIBmdr9tHgtuHzuPm8sZYkUXFoSZKoHYuXOnTjzxxHa/fuKJJ2rnzp1dHhQAOJGZGTa73zbvCLeP7YEZ3fa1d22csHkJUsNUAE5PT9eePXt03HHHRf36nj17lJbW5XV1AOA48YRCJ+5SZXZbXSQHb0La19G1aTociul70IXF/Uyl1dNOO00vvPBCu19//vnnddppp3V1TADgKF7Z2Y3bx/bglddbPDq7Nh/u+yqm70MXFvczFYCvv/563XffffrDH/4QseFFMBjUgw8+qN/97ne67rrrEj5IALArL4VCNvGwnpdeb2bFcm2e/vsu5WZn2HrzEqSGqQB86aWX6rbbbtMNN9ygnJwcnXbaaRo5cqRycnJ00003ac6cOfrhD3+YrLECgO14KRSyiYf1vPR6MyvWazOtqLmMk22evc30TnC//vWvdfHFF+vJJ5/Ujh07ZBiGzjzzTE2fPl1FRUXJGCMA2JaXQqFXNvGw8+IyL73ezIr1OR/fpyd9kWEuAH/11Vf6+c9/rhdeeEGHDh3S97//fT344IPq06dPssYHALbmlVAoeWMTD7svLvPS680sM9emeEhvx3ZhQWKYKoEoLS3VY489pgsuuEDTpk3TX//6V82ePTtZYwMA2wuHQi/UFHZ1W127c8LiMi+93joSrQ+12WtDX2RvM7UT3JAhQ/TrX/9aU6dOlSRt3LhR48ePV2Njo9LT05M2SCuxExyAznhtZze7z5LGIxgydPqite3WkIZnt9fd/j3Lg1IiXm92LvPoTEevP0me+l3Ev5jNa6YCcI8ePVRTU6MBAwa0HDvqqKP03nvvadCgQfGN2OYIwABi4cZQ2BEnB6hoKnfWa9rDGzo9b+VPxtqid3NXXm9Ofq3Gso2xJMc+P8TPbF4zVQMcDAbVo0ePyG/QrZsOHz5sbpQA4DJO3tktHk7YxMNMSHfa4rJ4X29O3sikszZnPjUH33W3f89Tv4uIj6kAbBiGrrrqKmVkZLQca2xs1KxZs9SzZ8+WY6tXr07cCAHAIZwQCr3C7CynExeXmX29xRogJxTk2jIsmt3GmN9FdMRUAL7yyivbHJsxY0bCBgMAQFfFM8vphQ4XZgOk3Thtlh72ZioAP/roo8kaBwAAXRbvLGe4w8XsFVXyKfoCqvAiq8qd9Z3eWrdjjbTTA6QTZ+lhX6Y3wgAAwK5ineV87M0a9cnKiAinJYV5HW6QIKlNp4hoZRV2XWTm9ADphVl6pI6pLhBeRBcIAHCOF7fu1o1PbzX1mNbhNNrsbUV1XafdB0oK82LqUmBVCA63eussQNqh1Vt7vNZyELEzm9dMbYQBAICdxTN72Xqji9YbJEjqsKwi/PVvDodiOi8YsmbeyQ0bmYRn6XP9kf+fc/2ZhF+YQgkEAKDL7FLz2tlt8mg664AQa1nFE5Uf2n6RWWdlHk4IkF5rOYjkIAADAOIWDBn6w9odevTND3Xg60Mtx62qee1oMVtHOgqnsS4K+2j/VzGdZ/UiMzcESFoOoqsogQAAxKV8W61G3VOh3/11R0T4ldqWFaRSe7fJYxEtnMZaVjE45+iYzrPDIrPWZR5OCr9AIjADDAAwrb3FXmFWb6zQepZz38Em/erP2zt9XLRwGmv3gZnFx+tP62roUgA4ADPAAABTOuq1e6QjywqscOQs51Xj85Xnz2yz+CvMp+ayjWjhNNbFYz26pTl+kRngFQRgAIApnS0Ka83qmlep6x0QYu0+QJcCwBkogQAAmGI20Nqh5lXqegeEWBePuWGRmR3YpbMI3IkADAAwxUygba+swCpdDaexdh+gS0HX2HU3PbgHARgAYIqZXrt2rHklnNpbewssw51FKCVBIlADDAAwpaN62rBjju6uZQQVmNTRAks77KYH9yAAAwBMa2+x1zFHd9fN556kzXdOIPzCtFh33bOqswjcgxIIAEBcWOyFRIt1gaUdOovA2QjAAIC4UU+LRIp1gaVdOovAuSiBAAAAthBeYBnPhiWAGQRgAABgC13dsASIFQEYAADYBrvpIRWoAQYAALaSqgWW7DbnXQRgAABgO8leYMluc95GCQQAAPCU8G5zrXsOh3ebK99Wa9HIkCoEYACA7QRDhip31uvFrbtVubOenb+QMOw2B4kSCACAzXBrGslkZrc5ely7FzPAAADb4NY0ko3d5iA5MAAvWbJE+fn5yszM1KhRo/TGG290eP7rr7+uUaNGKTMzUyeccIKWLVuWopECAMzg1jRS4cN9X8V0HrvNuZujAvCqVat00003ad68edqyZYvOOOMMTZo0Sbt27Yp6fk1Njc4//3ydccYZ2rJli37xi1/ohhtu0HPPPZfikQMAWmtd57thZ33Mt6aBeJRvq9X9f32vw3PYbc4bfIZhOOat9JgxYzRy5EgtXbq05djQoUN18cUXa+HChW3Ov/3227VmzRpt37695disWbP0j3/8Q5WVlTH9zIaGBvn9fgUCAWVnZ3f9SQCAh4X7rlZU1+mFrXu0/8tvWr52zFHddeDrQ51+jwemjtBFIwYkc5hwoWDI0OmL1nb4JitsGRtuOI7ZvOaYRXDffPONNm/erDvuuCPi+MSJE7V+/fqoj6msrNTEiRMjjp133nlavny5Dh06pO7du7d5TFNTk5qamlo+b2hoSMDoAQDRFrcdKZbwK3FrGvHpbPFb2M3nnkj49QDHlEDs27dPwWBQ/fr1izjer18/1dXVRX1MXV1d1PMPHz6sffv2RX3MwoUL5ff7Wz4GDRqUmCcAAB7W3uI2M7g1ja60x4t1UdvxfXrGOzw4iGNmgMN8vsgtCg3DaHOss/OjHQ+bO3eu5syZ0/J5Q0MDIRgAuqCjxW2xCv/FLp1cwFa1HtXV9nix3jngDoM3OCYA9+nTR+np6W1me/fu3dtmljcsNzc36vndunVT797Re/tlZGQoIyMjMYMGAMR86/lIreuBc+kD7GnhOwit30SF2+Mtnn6aju2Zob0HG9U3q/kuQes3SkX5OcrzZ6ou0Bj1zZhPza8z7jB4g2MCcI8ePTRq1ChVVFTokksuaTleUVGhiy66KOpjiouLVVZWFnHs1Vdf1ejRo6PW/wIAEi+efqqLLx+pNJ+vw0ADb4ilPd71K7foyGqIaDPD6Wk+lU4u0OwVVfId8ViJOwxe5JgaYEmaM2eO/vSnP+mRRx7R9u3bdfPNN2vXrl2aNWuWpObyhSuuuKLl/FmzZumjjz7SnDlztH37dj3yyCNavny5br31VqueAgB4jplbyuE637En9FbxkN66aMQAFQ/pTSjxsFjuILQuBW5v45SSwjwtnTFSuf7I12SuP1NL6fzgKY6ZAZakKVOmqL6+Xnfffbdqa2tVWFiol19+WYMHD5Yk1dbWRvQEzs/P18svv6ybb75ZixcvVv/+/fX
73/9el156qVVPAQA8p7Nbz2HMwiGaeO4gGGp+PS0oq9aEgtyI11NJYZ4mFORqY81+7jB4mKP6AFuBPsAA0HXhGk5J7YZgMwua4B2VO+s17eENcT9+5U/GqnhI9HU/cA/X9gEGADhX+NZz61X8OT2765IRA3RuQS6zcIgq1jsI7YlnBhnuRwAGAKQEt54Rj44Wr8WCtmaIhgAMAEiZ9DQft6NhWnt3ENJ8bRfAhdHWDB0hAAMAANuLdgfh8y+/0XVPta0tZ0ElOkMABgAAjhDtDsLStLYzw2ycgs4QgAEAgGNRW454EIABAICjUVsOsxy1ExwAAADQVQRgAAAAeAoBGAAAAJ5CAAYAAICnEIABAADgKXSBAAC4SjBk0BILQIcIwAAA1yjfVttmU4Q8NkUA0AolEAAAVyjfVqvZK6oiwq8k1QUaNXtFlcq31Vo0MgB2QwAGADheMGRoQVm1jChfCx9bUFatYCjaGQC8hgAMAHC8jTX728z8HsmQVBto1Maa/akbFADbIgADABxv78H2w2885wFwNwIwAMDx+mZlJvQ8AO5GAAYAOF5Rfo7y/Jlqr9mZT83dIIryc1I5LAA2RQAGADheeppPpZMLJKlNCA5/Xjq5gH7AACQRgAEALlFSmKelM0Yq1x9Z5pDrz9TSGSPpAwygBRthAABco6QwTxMKctkJDkCHCMAAAFdJT/OpeEhvq4eBdrBVNeyAAAwAAFKCraphF9QAAwCApGOratgJARgAACQVW1XDbgjAAGBDwZChyp31enHrblXurCcYwNHYqhp2Qw0wANgMdZJwG7aqht0wAwwANkKdJNyIraphNwRgALAJ6iThVvFuVU0pEJKFEggAsAkzdZL0uYWThLeqnr2iSj4p4k1ee1tVUwqEZGIGGABsgjpJuJmZraopBUKyMQMMADZBnWRb7BrmLrFsVd1ZKZBPzaVAEwpyeS0gbgRgALCJcJ1kXaAx6j/+PjXPlrWuk3QrboG7U2dbVVMKhFSgBAIAbCJcJympzWKh9uok3Ypb4N5FKRBSgQAMADZipk7SreiG4W2UAiEVKIEAAJuJpU7SzbgF7m2UAiEVCMAAYEOd1Um2xw2LxrgF7m3xtEwDzCIAA0CCWRVC3bJojFvgCJcCtX495zrw9Qx7IgADQAJZFULDi8Za3zIOLxpzUv0wt8AhUQqE5GIRHAAkiFWdC9y2aIxuGAgLlwJdNGKAiof05v85EoYADAAJYGUIjXXR2O8q3lXlznpHBGG6YQBIJkogACABrOxcEOtisD+8tlN/eG2nY+qCuQUOIFkIwACQAFZ2LjC7GMxJdcHxdsMAgI5QAgEACWBl54LworFY50U7KskIhgxV7qzXi1t3O6Zcwu34fwIkHjPAAJAAVnYu6KhvanuilWS4pY2am/D/BEgOZoABIAGs7lzQ3qKxzoRLMhLZwYIZy8SwqqtIsvH6gB0wAwwACWJ18/4jF429+f4+/eG19zt9TN+szE47WPjUXC4xoSC30wDPjGViJPL/iZ3w+oBdEIABIIGs7lwQXjRWlJ+j56o+iakkI9YOFht21mv8iX3aPc9Nm3EkS6y7BFrZVSRZeH3ATgjAAJBgduhc0FFdcOuSjFg7U1z3VJXuvfSUqCHFrTOWiWRm9tPKriLJwOsDdkMNMAC4VKybScTameLA14farT01M2PpRWbrea3sKpIMvD5gN8wAA4CLxVKS0VkHi9aizdS5bcYykeKZ/bSyq0gy8PqA3TADDAAuFy7JuGjEABUP6d3mFvORHSw6095MndtmLBMpntlPq7uKJBqvD9gNARgA0FIuccxR3WM6v/VMXWebcfjUXO/qlBnLRIp39jPWEhYn4PUBu6EEAgAgqTlwZWV21+V/eqvTc1vP1JlZdOc1XZn9tLqrSKLw+oDdMAMMAGgx9oTecc/UuWnGMpG6OvvZWQmLU/D6gJ34DMNgC5YONDQ0yO/3KxAIKDs72+rhAEDShTsWSNFn6joLK7H2uvWSrl5TN+H1gWQwm9cIwJ0gAAPwInbsSjyuKZA8BOAEIwAD8Cpm6hLPq9fUq88bqWM2r7EIDgAQlR12tHMbL15TZr5hRyyCAwAASWF2BzwgVQjAAAAg4TrbAU9q3gEvGKISE6lHAAYAAAkXzw54QKpQAwzAEiyKAdwt3h3wgFQgAANIORbFAO7XlR3wgGSjBAJASrEoxp6CIUOVO+v14tbdqtxZT10muqyrO+ABycQMMICU6WxRjE/Ni2ImFORSDpFCzMgjGdLTfCqdXKDZK6rkU/Qd8EonF/C7DkswAwwgZVgUYz/MyCOZSgrztHTGSOX6I8sccv2Zntr+GfbDDDCAlGFRjL0wI49UKCnM04SCXBa9wlYIwABShkUx9mJmRt5ru5chsby4Ax7sjQAMIGXCi2LqAo1RZx19ar41yqKY1Ej1jDyt7wDYBQEYQMqwKMZeUjkjz0I7AHbCIjgAKcWimI6lsh1ZqtpUsdAOgN0wAwwg5VgUE12qZ0lTMSPPQjsAdsQMMABLhBfFXDRigIqH9PZ8+LFqljTZM/K0vgNgR8wAA4DFrJ4lTeaMPK3vANgRARgALGaHdmTJalNF6zsAdkQABjpA2yakgptnSWl9B8COCMBAO2jbhFRx8ywpre8A2BGL4IAoaNuEVEpVOzKr0PoOgN0wAwy0YvWCJHiPF2ZJaX0HwE6YAQZaoW0TrOCFWVJa3wGwC2aAgVbcvCAJ9sYsKQCkBgEYaMXNC5KQOMnqEJKsdmQAgH8hAAOt0LYJnaFDCOyGlo2AOY6pAf788881c+ZM+f1++f1+zZw5UwcOHOjwMatXr9Z5552nPn36yOfzaevWrSkZK5wtvCBJUptV+W5ZkIT40SEEdlO+rVanL1qraQ9v0I1Pb9W0hzfo9EVreS0CHXBMAJ4+fbq2bt2q8vJylZeXa+vWrZo5c2aHj/nyyy81fvx43XvvvSkaJdzCCwuSYF5nHUKk5g4hwVC0M4DE4w0ZEB9HlEBs375d5eXl2rBhg8aMGSNJevjhh1VcXKx3331X3/nOd6I+LhyQP/zww1QNFS7CgiS0Zocti4EwWjYC8XNEAK6srJTf728Jv5I0duxY+f1+rV+/vt0AHI+mpiY1NTW1fN7Q0JCw7w3nYUESjpSMDiHUbiJevCED4ueIAFxXV6e+ffu2Od63b1/V1dUl9GctXLhQCxYsSOj3BOAOie4QwmI6dAUtG4H4WVoDPH/+fPl8vg4/Nm3aJEny+drOiBiGEfV4V8ydO1eBQKDl4+OPP07o9wfgXIncspjaTXQVLRuB+Fk6A3z99ddr6tSpHZ5z/PHH6+2339ann37a5mufffaZ+vXrl9AxZWRkKCMjI6HfE4A7JGrLYmo3kQi0bATiZ2kA7tOnj/r06dPpecXFxQoEAtq4caOKiookSW+99ZYCgYDGjRuX7GECQItwh5DWpQu5JkoXqN1EIiTqDRngRY6oAR46dKhKSkr0k5/8RA899JAk6ac//al+8IMfRCyAO/nkk7Vw4UJdcsklkqT9+/dr165d2rNnjyTp3XfflS
Tl5uYqNzc3xc8CgFt0tUMItZtIlES8IQO8yBEBWJKefPJJ3XDDDZo4caIk6cILL9Qf/vCHiHPeffddBQKBls/XrFmjH/3oRy2fh8stSktLNX/+/OQPGoBrdaVDCLWbSCRaNgLm+QzDoGN7BxoaGuT3+xUIBJSdnW31cAC4QDBk6PRFazut3Vx3+/cIMQAQA7N5zTE7wQGAW7DdNpItGDJUubNeL27drcqd9exOCLTimBIIAHATajeRLPSXBjpHCUQnKIEAkEzsBJd8XrrG4f7Srf9hDz/bpTNGEoLhSmbzGjPAAGAhtttOLi/NhtJfGogdNcAAAFfy2m57ZvpLA15HAAYAuE5ns6FS82yomxaH0V8aiB0BGADgOl6cDU11f2k6TcDJqAEGALiOF2dDi/JzlOfP7LS/dFF+Tpd/lpdqq+FOzAADAFzHi7vtpaq/tNdqq+FOBGAAgOuEZ0Pbi3o+Nc9YJmI21E7C/aVz/ZHBPtefmZAWaF6srYY7UQIBAHCd8Gzo7BVV8kkRgc3tu+2VFOZpQkFuUnofm6mtpr0f7IwADACwhURvWOHl3faS1V/ai7XVcCcCMADAcslaVJXM2VAv8mJtNdyJAAwAsFR72/eGF1V1tXaV3fYSJ5WdJoBkYhEcAMAyLKpyllR1mgCSjQAMALCMFzescLpkd5oAUoESCACAZVhU5UzUVsPpCMAAAMuwqMq5qK2GkxGAAQCW8dKiqkS3eQMQPwIwAMAyXtmwIllt3gDEh0VwAABLuX1RVbjNW+vFfuE2b+Xbai0aGeBdzAADACzn1kVVnbV586m5zduEglzHP1fASQjAAABbcOOiKjNt3tz23AE7owQCAIAkoc0bYE8EYAAAkoQ2b4A9EYABAEiScJu39qp7fWruBuGGNm+AkxCAAQBIknCbN0ltQrCb2rxJzQv+KnfW68Wtu1W5s17BULSlf4A9sAgOAIB2JGLzinCbt9Z9gHNd1AeYPsdwGp9hGLxF60BDQ4P8fr8CgYCys7OtHg4AIEUSHercuhNcuM9x6zARfmZu6OUM+zOb1wjAnSAAm+fWP/IAvINQF5tgyNDpi9a22+otvJX1utu/x78DSCqzeY0SCCQUt8EAOB2bV8SOPsdwKhbBIWHY7hOAG5gJdV5Hn2M4FQEYCdHZjInUPGPCqmAAdkeoix19juFUBGAkBDMmANyCUBc7+hzDqQjASAhmTAC4BaEudl7qcwx3IQAjIZgxAeAWhDpzwn2Oc/2Rf99z/Zl0y4Bt0QUCCRGeMakLNEatAw63wmHGBIATeGHzikQqKczThIJcWmDCMegD3An6AMcu3AVCUkQIpm8mAKeirzngDGyEkWAEYHPoAwwAAFKNjTBgKW6DAQAAuyMAI+HS03zs+APANMoNAKQKARgAYDnKpwCkEm3QAACWYht1AKlGAAYAWIZt1AFYgQAMALAM26gDsAIBGABgGbZRB2AFFsEBAOKSiK4NbKNuDTpuwOsIwAAA0xLVtYFt1FOPjhsAJRAA4DrBkKHKnfV6cetuVe6sT/gCskR2bUhP86l0coGkf22bHhb+vHRyAbOTCULHDaAZM8AA4CLJnt3rrGuDT81dGyYU5MYcWksK87R0xsg2485lVjKhkvH/DnAqAjAAuER4dq91wAnP7i2dMbLLYdJM1wYzO0KyjXryJev/nRWoYUZXEYABwAVSNbtX15C8rg1so55cbum4QQ0zEoEaYABwgVT00y3fVqtfvfTPmM6la4P9uKHjBjXMSBQCMAC4QLJn98LBY/+Xhzo8z6fm2Ti6NthPuONGe/P/dv9/x66BSCQCMAC4QDJn9zoKHkeia4O9Ob3jBrsGIpEIwADgAsmc3esseITl9OzRpYV2yW7fhn913Mj1R74RyvVnJmSRZDK5pYYZ9sAiOABwgfDs3uwVVfJJEbO1XZ3dizVQ3HnB0LgDFAubUsepHTfcUMMM+2AGGABcIlmze7EGilz/UXF9fxY2pV6448ZFIwaoeEhv24dfyfk1zLAXZoABwEWSMbuXzO2K2ZwBsUrmXQ54DzPAAOAyiZ7dS+biKTsubKIW2b6cXMMMe2EGGADQqWRtV2y3hU3UItufU2uYYS8EYABATJIRPOy0sCkVW0kjMdg1EF1FAAYAxCzRwSOZ9cVmUIsMeAs1wAAAy9hlcwY71iIDSB4CMADAUnZY2GS3WmQAyUUJBADAclYvbLJTLTKA5CMAAwBswcqFTXapRQaQGpRAAAA8zy61yABSgwAMAIDsUYsMIDUogQAA4P+zuhYZQGoQgAEAOAKbLDT3ReZNANyMAAwAAFqwHTS8gBpgAAAg6V/bQbfeFCS8HXT5tlqLRgYkFgEYAAB0uh201LwddDAU7QzAWQjAAACA7aDhKQRgAADAdtDwFAIwAABgO2h4CgEYAAC0bAfdXrMzn5q7QbAdNNyAAAwAANgOGp5CAAYAAJLYDhrewUYYAICUY6cx+2I7aHgBARgAkFLsNGZ/bAcNt6MEAgCQMuw0BsAOCMAAgJRgpzEAdkEABgCkhBt3GguGDFXurNeLW3ercmc94R1wCGqAAQAp4badxqhlBpzLMTPAn3/+uWbOnCm/3y+/36+ZM2fqwIED7Z5/6NAh3X777TrllFPUs2dP9e/fX1dccYX27NmTukEDAFq4aacxapkBZ3NMAJ4+fbq2bt2q8vJylZeXa+vWrZo5c2a753/11VeqqqrSXXfdpaqqKq1evVrvvfeeLrzwwhSOGgC8IZZSALfsNEYtM+B8jiiB2L59u8rLy7VhwwaNGTNGkvTwww+ruLhY7777rr7zne+0eYzf71dFRUXEsQcffFBFRUXatWuXjjvuuJSMHQDcLtZSgPBOY7NXVMknRQRIJ+00ZqaWmVZigD05Yga4srJSfr+/JfxK0tixY+X3+7V+/fqYv08gEJDP59MxxxzT7jlNTU1qaGiI+AAARGe2FMANO425rZYZ8CJHzADX1dWpb9++bY737dtXdXV1MX2PxsZG3XHHHZo+fbqys7PbPW/hwoVasGBB3GMFAK/orBTAp+ZSgAkFuRGzuk7facxNtcyAV1k6Azx//nz5fL4OPzZt2iRJ8vna/mE0DCPq8dYOHTqkqVOnKhQKacmSJR2eO3fuXAUCgZaPjz/+OL4nBwAu15W2ZuGdxi4aMUDFQ3o7JvxK7qllBrzM0hng66+/XlOnTu3wnOOPP15vv/22Pv300zZf++yzz9SvX78OH3/o0CFddtllqqmp0dq1azuc/ZWkjIwMZWRkdD54APA4r5YCuKWWGfAySwNwnz591KdPn07PKy4uViAQ0MaNG1VUVCRJeuuttxQIBDRu3Lh2HxcOvzt27NBrr72m3r1ZjAAAieLlUoBwLXPrxX+59AEGHMFnGIYj+rRMmjRJe/bs0UMPPSRJ+ulPf6rBgwerrKys5ZyTTz5ZCxcu1CWXXKLDhw/r0ksvVVVVlV566aWImeKcnBz16NEjpp/b0NAgv9+vQCDQ6ewxAHhJMGTo9EVrVRdojFoH7FNzIFx3+/dcOxsaDBmOrWUG3MRsXnNEF
whJevLJJ3XKKado4sSJmjhxooYPH64nnngi4px3331XgUBAkvTJJ59ozZo1+uSTTzRixAjl5eW1fJjpHAEAiC5cCiCpTT2sV0oBnFzLDHiZY2aArcIMMAB0jC2BAVjNbF5zRBs0AIB9Ob2tGQDvIQADALosXAoAAE7gmBpgAAAAIBGYAQYAtIsuBwDciAAMAIiKxW0A3IoSCABAG+XbajV7RVWbrY7rAo2avaJK5dtqLRoZAHQdARgAECEYMrSgrDrq5hbhYwvKqhUM0UUTgDMRgAEAETbW7G8z83skQ1JtoFEba/anblAAkEAEYABAhL0H2w+/8ZwHAHZDAAYAROiblZnQ8wDAbgjAAIAIRfk5yvNnqr1mZz41d4Moys9J5bAAIGEIwACACOlpPpVOLpCkNiE4/Hnp5AL6AQNwLAIwAKCNksI8LZ0xUrn+yDKHXH+mls4YSR9gAI7GRhgAgKhKCvM0oSCXneAAuA4BGADQrvQ0n4qH9LZ6GACQUJRAAAAAwFMIwAAAAPAUAjAAAAA8hQAMAAAATyEAAwAAwFMIwAAAAPAUAjAAAAA8hQAMAAAATyEAAwAAwFPYCQ4AAAcKhgy2qQbiRAAGAMBhyrfVakFZtWoDjS3H8vyZKp1coJLCPAtHBjgDJRAAADhI+bZazV5RFRF+Jaku0KjZK6pUvq3WopEBzkEABgDAIYIhQwvKqmVE+Vr42IKyagVD0c4AEEYABgDAITbW7G8z83skQ1JtoFEba/anblCAAxGAAQBwiL0H2w+/8ZwHeBUBGAAAh+iblZnQ8wCvIgADAOAQRfk5yvNnqr1mZz41d4Moys9J5bAAxyEAAwDgEOlpPpVOLpCkNiE4/Hnp5AL6AQOdIAADAOAgJYV5WjpjpHL9kWUOuf5MLZ0xkj7AQAzYCAMAAIcpKczThIJcdoID4kQABgDAgdLTfCoe0tvqYQCORAkEAAAAPIUADAAAAE8hAAMAAMBTCMAAAADwFAIwAAAAPIUADAAAAE8hAAMAAMBTCMAAAADwFAIwAAAAPIUADAAAAE8hAAMAAMBTCMAAAADwFAIwAAAAPIUADAAAAE8hAAMAAMBTCMAAAADwFAIwAAAAPIUADAAAAE8hAAMAAMBTCMAAAADwlG5WD8DuDMOQJDU0NFg8EgAAAEQTzmnh3NYZAnAnDh48KEkaNGiQxSMBAABARw4ePCi/39/peT4j1qjsUaFQSHv27JFhGDruuOP08ccfKzs72+ph2V5DQ4MGDRrE9YoR18scrpc5XC9zuF7mcc3M4XqZE8v1MgxDBw8eVP/+/ZWW1nmFLzPAnUhLS9PAgQNbptazs7N5sZrA9TKH62UO18scrpc5XC/zuGbmcL3M6ex6xTLzG8YiOAAAAHgKARgAAACeQgCOUUZGhkpLS5WRkWH1UByB62UO18scrpc5XC9zuF7mcc3M4XqZk4zrxSI4AAAAeAozwAAAAPAUAjAAAAA8hQAMAAAATyEAAwAAwFMIwO349a9/rXHjxunoo4/WMcccE9NjDMPQ/Pnz1b9/fx111FE6++yz9c9//jO5A7WJzz//XDNnzpTf75ff79fMmTN14MCBDh/zxRdf6Prrr9fAgQN11FFHaejQoVq6dGlqBmwD8VwzSdq+fbsuvPBC+f1+ZWVlaezYsdq1a1fyB2yxeK9X2LXXXiufz6f7778/aWO0E7PX69ChQ7r99tt1yimnqGfPnurfv7+uuOIK7dmzJ3WDTqElS5YoPz9fmZmZGjVqlN54440Oz3/99dc1atQoZWZm6oQTTtCyZctSNFJ7MHO9Vq9erQkTJuhb3/qWsrOzVVxcrFdeeSWFo7We2ddX2Jtvvqlu3bppxIgRyR2gDZm9Zk1NTZo3b54GDx6sjIwMDRkyRI888kjsP9BAVL/85S+N3/72t8acOXMMv98f02PuvfdeIysry3juueeMd955x5gyZYqRl5dnNDQ0JHewNlBSUmIUFhYa69evN9avX28UFhYaP/jBDzp8zDXXXGMMGTLEeO2114yamhrjoYceMtLT040XXnghRaO2VjzX7P333zdycnKMn//850ZVVZWxc+dO46WXXjI+/fTTFI3aOvFcr7Dnn3/eOPXUU43+/fsbv/vd75I7UJswe70OHDhgnHvuucaqVauM//mf/zEqKyuNMWPGGKNGjUrhqFPj6aefNrp37248/PDDRnV1tXHjjTcaPXv2ND766KOo53/wwQfG0Ucfbdx4441GdXW18fDDDxvdu3c3nn322RSP3Bpmr9eNN95oLFq0yNi4caPx3nvvGXPnzjW6d+9uVFVVpXjk1jB7vcIOHDhgnHDCCcbEiRONU089NTWDtYl4rtmFF15ojBkzxqioqDBqamqMt956y3jzzTdj/pkE4E48+uijMQXgUChk5ObmGvfee2/LscbGRsPv9xvLli1L4gitV11dbUgyNmzY0HKssrLSkGT8z//8T7uPGzZsmHH33XdHHBs5cqRx5513Jm2sdhHvNZsyZYoxY8aMVAzRVuK9XoZhGJ988okxYMAAY9u2bcbgwYM9EYC7cr2OtHHjRkNSp/9wO01RUZExa9asiGMnn3yycccdd0Q9/7bbbjNOPvnkiGPXXnutMXbs2KSN0U7MXq9oCgoKjAULFiR6aLYU7/WaMmWKceeddxqlpaWeC8Bmr9lf/vIXw+/3G/X19XH/TEogEqSmpkZ1dXWaOHFiy7GMjAydddZZWr9+vYUjS77Kykr5/X6NGTOm5djYsWPl9/s7fO6nn3661qxZo927d8swDL322mt67733dN5556Vi2JaK55qFQiH9+c9/1kknnaTzzjtPffv21ZgxY/TCCy+kaNTWifc1FgqFNHPmTP385z/XsGHDUjFUW4j3erUWCATk8/liLgNzgm+++UabN2+O+FstSRMnTmz32lRWVrY5/7zzztOmTZt06NChpI3VDuK5Xq2FQiEdPHhQOTk5yRiircR7vR599FHt3LlTpaWlyR6i7cRzzdasWaPRo0fr//yf/6MBAwbopJNO0q233qqvv/465p9LAE6Quro6SVK/fv0ijvfr16/la25VV1envn37tjnet2/fDp/773//exUUFGjgwIHq0aOHSkpKtGTJEp1++unJHK4txHPN9u7dqy+++EL33nuvSkpK9Oqrr+qSSy7Rv/3bv+n1119P9pAtFe9rbNGiRerWrZtuuOGGZA7PduK9XkdqbGzUHXfcoenTpys7OzvRQ7TMvn37FAwGTf2trquri3r+4cOHtW/fvqSN1Q7iuV6t3Xffffryyy912WWXJWOIthLP9dqxY4fuuOMOPfnkk+rWrVsqhmkr8VyzDz74QOvWrdO2bdv0/PPP6/7779ezzz6r6667Luaf66kAPH/+fPl8vg4/Nm3a1KWf4fP5Ij43DKPNMacwc72iPcfOnvvvf/97bdiwQWvWrNHmzZt133336Wc/+5n++te/Ju05JVsyr1koFJIkXXTRRbr55ps1YsQI3XHHHfrBD37g2AU5ybxemzdv1gMPPKDHHnvMsb+DrSX7dzLs0KFD
mjp1qkKhkJYsWZLw52EHZv9WRzs/2nG3ivfftpUrV2r+/PlatWpV1DdlbhXr9QoGg5o+fboWLFigk046KVXDsyUzr7FQKCSfz6cnn3xSRUVFOv/88/Xb3/5Wjz32WMyzwJ56q3H99ddr6tSpHZ5z/PHHx/W9c3NzJTXPFOTl5bUc37t3b5t3NU4R6/V6++239emnn7b52meffdbuc//666/1i1/8Qs8//7wuuOACSdLw4cO1detW/eY3v9G5557b9SdggWResz59+qhbt24qKCiIOD506FCtW7cu/kFbKJnX64033tDevXt13HHHtRwLBoO65ZZbdP/99+vDDz/s0titkMzrFXbo0CFddtllqqmp0dq1a101+ys1/x6lp6e3mVnq6G91bm5u1PO7deum3r17J22sdhDP9QpbtWqVrr76aj3zzDOO/ZtultnrdfDgQW3atElbtmzR9ddfL6k53BmGoW7duunVV1/V9773vZSM3SrxvMby8vI0YMAA+f3+lmNDhw6VYRj65JNPdOKJJ3b6cz0VgPv06aM+ffok5Xvn5+crNzdXFRUVOu200yQ117W8/vrrWrRoUVJ+ZrLFer2Ki4sVCAS0ceNGFRUVSZLeeustBQIBjRs3LupjDh06pEOHDiktLfImRHp6estMpxMl85r16NFD3/3ud/Xuu+9GHH/vvfc0ePDgrg/eAsm8XjNnzmzzj+55552nmTNn6kc/+lHXB2+BZF4v6V/hd8eOHXrttddcGe569OihUaNGqaKiQpdccknL8YqKCl100UVRH1NcXKyysrKIY6+++qpGjx6t7t27J3W8VovneknNM78//vGPtXLlypZJDi8we72ys7P1zjvvRBxbsmSJ1q5dq2effVb5+flJH7PV4nmNjR8/Xs8884y++OIL9erVS1Lzv4VpaWkaOHBgbD847uVzLvfRRx8ZW7ZsMRYsWGD06tXL2LJli7Flyxbj4MGDLed85zvfMVavXt3y+b333mv4/X5j9erVxjvvvGNMmzbNU23Qhg8fblRWVhqVlZXGKaec0qblUuvrddZZZxnDhg0zXnvtNeODDz4wHn30USMzM9NYsmRJqodviXiu2erVq43u3bsbf/zjH40dO3YYDz74oJGenm688cYbqR5+ysVzvVrzShcIwzB/vQ4dOmRceOGFxsCBA42tW7catbW1LR9NTU1WPIWkCbdcWr58uVFdXW3cdNNNRs+ePY0PP/zQMAzDuOOOO4yZM2e2nB9ug3bzzTcb1dXVxvLlyz3ZBi3W6/XUU08Z3bp1MxYvXhzxOjpw4IBVTyGlzF6v1rzYBcLsNTt48KAxcOBA44c//KHxz3/+03j99deNE0880bjmmmti/pkE4HZceeWVhqQ2H6+99lrLOZKMRx99tOXzUChklJaWGrm5uUZGRoZx5plnGu+8807qB2+B+vp64/LLLzeysrKMrKws4/LLLzc+//zziHNaX6/a2lrjqquuMvr3729kZmYa3/nOd4z77rvPCIVCqR28ReK5ZoZhGMuXLze+/e1vG5mZmcapp57qmb7J8V6vI3kpAJu9XjU1NVH/5rX+u+cWixcvNgYPHmz06NHDGDlypPH666+3fO3KK680zjrrrIjz//a3vxmnnXaa0aNHD+P44483li5dmuIRW8vM9TrrrLOivo6uvPLK1A/cImZfX0fyYgA2DPPXbPv27ca5555rHHXUUcbAgQONOXPmGF999VXMP89nGP+/kh8AAADwAE91gQAAAAAIwAAAAPAUAjAAAAA8hQAMAAAATyEAAwAAwFMIwAAAAPAUAjAAAAA8hQAMAAAATyEAAwAAwFMIwADgMFdddZV8Pp98Pp+6d++uE044Qbfeequ+/PLLlnOee+45nX322fL7/erVq5eGDx+uu+++W/v374/4Xl9//bWOPfZY5eTk6Ouvv27zs/74xz/q7LPPVnZ2tnw+nw4cOJDspwcASUcABgAHKikpUW1trT744APdc889WrJkiW699VZJ0rx58zRlyhR997vf1V/+8hdt27ZN9913n/7xj3/oiSeeiPg+zz33nAoLC1VQUKDVq1e3+TlfffWVSkpK9Itf/CIlzwsAUsFnGIZh9SAAALG76qqrdODAAb3wwgstx37yk5/opZde0osvvqgxY8bo/vvv14033tjmsQcOHNAxxxzT8vk555yjqVOnyjAM/dd//ZfWrl0b9Wf+7W9/0znnnKPPP/884vEA4ETdrB4AAKDrjjrqKB06dEhPPvmkevXqpZ/97GdRzzsyvO7cuVOVlZVavXq1DMPQTTfdpA8++EAnnHBCikYNANagBAIAHG7jxo166qmn9P3vf187duzQCSecoO7du3f6uEceeUSTJk1qqQEuKSnRI488koIRA4C1CMAA4EAvvfSSevXqpczMTBUXF+vMM8/Ugw8+KMMw5PP5On18MBjUf/7nf2rGjBktx2bMmKH//M//VDAYTObQAcBylEAAgAOdc845Wrp0qbp3767+/fu3zPiedNJJWrdunQ4dOtThLPArr7yi3bt3a8qUKRHHg8GgXn31VU2aNCmp4wcAKzEDDAAO1LNnT33729/W4MGDI4Lu9OnT9cUXX2jJkiVRHxduY7Z8+XJNnTpVW7dujfi4/PLLtXz58lQ8BQCwDDPAAOAiY8aM0W233aZbbrlFu3fv1iWXXKL+/fvr/fff17Jly3T66adr+vTpKisr05o1a1RYWBjx+CuvvFIXXHCBPvvsM33rW99SXV2d6urq9P7770uS3nnnHWVlZem4445TTk6OFU8RALqMGWAAcJlFixbpqaee0ltvvaXzzjtPw4YN05w5czR8+HBdeeWVevzxx9WzZ099//vfb/PYc845R1lZWS39gpctW6bTTjtNP/nJTyRJZ555pk477TStWbMmpc8JABKJPsAAAADwFGaAAQAA4CkEYAAAAHgKARgAAACeQgAGAACApxCAAQAA4CkEYAAAAHgKARgAAACeQgAGAACApxCAAQAA4CkEYAAAAHgKARgAAACe8v8AXBe7GAAcJRkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fingerprints = fingerprints.detach()\n", + "\n", + "pca = PCA(n_components=2)\n", + "\n", + "principalComponents = pca.fit_transform(fingerprints)\n", + "\n", + "fig = plt.figure(figsize=(8, 8))\n", + "ax = fig.add_subplot(1, 1, 1)\n", + "ax.set_title(\"Fingerprints\")\n", + "ax.set_xlabel('PCA1'); ax.set_ylabel('PCA2')\n", + "\n", + "ax.scatter(principalComponents[:, 0], principalComponents[:, 1])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAscAAAK7CAYAAAAeFiKUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABbOUlEQVR4nO3dfXzT9b338XdaSjuQRmhtUxS1cDxKrQ4KK1S8V6CIiOzsACJVrnnYJTsexZsN0eNK3XZx4XU23eZA5/AWRM8UlTpXRVGns1ihFE8tMof1DlK5T/GmUNPf9UeTSNq0Tdokv98veT0fjz62/vpN+k1NyzvffL6fr8MwDEMAAAAAlGL2BAAAAACrIBwDAAAAPoRjAAAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfAjHAAAAgA/hGAAAAPAhHAOACR5++GE5HI4uP1577TWzp9jJa6+91mlu8+bN08knn2zanAAg2vqZPQEASGYPPfSQTjvttE7XCwoKTJhN5O644w7dcMMNZk8DAKKGcAwAJiosLNTYsWPNnkavjRgxwuwpAEBUUVYBABbmcDh03XXX6bHHHtPIkSM1YMAAffe739Xzzz/faez777+vK664Qrm5uUpPT9eJJ56oq666SocPHw6Mqa+v1/Tp0zV48GBlZGRo1KhReuSRR0LeV2lpqQYMGKDs7Gxde+21OnToUKdxocoqIpnzc889pzPPPFPp6ekaPny4fvOb32jJkiVyOBxB4/70pz9p3LhxcjqdGjBggIYPH64f/vCH4f4YASBsrBwDgIm8Xq+++eaboGsOh0OpqamBz//85z/rnXfe0Z133qljjjlGd911l2bMmKHt27dr+PDhkqStW7fq7LPPVnZ2tu68806dcsopcrvdWrdunY4cOaL09HRt375dZ511lnJycvTb3/5WWVlZWrVqlebNm6fPP/9cP/3pTyVJn3/+uc477zylpaVp+fLlys3N1erVq3XdddeF/bjCmXNVVZW+//3v69xzz9WTTz6pb775Rv/1X/+lzz//POi+qqurNWvWLM2aNUtLlixRRkaGPv74Y23YsKFXP3MA6JYBAIi7hx56yJAU8iM1NTUwTpKRm5trNDc3B641NTUZKSkpxtKlSwPXLrzwQuPYY481du/e3eX3nD17tpGenm588sknQdenTJliDBgwwDh48KBhGIaxaNEiw+FwGHV1dUHjJk6caEgyXn311cC1q6++2jjppJOCxoU75+9973vGsGHDjMOHDweuHTp0yMjKyjKO/ufpv/7rvwxJgfkBQCxRVgEAJnr00Uf1zjvvBH28/fbbQWMuuOACDRo0KPB5bm6ucnJy9PHHH0uSvvrqK73++uuaOXOmjjvuuC6/14YNG3TRRRdp2LBhQdfnzZunr776StXV1ZKkV199Vaeffrq++93vBo2bM2dO2I+rpzl/+eWX2rRpky6//HL1798/MO6YY47RtGnTgu7re9/7niRp5syZ+u///m/t3Lkz7HkAQKQIxwBgopEjR2rs2LFBH2PGjAkak5WV1el26enp+vrrryVJBw4ckNfr1QknnNDt99q3b5/y8vI6XR86dGjg6/7/dblcncaFutaVcOZsGIZyc3M7jet47dxzz9Wzzz6rb775RldddZVOOOEEFRYWas2aNWHPBwDCRTgGAJsbMmSIUlNT9dlnn3U7LisrS263u9P1Xbt2SZKys7MD45qamjqNC3WttwYPHiyHw9Gpvrir7zN9+nS98sor8ng8eu2113TCCSdozpw5gdVuAIgWwjEA2Nx3vvMdnXfeefrTn/6kvXv3djnuoosu0oYNGwJh2O/RRx/VgAEDNH78eEntJRHvvfeetm7dGjTu8ccfj9qcBw4cqLFjx+rZZ5/VkSNHAte/+OKLkF0t/NLT03Xeeedp2bJlkqQtW7ZEbU4AINGtAgBMVV9f36lbhdTeP7i7+uGOfv3rX+vss8/WuHHjdOutt+qf/umf9Pnnn2vdunW6//77NWjQIJWXl+v555/XBRdcoJ/97GcaMmSIVq9erT//+c+666675HQ6JUkLFy7Ugw8+qKlTp+oXv/hFoFvF+++/H7XHLUl33nmnpk6dqsmTJ+uGG26Q1+vV//t//0/HHHOM9u/fHxj3s5/9TJ999pkuuuginXDCCTp48KB+85vfKC0tTeedd15U5wQAhGMAMNH/+l//K+T1Bx54QP/2b/8W9v1897vfVU1NjcrLy7V48WIdOnRILpdLF154YWDD26mnnqq33npLt912m/793/9dX3/9tUaOHKmHHnpI8+bNC9yXy+XS66+/rhtuuEELFizQgAEDNGPGDN17772aPn16nx7v0UpLS/X000/rZz/7mWbNmiWXy6Uf//jH2rVrlx577LHAuHHjxmnTpk1atGiR9uzZo2OPPVZjx47Vhg0bdPrpp0dtPgAgSQ7DMAyzJwEAgCS1trZq1KhROv744/XSSy+ZPR0ASYiVYwCAaa655hpNnDhReXl5ampq0n333adt27bpN7/5jdlTA5CkCMcAANMcOnRIt9xyi/bs2aO0tDQVFRXphRde0MUXX2z21AAkKcoqAAAAAB9auQEAAAA+hGMAAADAh3AMAAAA+LAhLwra2tq0a9cuDRo0SA6Hw+zpAAAAoAPDMHTo0CENHTpUKSldrw8TjqNg165dGjZsmNnTAAAAQA8+/fRTnXDCCV1+nXAcBYMGDZLU/sPOzMw0eTYAAADoqLm5WcOGDQvktq4QjqPAX0qRmZlJOAYAALCwnkpg2ZAHAAAA+BCOAQAAAB/CMQAAAOBDOAYAAAB8CMcAAACAD+EYAAAA8CEcAwAAAD6EYwAAAMCHcAwAAAD4EI4BAAAAH8IxAAAA4EM4BgAAAHwIxwAAAIAP4RgAAADwIRwDAAAAPoRjA
AAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfPqZPQEAycXbZqimcb92H2pRzqAMFecPUWqKw+xpAQAgiXAMII6q6t2qqGyQ29MSuJbnzFD5tAKVFuaZODMAANpRVgEgLqrq3VqwqjYoGEtSk6dFC1bVqqrebdLMAAD4FuEYQMx52wxVVDbICPE1/7WKygZ520KNAAAgfgjHAGKupnF/pxXjoxmS3J4W1TTuj9+kAAAIgXAMIOZ2H+o6GPdmHAAAsUI4BhBzOYMyojoOAIBYIRwDiLni/CHKc2aoq4ZtDrV3rSjOHxLPaQEA0AnhGEDMpaY4VD6tQJI6BWT/5+XTCuh3DAAwHeEYQFyUFuZpxdwiuZzBpRMuZ4ZWzC2izzEAwBI4BARA3JQW5mligYsT8gAAlkU4BhBXqSkOlYzIMnsaAACERFkFAAAA4EM4BgAAAHwIxwAAAIAP4RgAAADwIRwDAAAAPoRjAAAAwIdwDAAAAPjQ5xgAkpS3zeBAFgDogHAMAEmoqt6tisoGuT0tgWt5zgyVTyvgKG8ASY2yCgBIMlX1bi1YVRsUjCWpydOiBatqVVXvNmlmAGA+wjEAJBFvm6GKygYZIb7mv1ZR2SBvW6gRAJD4CMcAkERqGvd3WjE+miHJ7WlRTeP++E0KACyEcAwASWT3oa6DcW/GAUCiIRwDQBLJGZQR1XEAkGgIxwCQRIrzhyjPmaGuGrY51N61ojh/SDynBQCWQTgGgCSSmuJQ+bQCSeoUkP2fl08roN8xgKRFOAaSlLfNUPWOfXqubqeqd+yjO0ESKS3M04q5RXI5g0snXM4MrZhbRJ9jAEmNQ0CAJMQBECgtzNPEAhcn5AFABw7DMFgu6qPm5mY5nU55PB5lZmaaPR2gW/4DIDr+4vsjESuHAIBEFG5eo6wCSCIcAAEAQPcIx0AS4QAIe6I+HADih5pjIIlwAIT9UB8OAPHFyjGQRDgAwl789eEdV/ubPC1asKpWVfVuk2YGAImLcAwkEQ6AsA/qwwHAHIRjIIlwAIR9UB8OAOYgHANJhgMg7IH6cAAwBxvygCTEARDWR304AJiDcAwkqdQUh0pGZJk9DXTBXx/e5GkJWXfsUPtqP/XhABBdlFUAgAVRHw4A5iAcA4BFUR8OAPFHWQUAWBj14QAQX4RjALA46sMBIH4oqwAAAAB8CMcAAACAD2UVAJCAvG0GdcoA0AuEYwBIMFX1blVUNgQdP53nzFD5tAI6XABADyirAIAEUlXv1oJVtUHBWJKaPC1asKpWVfVuk2YGAPZAOAaABOFtM1RR2RDyRD3/tYrKBnnbQo0AAEiEYwBIGDWN+zutGB/NkOT2tKimcX/8JgUANkM4BoAEsftQ18G4N+MAIBkRjgEgQeQMyuh5UATjACAZEY4BIEEU5w9RnjNDXTVsc6i9a0Vx/pB4TgsAbIVwDAAJIjXFofJpBZLUKSD7Py+fVkC/YwDohu3C8fLly5Wfn6+MjAyNGTNGb7zxRpdj3W635syZo1NPPVUpKSlauHBhyHFPP/20CgoKlJ6eroKCAj3zzDMxmj2AePK2GaresU/P1e1U9Y59SdGlobQwTyvmFsnlDC6dcDkztGJuEX2OAaAHtjoE5Mknn9TChQu1fPlyTZgwQffff7+mTJmihoYGnXjiiZ3GHz58WMcdd5xuv/123X333SHvs7q6WrNmzdLPf/5zzZgxQ88884xmzpypN998U+PGjYv1QwIQI8l8EEZpYZ4mFrg4IQ8AesFhGIZtllLGjRunoqIirVixInBt5MiRuvzyy7V06dJub3v++edr1KhRuueee4Kuz5o1S83NzfrLX/4SuFZaWqrBgwdrzZo1Yc2rublZTqdTHo9HmZmZ4T8gADHhPwij4x83fzRkBRUAkk+4ec02ZRVHjhzR5s2bNWnSpKDrkyZN0ltvvdXr+62uru50n5MnT+72Pg8fPqzm5uagDwDWwEEYAIC+sE043rt3r7xer3Jzc4Ou5+bmqqmpqdf329TUFPF9Ll26VE6nM/AxbNiwXn9/ANHFQRgAgL6wTTj2cziCa+YMw+h0Ldb3uXjxYnk8nsDHp59+2qfvDyB6OAgDANAXttmQl52drdTU1E4rurt37+608hsJl8sV8X2mp6crPT29198TQOxwEAYAu/K2GWyktQDbhOP+/ftrzJgxWr9+vWbMmBG4vn79ek2fPr3X91tSUqL169frxhtvDFx76aWXdNZZZ/VpvkAisOMfav9BGE2elpB1xw61tzXjIAwAVpLMHXasxjbhWJJuuukmlZWVaezYsSopKdEf/vAHffLJJ7r22msltZc77Ny5U48++mjgNnV1dZKkL774Qnv27FFdXZ369++vgoL2Rvk33HCDzj33XC1btkzTp0/Xc889p5dffllvvvlm3B8fYCV2/UPtPwhjwapaOaSggMxBGACsqKsOO02eFi1YVUuHnTizVSs3qf0QkLvuuktut1uFhYW6++67de6550qS5s2bp48++kivvfZaYHyo2uGTTjpJH330UeDzp556Sv/5n/+pDz/8UCNGjNAvf/lLff/73w97TrRyQ6JJhFZodg33AJKLt83Q2cs2dLmR2P9u15uLLuRFfR+Fm9dsF46tiHCMRJJIf6jtWBYCILlU79inKx7Y2OO4NfPHq2REVhxmlLjCzWu2KqsAEHuRtEKz+h/q1BSH5ed4NMI8kHzosGM9hGMAQfhDbQ7KQIDkRIcd67Fdn2MAscUf6vjz13h3XLH3b8apqnebNDMAsebvsNPVe0QOtb9QpsNO/BCOAQThD3V8cdw1kNz8HXYkdfq7S4cdcxCOAQThD3V8cdw1gNLCPK2YWySXM/gdOZczwxbdgRINNccAOvH/oe5YA+uiBjbqqPEGILX/3Z1Y4GJTrgUQjoEYSISuA/yhjg9qvAH42a3DTqIiHANRlkhdB5LxD3W8X9gc+PJIj2Oo8QaA+CEcA1HEEaD2Fu8XNt42Qz//c0OP4+6YOpIVewCIEzbkAVFC1wF7M6OdWk+b8fwGD0yP6vf1thmq3rFPz9XtVPWOfTwnAeAorBwDUZJIJ8slm55e2DjU/sJmYoErqiu4ZmzGS6SyHwCIBVaOgSih64B9mdVOLd6b8ThsBAB6RjgGooSuA/Zl1gubeB64QtkPAISHcAxECSfL2ZdZL2zieeBKPFbHqWUGkAioOQaixB90FqyqlUMKWqHjZDlr87+wafK0hFxZdaj9AJRYvLCJ14ErsV4dp5YZQKIgHANRxMly9mT2C5t4HLgSy9VxWhgCSCQOwzB436uPmpub5XQ65fF4lJmZafZ0YAHxPEgiEU7js4pEXv30thk6e9mGHlfH31x0YUTPH//9dlWy0dv7BYBoCzevsXIMxEC8TpZL5DBnhkQ+MjtWq+O0MASQaNiQB9gUbbn6pqvNY/4XNtNHHa+SEVkJEYz9/GU/Lmdw6YTLmdHr0gdaGAJINKwcAzZk1qEViSKZV9yjvTpOC0MAiYaVY8CGzDq0IhGw4h7d1XFaGAJINIRjwIZ4K7t3kvEgjFj3Ho5n
r2YAiAfKKgAb4q3s3km2zWPxKh+hhSGAREI4BmzIzEMr7CyZVtzj3Xs4kTt9AEgulFUANsRb2b2TLCvuZpWPJHKnDwDJg3AM2FQs2nIlumTZPMaGTQDoPcoqABvjrezImH1MdLwkU/kIAEQb4RiwuXidxpcokmHzWLKUjwBALBCOASSdRF9xZ8MmAPQe4RhAUkrkFfdkKR8BgFhgQx4AJCA2bAJA77ByDAAJKtHLRwAgFgjHAJDAErl8BABigbIKAAAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfOhWAQC94G0zaJEGAAmIcAwgKpIpLFbVu1VR2SC3pyVwLc+ZofJpBRyuAQA2RzgG0GfJFBar6t1asKo26EhmSWrytGjBqlpOnwMAm6PmGECf+MPi0cFY+jYsVtW7TZpZ9HnbDFVUNnQKxpIC1yoqG+RtCzUCAGAHhGMAvZZsYbGmcX+nFwFHMyS5PS2qadwfv0kBAKKKcAyg15ItLO4+1PVj7c04AID1EI4B9FqyhcWcQRlRHQcAsB425AHotXiHRbM7YhTnD1GeM0NNnpaQpSQOSS5n+7wAAPZEOAbQa/EMi1boiJGa4lD5tAItWFUrhxT0mP0RvXxaQcK2sAOAZEBZBYBe84dF6dtw6BfNsGiljhilhXlaMbdILmfwarjLmRFxGzdvm6HqHfv0XN1OVe/YlzAbFwHAzhyGYfDXuI+am5vldDrl8XiUmZlp9nSAuIvlqq63zdDZyzZ0ufHPvzr95qIL47pi29cSDyushANAMgk3rxGOo4BwDDuLVh1vrOqBq3fs0xUPbOxx3Jr541UyIqvP3y8eujpIxP/T4iARAIi+cPMaNcdAEovm6mVqiiMm4TTROmL01Bvaofbe0BMLXNQuA4AJqDkGkpSV6ni7k2jt05KtNzQA2A3hGEhCdjrZzt8Ro6s1VIfaV7vt0j4t0VbCASDREI6BJGSn1ct4dcSIl0RbCQeAREM4BpKQ3VYvo9k+zWyJthIOAImGDXlAErLj6mVpYZ4mFrhMPSEvGjhIBACsjZVjIAnZdfXS3xFj+qjjVTIiy7YBMpFWwgEg0bByDCQhVi/Nlygr4QCQaDgEJAo4BAR2xSltAIBkwSEgAHrE6iUAAMEIx0CSi9XJdgAA2BEb8gAAAAAfwjEAAADgQzgGAAAAfKg5BgAkNG+bwaZTAGEjHAOIOsIIIhWr5wztCtERf5/QE8IxgKgijCBSsXrOVNW7tWBVrTo282/ytGjBqlpOI0xC/H1COKg5BhA1/jBy9D880rdhpKrebdLMYFWxes542wxVVDZ0CsbStydCVlQ2yNvGOVjJgr9PCBfhGEBUEEYQqVg+Z2oa93cKQR3v3+1pUU3j/ojvG/bD3ydEgnAMICoII4hULJ8zuw91fb+9GQd74+8TIkE4BhAVhBFEKpbPmZxBGVEdB3vj7xMiwYY8AFFBGDGHnXfex/I5U5w/RHnODDV5WkK+le6Q5HK2/7yQ+Pj7hEgQjgFEBWEk/uy+8z6Wz5nUFIfKpxVowapaOaSg+/e/dCifVmCbFxLoG/4+IRKUVQCICn8Ykb4NH36EkehLhJ33sX7OlBbmacXcIrmcwauBLmcGbdySDH+fEAmHYRhszeyj5uZmOZ1OeTweZWZmmj0dwFR2X820A2+bobOXbehyg5F/FezNRRfa4h/7WD9n7Fx6guji71NyCzevEY6jgHAMBCOMxFb1jn264oGNPY5bM3+8SkZkxWFGfcdzBvHCcy15hZvXbFdWsXz5cuXn5ysjI0NjxozRG2+80e34119/XWPGjFFGRoaGDx+u++67L+jrDz/8sBwOR6ePlhZ2rMLavG2Gqnfs03N1O1W9Y5+l+nOmpjhUMiJL00cdr5IRWfzDE2WJtPPe/zx+/t1dkqRLzxzKcwYxxd8n9MRWG/KefPJJLVy4UMuXL9eECRN0//33a8qUKWpoaNCJJ57YaXxjY6MuueQSzZ8/X6tWrdLf/vY3/fjHP9Zxxx2nf/mXfwmMy8zM1Pbt24Num5HBjlVYF28NJrdE2XnP8xiAFdmqrGLcuHEqKirSihUrAtdGjhypyy+/XEuXLu00ftGiRVq3bp22bdsWuHbttddq69atqq6ultS+crxw4UIdPHiw1/OirALx5N+I1fEX17/2wUajxOevOe5p572Va455HgOIt4Qrqzhy5Ig2b96sSZMmBV2fNGmS3nrrrZC3qa6u7jR+8uTJ2rRpk1pbWwPXvvjiC5100kk64YQTdOmll2rLli3dzuXw4cNqbm4O+gDigSNQIdl/5z3PYwBWZptwvHfvXnm9XuXm5gZdz83NVVNTU8jbNDU1hRz/zTffaO/evZKk0047TQ8//LDWrVunNWvWKCMjQxMmTNAHH3zQ5VyWLl0qp9MZ+Bg2bFgfHx0QHo5AhZ+d25TxPAZgZbaqOZYkhyN4JcQwjE7Xehp/9PXx48dr/Pjxga9PmDBBRUVF+t3vfqff/va3Ie9z8eLFuummmwKfNzc3E5ARF4m0EQt9V1qYp4kFLtvtvOd5DMDKbBOOs7OzlZqa2mmVePfu3Z1Wh/1cLlfI8f369VNWVuj2RikpKfre977X7cpxenq60tPTI3wEQN8lykasSNB2qXv+nfd2kozPYwD2YZtw3L9/f40ZM0br16/XjBkzAtfXr1+v6dOnh7xNSUmJKisrg6699NJLGjt2rNLS0kLexjAM1dXV6Ywzzoje5IEoSbYjUOlmkJiS7XkMwF5sU3MsSTfddJP++Mc/6sEHH9S2bdt044036pNPPtG1114rqb3c4aqrrgqMv/baa/Xxxx/rpptu0rZt2/Tggw9q5cqVuuWWWwJjKioq9OKLL+rDDz9UXV2drrnmGtXV1QXuE7ASu2/EikQiHI+M0JLpeQzAfmwVjmfNmqV77rlHd955p0aNGqW//vWveuGFF3TSSSdJktxutz755JPA+Pz8fL3wwgt67bXXNGrUKP385z/Xb3/726AexwcPHtSPfvQjjRw5UpMmTdLOnTv117/+VcXFxXF/fEA47LwRK1x0M4i/eB8qkwzPYwD2ZKs+x1ZFn2OYIZFrcRPxeGQrM7N8JZGfxwCsJdy8ZpuaYwDB7LgRK1x0M4ifrg7j8JevxHoVN5GfxwDsyVZlFQCSA90M4oPyFQDojHAMwHL83Qy6enPdofa3/elm0DccxgEAnRGOAVgO3Qzig/IVAOiMcAzAkuhmEHuUrwBAZ2zIA2BZdj0e2S44jAMAOiMcA7A0uhnEjr98ZcGqWjmkoIBM+QqAZEVZBQBTxfvwCQSjfMV6+J0AzMXKMQDTmHn4hN1F8/AMylesg98JwHyckBcFnJAHRK6rwyf8cYxVy64RoBITvxNAbIWb1yirACwqkd9a5fCJ3vMHqI79if0n2lXVu02aGfqC3wnAOiirACwo0VcGIzl8gs143+opQDnUHqAmFrgoibAZficA62DlGLCYZFgZ5PCJ3uFEu8TF7wRgHYRjwEKS5a1VDp/oHQJU4uJ3ArAOwjFgIcmyMug/fKKrN/4dai8j4fC
JYASoxMXvBGAdhGPAQpJlZdB/+ISkTmGAwye6RoBKXPxOANZBOAYsJJlWBjl8InLRDFCJ3A3FrvidAKyBPsdRQJ9jRIu3zdDZyzaoydMSsu7YofZ/KN9cdGHCrCBF8zCLZNHXbiaJ3g3F7vidAGIj3LxGOI4CwjGiyd+tQlJQQOYgAByttwGKgyYAJCsOAQFsirdWEY7UFIdKRmRp+qjjVTIiK+xSimTohgIAfcEhIIAFlRbmaWKBi7dWEVUcNAEAPSMcAxblXxkEoiVZuqEAQF9QVgEASSKZuqEAQG8RjgEgSdAnGQB6RjgGgCTBQRMA0DPCMQAkkdLCPP1+zmgNHpgWdN3q3VA4tARAvLAhDwCSSFW9Wz//8zbt/7I1cG3IwP66Y6p1DwDh0BIA8cTKMQAkCf8BIB3buR348oj+/fFaVdW7TZpZ17qac5OnRQtWWXPOAOyNcAwAScCOB4DYcc4A7I9wDFgUNZaIpkgOALEKO84ZgP1RcwxYEDWWiLa+HADibTNMOa2RQ0sAmIFwDFiMv8ay4zqxv8bSyh0FYF29PQDEzBdqHFoCwAyUVQAWQo0lYqU3B4CYvRmOQ0sAmIFwDFgINZaIlXAOAJn9vRP1/Lu7VL1jn45802b6CzUOLQFgBsoqAAuhxtL+zKrPDUdpYZ5WzC3qVCZx7IA0GZLufvnvgWtDBqYF9ULu6OgXaiUjsuI+Zxc1+ABihHAMWAg1lvZmh42UpYV5mljgCgT4j/Z+qbtf/qDTuO6C8dHi8UKt45yt9qIDQGIhHAMW4q+xbPK0hHw726H2FTNqLK3HThspU1McKhmRJW+bobOXbejTfUXjhVo4q+3+OQNArBGOAQvx11guWFUrhxQUtKixtK6eNlI61F6fO7HAZan/dj3VuHcnWi/U7LDaDiC5sCEPsBh/jaXLGbwi53JmWGr1Ed+y60bK3pZEROuFmtndMAAgFFaOAQuixtJe7LqRMtySiCED+2v/l0cCn0djM5xdV9sBJD7CMWBR1Fjah103UoZb4/76Ty7Q5o8PRPWFWiSr7fweAIgnyioAoI/selhFuH2E+/dLUcmILE0fdbxKRmRFZSXXrqvtABIf4RgA+sjOh1WYVeNu19V2AImPsgoAiAI7H1ZhRo07bQsBWJXDMIzYnf2ZJJqbm+V0OuXxeJSZmWn2dGAzVj5RDZHjv2f4/N0qpNBtC+nOAiCaws1rhOMoIByjt+jximTH7wCAeCEcxxHhGL3R1YlqrJoh2bDaDiAews1r1BwDJqDHq70R5qKLtoUArIRwDJiAHq/2RRkAACQ2WrkBJqDHqz1x3DEAJD7CMWACerzaT0+lMFJ7KYy3jW0cAGBnhGPABHY9US2ZRVIKAwCwL8IxYAI7n6iWrCiFAYDkQDgGTGLWsb3oHUphACA50K0CMJEZx/aidzjuGACSA+EYMBk9Xu3BXwqzYFWtHAp93DGlMABgf5RVAECYKIUBgMTHyjEARIBSGABIbIRjAIgQpTAAkLgoqwAAAAB8CMcAAACAD+EYAAAA8CEcAwAAAD6EYwAAAMCHcAwAAAD40MoNiDFvm0FPXISF5woAmI9wDMRQVb1bFZUNcntaAtfynBkqn1bAaWoIwnMFAKyBsgogRqrq3VqwqjYo7EhSk6dFC1bVqqrebdLMYDU8VwDAOgjHQAx42wxVVDbICPE1/7WKygZ520KNQDLhuQIA1kI4BmKgpnF/p1XAoxmS3J4W1TTuj9+kYEk8VwDAWgjHQAzsPtR12Dnayw1NMZ4JrC7c50q44wAAfUM4BmIgZ1BGWOOeqdvJ2+VJLtznSrjjAAB9QzgGYqA4f4iGDOzf47j9X7bydnmSK84fojxnhrpq2OZQe9eK4vwh8ZwWACQtwjEQA6kpDl0+amhYY3m7PLmlpjhUPq2g2zHl0wrodwwAcUI4BmJkYoErrHG8XY7Swjz96Nx8dcy/KQ7pR+fmR63PsbfNUPWOfXqubqeqd+yjpAcAQrBdOF6+fLny8/OVkZGhMWPG6I033uh2/Ouvv64xY8YoIyNDw4cP13333ddpzNNPP62CggKlp6eroKBAzzzzTKymjyTif7u8K7xdDr+qerf+8NdGdcyqhiH94a+NUelzXFXv1tnLNuiKBzbqhifqdMUDG3X2sg30UAaADmwVjp988kktXLhQt99+u7Zs2aJzzjlHU6ZM0SeffBJyfGNjoy655BKdc8452rJli2677TZdf/31evrppwNjqqurNWvWLJWVlWnr1q0qKyvTzJkz9fbbb8frYSFB+d8ud0id6kn9n/N2OeLR55hDRgAgfA7DMGzzvtq4ceNUVFSkFStWBK6NHDlSl19+uZYuXdpp/KJFi7Ru3Tpt27YtcO3aa6/V1q1bVV1dLUmaNWuWmpub9Ze//CUwprS0VIMHD9aaNWvCmldzc7OcTqc8Ho8yMzN7+/CQoDgWGN2p3rFPVzywscdxa+aPV8mIrIjv39tm6OxlG7rspeyQ5HJm6M1FF/JCDUBCCzev9YvjnPrkyJEj2rx5s2699dag65MmTdJbb70V8jbV1dWaNGlS0LXJkydr5cqVam1tVVpamqqrq3XjjTd2GnPPPfd0OZfDhw/r8OHDgc+bm5sjfDRIJqWFeZpY4FJN437tPtSinEHtpRQEEUix73McySEjvQnfAJBobBOO9+7dK6/Xq9zc3KDrubm5amoKfZBCU1NTyPHffPON9u7dq7y8vC7HdHWfkrR06VJVVFT08pEgGaWmOAgeCCnWfY45ZAQAImOrmmNJcjiCV9sMw+h0rafxHa9Hep+LFy+Wx+MJfHz66adhzx8AjhbrPsccMgIAkbFNOM7OzlZqamqnFd3du3d3Wvn1c7lcIcf369dPWVlZ3Y7p6j4lKT09XZmZmUEfANAbR/c5jsXGTQ4ZAYDI2CYc9+/fX2PGjNH69euDrq9fv15nnXVWyNuUlJR0Gv/SSy9p7NixSktL63ZMV/eJ5EFPWMRLaWGeVswtkqtD6z+XM0Mr5hb1aeNmrMM3ACQa29QcS9JNN92ksrIyjR07ViUlJfrDH/6gTz75RNdee62k9nKHnTt36tFHH5XU3pni3nvv1U033aT58+erurpaK1euDOpCccMNN+jcc8/VsmXLNH36dD333HN6+eWX9eabb5ryGGENdJhAvMVy46Y/fHd8Trt4TgNAJ7Zq5Sa1HwJy1113ye12q7CwUHfffbfOPfdcSdK8efP00Ucf6bXXXguMf/3113XjjTfqvffe09ChQ7Vo0aJAmPZ76qmn9J//+Z/68MMPNWLECP3yl7/U97///bDnRCu3xOLvCdvxF8MfUfq6kgeYxdtm0DUFQNIKN6/ZLhxbEeE4cdATFgCAxBRuXrNNzTEQD5H0hAUAAImHcAwchZ6wAAAkN8IxcBR6wgIAkNxs1a0CiDV/T9gmT0unDXnStzXH9IRFNLBBDgCsh3AMHMXfE3bBqlo5pKCAHGlPWIIPukO7QACwJrpVRAHdKhJPX4MLwQfdoV0gAMQfrdziiHCcmHq78kvwQXdoFwgA5gg3r1FWAX
QhNcWhkhFZEd3G22aoorIhZL2yofbgU1HZoIkFLoJPkoqkXWA4zz/KdwAgugjHQJjCCSHRDj5IPNFsF0j5DgBEH+EYCEO4IYQ+yehJtNoFdlW+0+Rp0YJVtZTvAEAv0ecY6IE/hHRcEfaHkKp6d+AafZLRE3+7wK4KHxxqf+HVXbvAnsp3pPbyHW8bW0oAIFKEY6AbkYaQaASfROBtM1S9Y5+eq9up6h37YhLS4vE9YsHfLlBSp+dJuO0COeYcAGKHsgqgG5HWEHfXJ9k/fvb3hsVwxuaLRx2s3WttSwvztGJuUafH4ArzMVC+AwCxQzgGutGbENJV8PG7++UP9MQ7n9omyEUiHnWwVqm17WuXiNLCPE0scPXqPijfAYDYIRwD3ehtCPEHn3s3/EN3v/z3TuMTcdNUPNrYWaVVXrRWrnvTLlDimHMAiCVqjoFu9LWG+Il3Pgl5PRE3TcWjDtYKtbaRbNCMlWjULQMAQiMcA93oSwixQpCLp3jUwZpda2ulLhH+8h2XM/hdi8ED0/TDCSfL+Z3+CfPCCwDiiXAM9KCrEOJyZnRbFmF2kIu3eNTBml1ra7UXPKWFeXpz0YVaM3+8fjjhZA0Z2F/7v2zVyr99pCse2Kizl22Iy0o2ACQSao6BMPRm85TZQS7e4lEHG+3vEemmOiu+4ElNccjz9RE99LePTN+kCACJgHAMhCnSzVPJtmmquzZ20aqDjeb36M2mOiu+4LHKJsV46munEADoDmUVQIzYbdNUNA7V6G0JSry/R2831VnxkBerlXrEWlW9W2cv26ArHtioG56oo3wEQNQ5DMNgx0YfNTc3y+l0yuPxKDMz0+zpwGLscGBFtOcYj5W93n4Pb5uhs5dt6DJQ+lf031x0Ycj78wdrKfTKdbxLGJ6r26kbnqjrcdxvZo/S9FHHx35CMdRVj2uzfvYA7CXcvEZZBRBjfTnsIR5icahGb/v3xuN7RHrqYUd9Pd0u2qxY6hELyVg+AsAchGMgDuIRFnsjGQNHNDbVWekFT7LUtvf1RQ0AhIuaYyCJJVu9qhS9lVb/C57po45XyYgs01482K22vbes2CkEQGIiHANJLBkDR3H+EB07IK3Lr5uxqa6v4rER0mzJUj4CwHyUVQAhJEurqGQMHOsbmnTwq9Yuv27IniutVir1iIVkKR8BYD7CMdCBHbpLREuyBQ5/jXV3Bg9I08QCV5xmFF1WrW2Phnj00QYAibIKIEhv+9/a1dH1qh11FTii0Q/ZLD3VWEvSga9aE6rGOpEkQ/kIAPOxcgz4JGPnBj/ngLROpQbHDkjT0u+fERQ47L6qnow11okm0ctHAJiPcAz4JGOrqK56HEvtK6jhjO1LP+RY6apmPBlrrBNRIpePADAf4RjwSbZVxe5WyqXglXL5/r8dVtW7W92eWOBKqhprAEDkelVz3NbW1uX1Tz75pE8TAsySbKuKkayU26Ufck814+sbmpKiJzAAoPciCsfNzc2aOXOmBg4cqNzcXJWXl8vr9Qa+vmfPHuXn50d9kkA8+Ds3dBWL7Nj/tjuRrJTbYVW9p5px6dvVbTZ1AQC6ElFZxR133KGtW7fqscce08GDB/WLX/xCmzdv1tq1a9W/f39JkmHYZ+c6cLRkaxX10d6vwhoXyUq5mavqkaxus6kLANCViMLxs88+q0ceeUTnn3++JGnGjBmaOnWqpk2bpnXr1kmSHA7+cYF9+VtFdaxZddmoI0M4vG2G1tT0XALlykwPrJRbpVa3q812ka5us6kLABBKROF47969OumkkwKfZ2Vlaf369Zo8ebIuueQS/fGPf4z6BIF4S4ZVxZrG/Wpq7jlMXlF8YuBxW2FVvbvNdslWMw4AiI2Iao6HDRumbdu2BV0bNGiQXnrpJX399deaMWNGVCcHmMW/qjh91PEqGZGVUMFYCr82+OTsgYH/b/YBDD1ttjvw5eGkqhkHAMRGRCvHkyZN0kMPPaRLLrkk6PoxxxyjF198URMnTozq5ADERm9XWc1aVQ/ngJaf/3mb7phaoH9/PDlqxgEAsRFROK6oqNCuXbtCfm3QoEF6+eWXtXnz5qhMDEDs+Dtz9KaG2Ixa3XA32w0e2D8pasYBALETUTgePHiwBg8e3OXXDx8+rC1btui8887r88QAxI7dOnNEstlu+qjjE75mHAAQO706BORohmHoxRdf1MyZMzV06FD98pe/jMa8AMSY2TXEkYi0DCTRa8YBALHT6+OjP/roIz344IN6+OGHtXPnTl155ZX685//rAsuuCCa8wMQQ3bpzNGXMhAAACIR0crx4cOHtWbNGl100UUaOXKk6uvr9etf/1opKSm69dZbdfHFFys1NTVWcwUQA3ZYZfWXgUgc+wwAiK2IwvHxxx+vFStWaNasWdq1a5fWrl2rH/zgB7GaGwAE2KkMBABgXxGVVXi9XjkcDjkcDlaIAcSdXcpAAAD2FVE4drvdevrpp7Vy5UrdcMMNmjJliubOncuR0UgaXR1djPjh2GcAQCw5DMMItb+lRzt27NBDDz2kRx55RDt37tQVV1yhefPm6cILL0y6VeXm5mY5nU55PB5lZmaaPR3ESHdHF/OWPgAA1hZuXut1OPZra2vTiy++qJUrV6qyslLHHHOM9u3b15e7tB3CceLzH13c8ZfFv2ZMzSsAANYWbl7rdSs3v5SUFE2ZMkVTpkzR3r179eijj/b1LgFLCefo4orKBk0scFFiAQCAzUXUreLAgQP63e9+p+bm5k5f83g8WrNmjf7t3/4tapMDrCDco4trGvfHb1IAACAmIgrH9957r/7617+GXIp2Op164403dO+990ZtcoAVRHJ0MYDo8bYZqt6xT8/V7VT1jn3ytvWpChAAwhJRWcXTTz+tX/3qV11+/X//7/+tW265RbfddlufJwZYRaRHFwPoOzbAAjBLRCvHO3bs0CmnnNLl10855RTt2LGjz5MCrMR/dHFX1cQOtf+jzdHFQHT4N8B2LGdq8rRowapaVdW7TZoZgGQQUThOTU3Vrl27uvz6rl27lJIS0V0ClsfRxUD89LQBVmrfAEuJBYBYiSjJjh49Ws8++2yXX3/mmWc0evTovs4JsByOLgbigw2wAMwWUc3xddddp9mzZ+uEE07QggULAod9eL1eLV++XHfffbcef/zxmEwUMBtHFwOxxwZYAGaLKBz/y7/8i37605/q+uuv1+23367hw4fL4XBox44d+uKLL/STn/xEP/jBD2I1V8QQxyLbG//9kCjYAAvAbBEfAvLLX/5Sl19+uVavXq0PPvhAhmHo3HPP1Zw5c1RcXByLOSLG2BUeHqv+nKw6L6A3/BtgmzwtIeuOHWovZ2IDLIBYiej46K+++ko/+clP9Oyzz6q1tVUXXXSRfve73yk7OzuWc7Q8Ox8fzbHI4bHqz8mq8wL6wv+8lhT03OZ5DaAvws1rEW3IKy8v18MPP6ypU6fqiiuu0Msvv6wFCxb0ebIwB7vCw2PVn5NV5wX0FRtgAZgporKKtWvXauXKlZo9e7Yk6corr9SECRPk9XoDm/NgH5HsCi8Zk
RW/iVmMVX9OVp0XEA1sgAVglojC8aeffqpzzjkn8HlxcbH69eunXbt2adiwYVGfHGKLXeHhserPyarzAqIlNcXBCzsAcRdRWYXX61X//v2DrvXr10/ffPNNVCeF+GBXeHis+nOy6rwAALCziFaODcPQvHnzlJ6eHrjW0tKia6+9VgMHDgxcW7t2bfRmiJhhV3h4rPpzsuq8AACws4hWjq+++mrl5OTI6XQGPubOnauhQ4cGXYM9cCxyeKz6c7LqvAAAsLOIWrkhNDu3cpPokxsuq/6crDovAACsJNy8RjiOAruHY4kT1sJl1Z+TVecFAIBVhJvXIj4hD4mJXeHhserPyarzAgDAbiKqOQYAAAASGSvHsAxKAwAAgNkIx7AENpUBAAAroKwCpquqd2vBqtpORyE3eVq0YFWtqurdUfte3jZD1Tv26bm6naresU/eNvajAgCAb7FyDFN52wxVVDaEPMTCUHu/3orKBk0scPW5xILVaQAA0BNWjmGqmsb9nVaMj2ZIcntaVNO4v0/fJ56r0wAAwL4IxzDV7kNdB+PejAulp9VpqX11mhILe6JUBgAQTbYJxwcOHFBZWVngiOqysjIdPHiw29sYhqElS5Zo6NCh+s53vqPzzz9f7733XtCY888/Xw6HI+hj9uzZMXwkOFrOoIyojgslXqvTiL+qerfOXrZBVzywUTc8UacrHtios5dt4J0AAECv2SYcz5kzR3V1daqqqlJVVZXq6upUVlbW7W3uuusu/frXv9a9996rd955Ry6XSxMnTtShQ4eCxs2fP19utzvwcf/998fyoeAoxflDlOfMUFfVxA611wUX5w/p9feIx+o04o9SGQBALNgiHG/btk1VVVX64x//qJKSEpWUlOiBBx7Q888/r+3bt4e8jWEYuueee3T77bfr+9//vgoLC/XII4/oq6++0uOPPx40dsCAAXK5XIEPp9MZj4cFtZ/sVj6tQJI6BWT/5+XTCvq0GS8eq9OIL0plAACxYotwXF1dLafTqXHjxgWujR8/Xk6nU2+99VbI2zQ2NqqpqUmTJk0KXEtPT9d5553X6TarV69Wdna2Tj/9dN1yyy2dVpY7Onz4sJqbm4M+0HulhXlaMbdILmdwOHU5M7RiblGfO0nEY3Ua8UWpDAAgVmzRyq2pqUk5OTmdrufk5KipqanL20hSbm5u0PXc3Fx9/PHHgc+vvPJK5efny+Vyqb6+XosXL9bWrVu1fv36LuezdOlSVVRU9OahoAulhXmaWOCKyQl5/tXpBatq5ZCCVhujtTqdyKx4ciGlMgCAWDE1HC9ZsqTHkPnOO+9IkhyOzv8YG4YR8vrROn69423mz58f+P+FhYU65ZRTNHbsWNXW1qqoqCjkfS5evFg33XRT4PPm5mYNGzas23mgZ6kpDpWMyIrJfftXpzv2OXZ16HNsxSBoJqv2hqZUBgAQK6aG4+uuu67HzhAnn3yy3n33XX3++eedvrZnz55OK8N+LpdLUvsKcl7et/+I7969u8vbSFJRUZHS0tL0wQcfdBmO09PTlZ6e3u2844UwF76eVqetGgTN4t/w1rFq17/hLRolL73lL5Vp8rSErDt2qP2FD6UyAIBImRqOs7OzlZ2d3eO4kpISeTwe1dTUqLi4WJL09ttvy+Px6Kyzzgp5G3+pxPr16zV69GhJ0pEjR/T6669r2bJlXX6v9957T62trUGB2qoIc5HranXaykHQDPE8ubA3KJUBAMSKLTbkjRw5UqWlpZo/f742btyojRs3av78+br00kt16qmnBsaddtppeuaZZyS1l1MsXLhQ/+f//B8988wzqq+v17x58zRgwADNmTNHkrRjxw7deeed2rRpkz766CO98MIL+td//VeNHj1aEyZMMOWxhos2VtFD54PO7LDhLdYbOQEAyckWG/Kk9o4S119/faD7xGWXXaZ77703aMz27dvl8XgCn//0pz/V119/rR//+Mc6cOCAxo0bp5deekmDBg2SJPXv31+vvPKKfvOb3+iLL77QsGHDNHXqVJWXlys1NTV+Dy5CVl/Vs5tIgmCsaqKtxi4b3mK5kRNIdpTtIVnZJhwPGTJEq1at6naMYQTHRYfDoSVLlmjJkiUhxw8bNkyvv/56tKYYN4S56LJLEIwnO214i+VGTiBZUbaHZGaLsgoEI8xFl52CYLzQGxpIXpTtIdkRjm2IMBddBMHO4nFyIQDrYQ8GQDi2JcJcdBEEQ7Pyhjdvm6HqHfv0XN1OVe/Yxz/UQJTYYTMuEGu2qTnGt2hjFX3hHhKSbKy44Y1aSCB2KNsDCMe2RZiLPisGQSuw0oY3+lEDsUXZHkA4tjXCXPRZKQgiGC0Mgdjj9EmAmmPb84e56aOOV8mILEIBEha1kEDssQcDIBwDsAlqIYH4sPJmXCAeKKsAYAvUQgLxQ9kekhnhGIAtUAsJxBd7MJCsKKsAYAvUQgIA4oFwDMA2qIUEAMQaZRUAbIVaSABALBGOAdgOtZAAgFihrAIAAADwIRwDAAAAPoRjAAAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfAjHAAAAgA/hGAAAAPDhhDwApvC2GRwBDQCwHMIxgLirqnerorJBbk9L4FqeM0Pl0wpUWphn4swAAMmOsgoAcVVV79aCVbVBwViSmjwtWrCqVlX1bpNmBgAA4RhAHHnbDFVUNsgI8TX/tYrKBnnbQo0AACD2CMcA4qamcX+nFeOjGZLcnhbVNO6P36QAADgK4RhA3Ow+1HUw7s04AACijQ15NsMOf9hZzqCMqI4DACDaCMc2wg5/2F1x/hDlOTPU5GkJWXfskORytr/oAwDADJRV2AQ7/JEIUlMcKp9WIKk9CB/N/3n5tALeDQEAmIZwbAPs8EciKS3M04q5RXI5g0snXM4MrZhbxLsgAABTUVZhA5Hs8C8ZkRW/iQG9VFqYp4kFLurnAQCWQzi2AXb4IxGlpjh4MQcAsBzCsQ2wwz8YHTsAAECsEI5tgB3+36JjBwAAiCU25NkAO/zb0bEDAADEGuHYJpJ9hz8dOwAAQDxQVmEjybzDn44dAAAgHgjHNpOsO/zp2AEAAOKBsgrYAh07AABAPBCOYQv+jh1dFZA41N61Ihk6dgAAgNghHCPA22aoesc+PVe3U9U79llqcxsdOwAAQDxQcwxJ9ugf7O/Y0XGeLovNEwAA2JfDMAzrLA/aVHNzs5xOpzwejzIzM02bR29PjvP3D+74RPDf0mqt4jghDwAARCrcvMbKcYLo7cpvT/2DHWrvHzyxwGWZAJqsHTsAAEDsUXOcAPpyclwk/YMBAAASHeHY5vp6chz9gwEAAL5FOLa5vq780j8YAADgW4Rjm+vryi/9gwEAAL5FOLa5vq78htM/+I6pI1XTuN+S/Y8BAACiiW4VNudf+W3ytISsO3aovQ9wdyu/3fUPvuy7efr5n7dZuv8xAABAtNDnOArM7nPs71YhKSggR9qnuGP/4ANfHtG/P26f/scAAABdCTevUVaRAPwrvy5ncOmEy5kRUYD19w+ePup4FecP0c//3Psu
GAAAAHZEWUWCKC3M08QCV9ROjoukCwYHcgAAgERBOE4g0Tw5jv7HAAAgGVFWgZDofwwAAJIRK8cIKRpdMADYV8cNun0p0wIAOyEcIyR//+MFq2rlUOguGOXTCvjHEkhAVfXuTq0daeEIIFlQVoEuRasLBgD78LeG7Lght8nTogWralVV7zZpZgAQH6wco1vR7oIBwLq8bYYqKrtu4ehQewvHiQUu/gYASFiEY/Qoml0wAFgXLRwBgLIKAIAPLRwBgJVjAICP2S0c6ZABwAoIxwAASea2cKRDBgCroKwCACDp2xaO0rctG/1i2cKRDhkArIRwDAAIiHcLx546ZEjtHTK8baFGAED0UVYBAAgSzxaOdMgAYDWEYwBAJ9Fq4djTJjs6ZACwGsIxACAmwtlkZ3aHDADoiJpjAEDUhbvJzt8ho6uCDYfaA3UsOmQAQCiEYwBAVEWyyc6sDhkA0BXCMQAgqiLZZCfFv0MGAHSHmmMAQFT1ZpNdPDtkAEB3bLNyfODAAZWVlcnpdMrpdKqsrEwHDx7s9jZr167V5MmTlZ2dLYfDobq6uk5jDh8+rP/4j/9Qdna2Bg4cqMsuu0yfffZZbB4EACQgb5uh6h379FzdTlXv2KfsY9LDul3HTXb+DhnTRx2vkhFZBGMAprDNyvGcOXP02WefqaqqSpL0ox/9SGVlZaqsrOzyNl9++aUmTJigf/3Xf9X8+fNDjlm4cKEqKyv1xBNPKCsrSzfffLMuvfRSbd68WampqTF5LACQKEJ1pHBlpuvYAWnyfNUa92OoAaCvHIZhWP7YoW3btqmgoEAbN27UuHHjJEkbN25USUmJ3n//fZ166qnd3v6jjz5Sfn6+tmzZolGjRgWuezweHXfccXrsscc0a9YsSdKuXbs0bNgwvfDCC5o8eXJY82tubpbT6ZTH41FmZmbvHiQA2Iy/I0XHf0Qc+nbj3dH/3/+5JGqJAcRduHnNFmUV1dXVcjqdgWAsSePHj5fT6dRbb73V6/vdvHmzWltbNWnSpMC1oUOHqrCwsNv7PXz4sJqbm4M+ACCZ9NSRwiHp2AFpys1kkx0Ae7FFWUVTU5NycnI6Xc/JyVFTU1Of7rd///4aPHhw0PXc3Nxu73fp0qWqqKjo9fcFALsLpyPFwa9atfqaIqWkONhkB8A2TF05XrJkiRwOR7cfmzZtkiQ5HJ3/mBqGEfJ6X/V0v4sXL5bH4wl8fPrpp1GfAwBYWbgdKfZ+eZhNdgBsxdSV4+uuu06zZ8/udszJJ5+sd999V59//nmnr+3Zs0e5ubm9/v4ul0tHjhzRgQMHglaPd+/erbPOOqvL26Wnpys9Pbzd2ACQiDj2GUCiMjUcZ2dnKzs7u8dxJSUl8ng8qqmpUXFxsSTp7bfflsfj6TbE9mTMmDFKS0vT+vXrNXPmTEmS2+1WfX297rrrrl7fLwAkOv+xz02eFjpSAEgottiQN3LkSJWWlmr+/PnauHGjNm7cqPnz5+vSSy8N6lRx2mmn6Zlnngl8vn//ftXV1amhoUGStH37dtXV1QXqiZ1Op6655hrdfPPNeuWVV7RlyxbNnTtXZ5xxhi6++OL4PkgAsBGOfQaQqGwRjiVp9erVOuOMMzRp0iRNmjRJZ555ph577LGgMdu3b5fH4wl8vm7dOo0ePVpTp06VJM2ePVujR4/WfffdFxhz99136/LLL9fMmTM1YcIEDRgwQJWVlfQ4BoAecOwzgERkiz7HVkefYwDJzNtmcOwzAMsLN6/ZopUbAMC6/Mc+A0AisE1ZBQAAABBrhGMAAADAh3AMAAAA+BCOAQAAAB825AERYmc+AACJi3AMRKCq3q2Kyga5PS2Ba3nODJVPK6CnKwAACYCyCiBMVfVuLVhVGxSMJanJ06IFq2pVVe82aWYAACBaCMdAGLxthioqGxTqxBz/tYrKBnnbOFMHAAA7IxwDYahp3N9pxfhohiS3p0U1jfvjNykAABB1hGMgDLsPdR2MezMOAABYE+EYCEPOoIyojgMAANZEOAbCUJw/RHnODHXVsM2h9q4VxflD4jktAAAQZYRjIAypKQ6VTyuQpE4B2f95+bQC+h0DAGBzhGMgTKWFeVoxt0guZ3DphMuZoRVzi+hzDABAAuAQECACpYV5mljg4oQ8AAASFOEYiFBqikMlI7LMngYAAIgByioAAAAAH8IxAAAA4EM4BgAAAHwIxwAAAIAP4RgAAADwIRwDAAAAPoRjAAAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfAjHAAAAgE8/sycAAABgVd42QzWN+7X7UItyBmWoOH+IUlMcZk8LMUQ4BgAACKGq3q2Kyga5PS2Ba3nODJVPK1BpYZ6JM0MsUVYBAADQQVW9WwtW1QYFY0lq8rRowapaVdW7TZoZYo1wDAAAcBRvm6GKygYZIb7mv1ZR2SBvW6gRsDvCMQAAwFFqGvd3WjE+miHJ7WlRTeP++E0KcUM4BgAAOMruQ10H496Mg70QjgEAAI6SMygjquNgL4RjAACAoxTnD1GeM0NdNWxzqL1rRXH+kHhOC3FCOAYAADhKaopD5dMKJKlTQPZ/Xj6tgH7HCYpwDAAA0EFpYZ5WzC2SyxlcOuFyZmjF3CL6HCcwDgEBAAAIobQwTxMLXJyQl2QIxwAAAF1ITXGoZESW2dNAHFFWAQAAAPgQjgEAAAAfyioAAAHeNoP6SgBJjXAMAJAkVdW7VVHZEHRsbp4zQ+XTCtiZDyBpUFYBAFBVvVsLVtUGBWNJavK0aMGqWlXVu02aGQDEF+EYAJKct81QRWWDjBBf81+rqGyQty3UCABILIRjAEhyNY37O60YH82Q5Pa0qKZxf/wmBQAmIRwDQJLbfajrYNybcQBgZ4RjAEhyOYMyeh4UwTgAsDPCMQAkueL8IcpzZqirhm0OtXetKM4fEs9pAYApCMcAkORSUxwqn1YgSZ0Csv/z8mkF9DsGkBQIxwAAlRbmacXcIrmcwaUTLmeGVswtos8xgKTBISAAAEntAXligYsT8gAkNcIxACAgNcWhkhFZZk8DAExDWQUAAADgQzgGAAAAfAjHAAAAgA/hGAAAAPAhHAMAAAA+hGMAAADAh3AMAAAA+BCOAQAAAB/CMQAAAOBDOAYAAAB8CMcAAACAD+EYAAAA8CEcAwAAAD6EYwAAAMCHcAwAAAD4EI4BAAAAH8IxAAAA4EM4BgAAAHz6mT0BAAAAq/C2Gapp3K/dh1qUMyhDxflDlJriMHtaiCPCMQAAgKSqercqKhvk9rQEruU5M1Q+rUClhXkmzgzxRFkFAABIelX1bi1YVRsUjCWpydOiBatqVVXvNmlmiDfbhOMDBw6orKxMTqdTTqdTZWVlOnjwYLe3Wbt2rSZPnqzs7Gw5HA7V1dV1GnP++efL4XAEfcyePTs2DwIAAFiOt81QRWWDjBBf81+rqGyQty3UCCQa24TjOXPmqK6uTlVVVaqqqlJdXZ3Kysq6vc2XX36pCRMm6P/+3//b7bj58+fL7XYHPu6///5oTh0AAFhYTeP+TivGRzMkuT0tqmn
cH79JwTS2qDnetm2bqqqqtHHjRo0bN06S9MADD6ikpETbt2/XqaeeGvJ2/vD80UcfdXv/AwYMkMvliuqcAQCAPew+1HUw7s042JstVo6rq6vldDoDwViSxo8fL6fTqbfeeqvP97969WplZ2fr9NNP1y233KJDhw51O/7w4cNqbm4O+gAAAPaUMygjquNgb7ZYOW5qalJOTk6n6zk5OWpqaurTfV955ZXKz8+Xy+VSfX29Fi9erK1bt2r9+vVd3mbp0qWqqKjo0/cFAADWUJw/RHnODDV5WkLWHTskuZztbd2Q+ExdOV6yZEmnzXAdPzZt2iRJcjg69xg0DCPk9UjMnz9fF198sQoLCzV79mw99dRTevnll1VbW9vlbRYvXiyPxxP4+PTTT/s0BwAAYJ7UFIfKpxVIag/CR/N/Xj6tgH7HScLUlePrrruux84QJ598st599119/vnnnb62Z88e5ebmRnVORUVFSktL0wcffKCioqKQY9LT05Wenh7V7wsAAMxTWpinFXOLOvU5dtHnOOmYGo6zs7OVnZ3d47iSkhJ5PB7V1NSouLhYkvT222/L4/HorLPOiuqc3nvvPbW2tiovj18CAACSSWlhniYWuDghL8nZouZ45MiRKi0t1fz58wNt1n70ox/p0ksvDepUcdppp2np0qWaMWOGJGn//v365JNPtGvXLknS9u3bJUkul0sul0s7duzQ6tWrdckllyg7O1sNDQ26+eabNXr0aE2YMCHOjxIAAJgtNcWhkhFZZk8DJrJFtwqpvaPEGWecoUmTJmnSpEk688wz9dhjjwWN2b59uzweT+DzdevWafTo0Zo6daokafbs2Ro9erTuu+8+SVL//v31yiuvaPLkyTr11FN1/fXXa9KkSXr55ZeVmpoavwcHAAAAS3AYhsFxL33U3Nwsp9Mpj8ejzMxMs6cDAACADsLNa7ZZOQYAAABijXAMAAAA+BCOAQAAAB/CMQAAAOBDOAYAAAB8CMcAAACAD+EYAAAA8CEcAwAAAD6EYwAAAMCHcAwAAAD4EI4BAAAAH8IxAAAA4EM4BgAAAHwIxwAAAIAP4RgAAADwIRwDAAAAPoRjAAAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfAjHAAAAgA/hGAAAAPAhHAMAAAA+hGMAAADAh3AMAAAA+BCOAQAAAB/CMQAAAOBDOAYAAAB8CMcAAACAD+EYAAAA8CEcAwAAAD6EYwAAAMCHcAwAAAD4EI4BAAAAH8IxAAAA4EM4BgAAAHwIxwAAAIBPP7MnAACAnXjbDNU07tfuQy3KGZSh4vwhSk1xmD0tAFFCOAYAIExV9W5VVDbI7WkJXMtzZqh8WoFKC/NMnBmAaKGsAgCAMFTVu7VgVW1QMJakJk+LFqyqVVW926SZAYgmwjEAAD3wthmqqGyQEeJr/msVlQ3ytoUaAcBOCMcAAPSgpnF/pxXjoxmS3J4W1TTuj9+kAMQE4RgAgB7sPtR1MO7NOADWRTgGAKAHOYMyojoOgHURjgEA6EFx/hDlOTPUVcM2h9q7VhTnD4nntADEAOEYAIAepKY4VD6tQJI6BWT/5+XTCuh3DCQAwjEAAGEoLczTirlFcjmDSydczgytmFtEn2MgQXAICAAAYSotzNPEAhcn5AEJjHAMAEAEUlMcKhmRZfY0AMQI4RgAAABx5W0zLPsODOEYAAAAcVNV71ZFZUPQwTp5zgyVTyuwRO0+G/IAAAAQF1X1bi1YVdvpxMkmT4sWrKpVVb3bpJl9i3AMAACAmPO2GaqobJAR4mv+axWVDfK2hRoRP4RjAAAAxFxN4/5OK8ZHMyS5PS2qadwfv0mFQDgGAABAzO0+1HUw7s24WCEcAwAAIOZyBmX0PCiCcbFCOAYAAEDMFecPUZ4zo9MR7H4OtXetKM4fEs9pdUI4BgAAQMylpjhUPq1AkjoFZP/n5dMKTO93TDgGAABAXJQW5mnF3CK5nMGlEy5nhlbMLbJEn2MOAQEAAEDclBbmaWKBixPyAAB9Z+UjVwEgXKkpDpWMyDJ7GiERjgHAJqx+5CoAJAJqjgHABuxw5CoAJALCMQBYnF2OXAWAREA4BgCLs8uRqwCQCAjHAGBxdjlyFQASAeEYACzOLkeuAkAiIBwDgMXZ5chVAEgEhGMAsDi7HLkKAImAcAwANmCHI1cBIBFwCAgA2ITVj1wFgERAOAYAG7HykasAkAgoqwAAAAB8CMcAAACAj23C8YEDB1RWVian0ymn06mysjIdPHiwy/Gtra1atGiRzjjjDA0cOFBDhw7VVVddpV27dgWNO3z4sP7jP/5D2dnZGjhwoC677DJ99tlnMX40AAAAsCLbhOM5c+aorq5OVVVVqqqqUl1dncrKyroc/9VXX6m2tlZ33HGHamtrtXbtWv3973/XZZddFjRu4cKFeuaZZ/TEE0/ozTff1BdffKFLL71UXq831g8JAAAAFuMwDMMwexI92bZtmwoKCrRx40aNGzdOkrRx40aVlJTo/fff16mnnhrW/bzzzjsqLi7Wxx9/rBNPPFEej0fHHXecHnvsMc2aNUuStGvXLg0bNkwvvPCCJk+eHNb9Njc3y+l0yuPxKDMzs3cPEgAAADETbl6zxcpxdXW1nE5nIBhL0vjx4+V0OvXWW2+FfT8ej0cOh0PHHnusJGnz5s1qbW3VpEmTAmOGDh2qwsLCbu/38OHDam5uDvoAAACA/dkiHDc1NSknJ6fT9ZycHDU1NYV1Hy0tLbr11ls1Z86cwKuFpqYm9e/fX4MHDw4am5ub2+39Ll26NFD77HQ6NWzYsAgeDQAAAKzK1HC8ZMkSORyObj82bdokSXI4Oje5Nwwj5PWOWltbNXv2bLW1tWn58uU9ju/pfhcvXiyPxxP4+PTTT3u8TwAAAFifqYeAXHfddZo9e3a3Y04++WS9++67+vzzzzt9bc+ePcrNze329q2trZo5c6YaGxu1YcOGoBoTl8ulI0eO6MCBA0Grx7t379ZZZ53V5X2mp6crPT292+8LAAAA+zE1HGdnZys7O7vHcSUlJfJ4PKqpqVFxcbEk6e2335bH4+k2xPqD8QcffKBXX31VWVnBp0qNGTNGaWlpWr9+vWbOnClJcrvdqq+v11133dWHRwYAAAA7skXN8ciRI1VaWqr58+dr48aN2rhxo+bPn69LL700qFPFaaedpmeeeUaS9M033+gHP/iBNm3apNWrV8vr9aqpqUlNTU06cuSIJMnpdOqaa67RzTffrFdeeUVbtmzR3LlzdcYZZ+jiiy825bECAADAPKauHEdi9erVuv766wOdJS677DLde++9QWO2b98uj8cjSfrss8+0bt06SdKoUaOCxr366qs6//zzJUl33323+vXrp5kzZ+rrr7/WRRddpIcfflipqamxfUAAAACwHFv0ObY6+hwDAABYW0L1OQYAAADigXAMAAAA+BCOAQAAAB/CMQAAAOBDOAYAAAB8CMcAAACAD+EYAAAA8LHNISBW5m8V3dzcbPJMAAAAEIo/p/V0xAfhOAoOHTokSRo2bJjJMw
EAAEB3Dh06JKfT2eXXOSEvCtra2rRr1y4NGjRIDofD7OkgAs3NzRo2bJg+/fRTTjdMUjwHIPE8AM+BZGAYhg4dOqShQ4cqJaXrymJWjqMgJSVFJ5xwgtnTQB9kZmbyxzDJ8RyAxPMAPAcSXXcrxn5syAMAAAB8CMcAAACAD+EYSS09PV3l5eVKT083eyowCc8BSDwPwHMA32JDHgAAAODDyjEAAADgQzgGAAAAfAjHAAAAgA/hGAAAAPAhHCPpHDhwQGVlZXI6nXI6nSorK9PBgwe7HN/a2qpFixbpjDPO0MCBAzV06FBdddVV2rVrV/wmjT5Zvny58vPzlZGRoTFjxuiNN97odvzrr7+uMWPGKCMjQ8OHD9d9990Xp5kiViJ5Dqxdu1YTJ07Ucccdp8zMTJWUlOjFF1+M42wRK5H+LfD729/+pn79+mnUqFGxnSAsgXCMpDNnzhzV1dWpqqpKVVVVqqurU1lZWZfjv/rqK9XW1uqOO+5QbW2t1q5dq7///e+67LLL4jhr9NaTTz6phQsX6vbbb9eWLVt0zjnnaMqUKfrkk09Cjm9sbNQll1yic845R1u2bNFtt92m66+/Xk8//XScZ45oifQ58Ne//lUTJ07UCy+8oM2bN+uCCy7QtGnTtGXLljjPHNEU6fPAz+Px6KqrrtJFF10Up5nCbLRyQ1LZtm2bCgoKtHHjRo0bN06StHHjRpWUlOj999/XqaeeGtb9vPPOOyouLtbHH3+sE088MZZTRh+NGzdORUVFWrFiReDayJEjdfnll2vp0qWdxi9atEjr1q3Ttm3bAteuvfZabd26VdXV1XGZM6Ir0udAKKeffrpmzZqln/3sZ7GaJmKst8+D2bNn65RTTlFqaqqeffZZ1dXVxWG2MBMrx0gq1dXVcjqdgWAsSePHj5fT6dRbb70V9v14PB45HA4de+yxMZglouXIkSPavHmzJk2aFHR90qRJXf73rq6u7jR+8uTJ2rRpk1pbW2M2V8RGb54DHbW1tenQoUMaMmRILKaIOOjt8+Chhx7Sjh07VF5eHuspwkL6mT0BIJ6ampqUk5PT6XpOTo6amprCuo+WlhbdeuutmjNnjjIzM6M9RUTR3r175fV6lZubG3Q9Nze3y//eTU1NIcd/88032rt3r/Ly8mI2X0Rfb54DHf3qV7/Sl19+qZkzZ8ZiioiD3jwPPvjgA916661644031K8fcSmZsHKMhLBkyRI5HI5uPzZt2iRJcjgcnW5vGEbI6x21trZq9uzZamtr0/Lly6P+OBAbHf/b9vTfO9T4UNdhH5E+B/zWrFmjJUuW6Mknnwz5whr2Eu7zwOv1as6cOaqoqNA///M/x2t6sAheCiEhXHfddZo9e3a3Y04++WS9++67+vzzzzt9bc+ePZ1WFDpqbW3VzJkz1djYqA0bNrBqbAPZ2dlKTU3ttDK0e/fuLv97u1yukOP79eunrKysmM0VsdGb54Dfk08+qWuuuUZ/+tOfdPHFF8dymoixSJ8Hhw4d0qZNm7RlyxZdd911ktrLawzDUL9+/fTSSy/pwgsvjMvcEX+EYySE7OxsZWdn9ziupKREHo9HNTU1Ki4uliS9/fbb8ng8Ouuss7q8nT8Yf/DBB3r11VcJSTbRv39/jRkzRuvXr9eMGTMC19evX6/p06eHvE1JSYkqKyuDrr300ksaO3as0tLSYjpfRF9vngNS+4rxD3/4Q61Zs0ZTp06Nx1QRQ5E+DzIzM/U///M/QdeWL1+uDRs26KmnnlJ+fn7M5wwTGUCSKS0tNc4880yjurraqK6uNs444wzj0ksvDRpz6qmnGmvXrjUMwzBaW1uNyy67zDjhhBOMuro6w+12Bz4OHz5sxkNABJ544gkjLS3NWLlypdHQ0GAsXLjQGDhwoPHRRx8ZhmEYt956q1FWVhYY/+GHHxoDBgwwbrzxRqOhocFYuXKlkZaWZjz11FNmPQT0UaTPgccff9zo16+f8fvf/z7o9/3gwYNmPQREQaTPg47Ky8uN7373u3GaLcxEOEbS2bdvn3HllVcagwYNMgYNGmRceeWVxoEDB4LGSDIeeughwzAMo7Gx0ZAU8uPVV1+N+/wRud///vfGSSedZPTv398oKioyXn/99cDXrr76auO8884LGv/aa68Zo0ePNvr372+cfPLJxooVK+I8Y0RbJM+B8847L+Tv+9VXXx3/iSOqIv1bcDTCcfKgzzEAAADgQ7cKAAAAwIdwDAAAAPgQjgEAAAAfwjEAAADgQzgGAAAAfAjHAAAAgA/hGAAAAPAhHAMAAAA+hGMAAADAh3AMAAlo3rx5cjgccjgcSktL0/Dhw3XLLbfoyy+/DIx5+umndf7558vpdOqYY47RmWeeqTvvvFP79+8Puq+vv/5agwcP1pAhQ/T11193+l5/+MMfdP755yszM1MOh0MHDx6M9cMDgJghHANAgiotLZXb7daHH36oX/ziF1q+fLluueUWSdLtt9+uWbNm6Xvf+57+8pe/qL6+Xr/61a+0detWPfbYY0H38/TTT6uwsFAFBQVau3Ztp+/z1VdfqbS0VLfddltcHhcAxJLDMAzD7EkAAKJr3rx5OnjwoJ599tnAtfnz5+v555/Xc889p3Hjxumee+7RDTfc0Om2Bw8e1LHHHhv4/IILLtDs2bNlGIb++7//Wxs2bAj5PV977TVdcMEFOnDgQNDtAcBO+pk9AQBAfHznO99Ra2urVq9erWOOOUY//vGPQ447Otju2LFD1dXVWrt2rQzD0MKFC/Xhhx9q+PDhcZo1AMQXZRUAkARqamr0+OOP66KLLtIHH3yg4cOHKy0trcfbPfjgg5oyZUqg5ri0tFQPPvhgHGYMAOYgHANAgnr++ed1zDHHKCMjQyUlJTr33HP1u9/9ToZhyOFw9Hh7r9erRx55RHPnzg1cmzt3rh555BF5vd5YTh0ATENZBQAkqAsuuEArVqxQWlqahg4dGlgp/ud//me9+eabam1t7Xb1+MUXX9TOnTs1a9asoOter1cvvfSSpkyZEtP5A4AZWDkGgAQ1cOBA/dM//ZNOOumkoBA8Z84cffHFF1q+fHnI2/lbsa1cuVKzZ89WXV1d0MeVV16plStXxuMhAEDcsXIMAElm3Lhx+ulPf6qbb75ZO3fu1IwZMzR06FD94x//0H333aezzz5bc+bMUWVlpdatW6fCwsKg21999dWaOnWq9uzZo+OOO05NTU1qamrSP/7xD0nS//zP/2jQoEE68cQTNWTIEDMeIgD0GivHAJCEli1bpscff1xvv/22Jk+erNNPP1033XSTzjzzTF199dV69NFHNXDgQF100UWdbnvBBRdo0KBBgX7I9913n0aPHq358+dLks4991yNHj1a69ati+tjAoBooM8xAAAA4MPKMQAAAOBDOAYAAAB8CMcAAACAD+EYAAAA8CEcAwAAAD6EYwAAAMCHcAwAAAD4EI4BAAAAH8IxAAAA4EM4BgAAAHwIxwAAAIDP/wc9/ahdqkYreAAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "encodings = encodings.detach()\n", + "\n", + "pca = PCA(n_components=2)\n", + "\n", + "principalComponents = pca.fit_transform(encodings)\n", + "\n", + "fig = plt.figure(figsize=(8, 8))\n", + "ax = fig.add_subplot(1, 1, 1)\n", + "ax.set_title(\"Encodings\")\n", + "ax.set_xlabel('PCA1'); ax.set_ylabel('PCA2')\n", + "\n", + "ax.scatter(principalComponents[:, 0], principalComponents[:, 1])\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/examples/multi_task.ipynb b/chemprop/examples/multi_task.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0c8a67aa3b85b6e816bb89279919061e31da851c --- /dev/null +++ b/chemprop/examples/multi_task.ipynb @@ -0,0 +1,315 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Multitask model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/multi_task.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from lightning import pytorch as pl\n", + "import torch\n", + "import numpy as np\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "from chemprop import data, models, nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step 1: Make datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol_multitask.csv\"\n", + "smiles_column = 'smiles' \n", + "target_columns = [\"mu\",\"alpha\",\"homo\",\"lumo\",\"gap\",\"r2\",\"zpve\",\"cv\",\"u0\",\"u298\",\"h298\",\"g298\"] \n", + "\n", + "df_input = pd.read_csv(input_path)\n", + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values\n", + "\n", + "datapoints = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step 2: Split data and make datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "split_indices = data.make_split_indices(datapoints)\n", + "train_data, val_data, test_data = data.split_data_by_indices(datapoints, *split_indices)\n", + "\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0])\n", + "val_dset = data.MoleculeDataset(val_data[0])\n", + "test_dset = 
data.MoleculeDataset(test_data[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step 3: Scale targets and make dataloaders" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "output_scaler = train_dset.normalize_targets()\n", + "val_dset.normalize_targets(output_scaler)\n", + "\n", + "train_loader = data.build_dataloader(train_dset)\n", + "val_loader = data.build_dataloader(val_dset)\n", + "test_loader = data.build_dataloader(test_dset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step 4: Define the model" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "output_transform = nn.transforms.UnscaleTransform.from_standard_scaler(output_scaler)\n", + "\n", + "ffn = nn.RegressionFFN(n_tasks = len(target_columns), output_transform=output_transform)\n", + "chemprop_model = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), ffn)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step 5: Set up the trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(logger=False, enable_checkpointing=False, max_epochs=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step 6: Train the model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | RegressionFFN | 93.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "321 K Trainable params\n", + "0 Non-trainable params\n", + "321 K Total params\n", + "1.286 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
[HTML table rendering omitted: 100 rows × 2 columns (smiles, lipo); the equivalent text/plain output follows.]
\n", + "" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_test = pd.read_csv(test_path)\n", + "df_test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get smiles" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...\n", + " ... \n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...\n", + "Name: smiles, Length: 100, dtype: object" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smis = df_test[smiles_column]\n", + "smis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "test_data = [data.MoleculeDatapoint.from_smi(smi) for smi in smis]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "test_dset = data.MoleculeDataset(test_data, featurizer=featurizer)\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|████████████████████| 2/2 [00:00<00:00, 48.17it/s]\n" + ] + } + ], + "source": [ + "with torch.inference_mode():\n", + " trainer = pl.Trainer(\n", + " logger=None,\n", + " enable_progress_bar=True,\n", + " accelerator=\"cpu\",\n", + " devices=1\n", + " )\n", + " test_preds = trainer.predict(mpnn, test_loader)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
[HTML table rendering omitted: 100 rows × 3 columns (smiles, lipo, pred); the equivalent text/plain output follows.]
" + ], + "text/plain": [ + " smiles lipo pred\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54 2.253542\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18 2.235016\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69 2.245891\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37 2.249847\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10 2.228097\n", + ".. ... ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20 2.233408\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04 2.236931\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49 2.237789\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20 2.252625\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00 2.235702\n", + "\n", + "[100 rows x 3 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_preds = np.concatenate(test_preds, axis=0)\n", + "df_test['pred'] = test_preds\n", + "df_test" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/examples/predicting_regression_multicomponent.ipynb b/chemprop/examples/predicting_regression_multicomponent.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e43d7331324d6548a55018413c0d738f0192dd9d --- /dev/null +++ b/chemprop/examples/predicting_regression_multicomponent.ipynb @@ -0,0 +1,618 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predicting Regression - Multicomponent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/predicting_regression_multicomponent.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import torch\n", + "from lightning import pytorch as pl\n", + "from pathlib import Path\n", + "\n", + "from chemprop import data, featurizers\n", + "from chemprop.models import multi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change model input here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "checkpoint_path = chemprop_dir / \"tests\" / \"data\" / \"example_model_v2_regression_mol+mol.ckpt\" # path to the checkpoint file. 
\n", + "# If the checkpoint file is generated using the training notebook, it will be in the `checkpoints` folder with name similar to `checkpoints/epoch=19-step=180.ckpt`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load model" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MulticomponentMPNN(\n", + " (message_passing): MulticomponentMessagePassing(\n", + " (blocks): ModuleList(\n", + " (0-1): 2 x BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): GraphTransform(\n", + " (V_transform): Identity()\n", + " (E_transform): Identity()\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(600, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=600, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mcmpnn = multi.MulticomponentMPNN.load_from_checkpoint(checkpoint_path)\n", + "mcmpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change predict input here" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "test_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol+mol\" / \"mol+mol.csv\" # path to your .csv file containing SMILES strings to make predictions for\n", + "smiles_columns = ['smiles', 'solvent'] # name of the column containing SMILES strings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load test smiles" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
[HTML table rendering omitted: 100 rows × 3 columns (smiles, solvent, peakwavs_max); the equivalent text/plain output follows.]
" + ], + "text/plain": [ + " smiles solvent peakwavs_max\n", + "0 CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2C... ClCCl 642.0\n", + "1 C(=C/c1cnccn1)\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3c... ClCCl 420.0\n", + "2 CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+]... O 544.0\n", + "3 c1ccc2[nH]ccc2c1 O 290.0\n", + "4 CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5c... ClC(Cl)Cl 736.0\n", + ".. ... ... ...\n", + "95 COc1ccc(C2CC(c3ccc(O)cc3)=NN2c2ccc(S(N)(=O)=O)... C1CCOC1 359.0\n", + "96 COc1ccc2c3c(c4ccc(OC)cc4c2c1)C1(c2ccccc2-c2ccc... C1CCCCC1 386.0\n", + "97 CCCCOc1c(C=C2N(C)c3ccccc3C2(C)C)c(=O)c1=O CCO 425.0\n", + "98 Cc1cc2ccc(-c3cccc4cccc(-c5ccc6cc(C)c(=O)oc6c5)... c1ccccc1 324.0\n", + "99 Cc1ccc(C(=O)c2c(C)c3ccc4cccc5c6cccc7ccc2c(c76)... ClCCl 391.0\n", + "\n", + "[100 rows x 3 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_test = pd.read_csv(test_path)\n", + "df_test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get smiles" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([['CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2CCCC)C(=O)N(CCCC)C1=S',\n", + " 'ClCCl'],\n", + " ['C(=C/c1cnccn1)\\\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3cnccn3)cc2)cc1',\n", + " 'ClCCl'],\n", + " ['CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+](C)C)cc-3oc2c1',\n", + " 'O'],\n", + " ['c1ccc2[nH]ccc2c1', 'O'],\n", + " ['CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5ccccc5c4C3(C)C)CCCC1=C2c1ccccc1C(=O)O',\n", + " 'ClC(Cl)Cl']], dtype=object)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smiss = df_test[smiles_columns].values\n", + "smiss[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "n_componenets = len(smiles_columns)\n", + "test_datapointss = [[data.MoleculeDatapoint.from_smi(smi) for smi in smiss[:, i]] for i in range(n_componenets)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "test_dsets = [data.MoleculeDataset(test_datapoints, featurizer) for test_datapoints in test_datapointss]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Get multicomponent dataset and data loader" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "test_mcdset = data.MulticomponentDataset(test_dsets)\n", + "test_loader = data.build_dataloader(test_mcdset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 0%| | 0/2 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilessolventpeakwavs_maxpred
0CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2C...ClCCl642.0454.898621
1C(=C/c1cnccn1)\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3c...ClCCl420.0453.561584
2CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+]...O544.0448.694977
3c1ccc2[nH]ccc2c1O290.0448.159760
4CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5c...ClC(Cl)Cl736.0456.897003
...............
95COc1ccc(C2CC(c3ccc(O)cc3)=NN2c2ccc(S(N)(=O)=O)...C1CCOC1359.0454.548584
96COc1ccc2c3c(c4ccc(OC)cc4c2c1)C1(c2ccccc2-c2ccc...C1CCCCC1386.0455.287140
97CCCCOc1c(C=C2N(C)c3ccccc3C2(C)C)c(=O)c1=OCCO425.0453.560364
98Cc1cc2ccc(-c3cccc4cccc(-c5ccc6cc(C)c(=O)oc6c5)...c1ccccc1324.0454.656891
99Cc1ccc(C(=O)c2c(C)c3ccc4cccc5c6cccc7ccc2c(c76)...ClCCl391.0453.118774
\n", + "

100 rows × 4 columns

\n", + "" + ], + "text/plain": [ + " smiles solvent \\\n", + "0 CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2C... ClCCl \n", + "1 C(=C/c1cnccn1)\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3c... ClCCl \n", + "2 CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+]... O \n", + "3 c1ccc2[nH]ccc2c1 O \n", + "4 CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5c... ClC(Cl)Cl \n", + ".. ... ... \n", + "95 COc1ccc(C2CC(c3ccc(O)cc3)=NN2c2ccc(S(N)(=O)=O)... C1CCOC1 \n", + "96 COc1ccc2c3c(c4ccc(OC)cc4c2c1)C1(c2ccccc2-c2ccc... C1CCCCC1 \n", + "97 CCCCOc1c(C=C2N(C)c3ccccc3C2(C)C)c(=O)c1=O CCO \n", + "98 Cc1cc2ccc(-c3cccc4cccc(-c5ccc6cc(C)c(=O)oc6c5)... c1ccccc1 \n", + "99 Cc1ccc(C(=O)c2c(C)c3ccc4cccc5c6cccc7ccc2c(c76)... ClCCl \n", + "\n", + " peakwavs_max pred \n", + "0 642.0 454.898621 \n", + "1 420.0 453.561584 \n", + "2 544.0 448.694977 \n", + "3 290.0 448.159760 \n", + "4 736.0 456.897003 \n", + ".. ... ... \n", + "95 359.0 454.548584 \n", + "96 386.0 455.287140 \n", + "97 425.0 453.560364 \n", + "98 324.0 454.656891 \n", + "99 391.0 453.118774 \n", + "\n", + "[100 rows x 4 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_preds = np.concatenate(test_preds, axis=0)\n", + "df_test['pred'] = test_preds\n", + "df_test" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/examples/predicting_regression_reaction.ipynb b/chemprop/examples/predicting_regression_reaction.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..15523b7b922a461508c75712cf5e4232b2e723f5 --- /dev/null +++ b/chemprop/examples/predicting_regression_reaction.ipynb @@ -0,0 +1,435 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predicting Regression - Reaction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/predicting_regression_reaction.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import torch\n", + "from lightning import pytorch as pl\n", + "from pathlib import Path\n", + "\n", + "from chemprop import data, featurizers, models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change model input here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "checkpoint_path = chemprop_dir / 
\"tests\" / \"data\" / \"example_model_v2_regression_rxn.ckpt\" # path to the checkpoint file.\n", + "# If the checkpoint file is generated using the training notebook, it will be in the `checkpoints` folder with name similar to `checkpoints/epoch=19-step=180.ckpt`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load model" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=134, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=406, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): GraphTransform(\n", + " (V_transform): Identity()\n", + " (E_transform): Identity()\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn = models.MPNN.load_from_checkpoint(checkpoint_path)\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change predict input here" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "test_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"rxn\" / \"rxn.csv\"\n", + "smiles_column = 'smiles'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load smiles" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['[O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:15])([H:13])[H:14])([H:11])[H:12])([H:9])[H:10])[H:8]>>[C:3](=[C:4]=[O:5])([H:11])[H:12].[C:6]([O:7][H:15])([H:8])([H:13])[H:14].[O:1]=[C:2]([H:9])[H:10]',\n", + " '[C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:4]3([H:11])[O:5][C@:6]1([H:12])[C@@:7]23[H:13]>>[C:1]1([H:8])([H:9])[O:2][C:3]([H:10])=[C:7]([H:13])[C@:6]1([O+:5]=[C-:4][H:11])[H:12]',\n", + " '[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:16])=[C:7]1[H:17])([H:8])([H:9])[H:10]',\n", + " '[C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C-:1]([O+:2]=[C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])[H:12])([H:8])[H:10].[H:9][H:11]',\n", + " 
'[C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]=[C:4]([H:10])[H:11])[C:5](=[O:6])[H:12])([H:7])([H:8])[H:9]'],\n", + " dtype=object)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_test = pd.read_csv(test_path)\n", + "\n", + "smis = df_test.loc[:, smiles_column].values\n", + "smis[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "test_data = [data.ReactionDatapoint.from_smi(smi) for smi in smis]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define featurizer" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.CondensedGraphOfReactionFeaturizer(mode_=\"PROD_DIFF\")\n", + "# Testing parameters should match training parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get dataset and dataloader" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "test_dset = data.ReactionDataset(test_data, featurizer=featurizer)\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Perform tests" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|███████████████████| 2/2 [00:00<00:00, 119.42it/s]\n" + ] + } + ], + "source": [ + "with torch.inference_mode():\n", + " trainer = pl.Trainer(\n", + " logger=None,\n", + " enable_progress_bar=True,\n", + " accelerator=\"cpu\",\n", + " devices=1\n", + " )\n", + " test_preds = trainer.predict(mpnn, test_loader)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
[HTML table rendering omitted: 100 rows × 3 columns (smiles, ea, preds); the equivalent text/plain output follows.]
" + ], + "text/plain": [ + " smiles ea preds\n", + "0 [O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:1... 8.898934 8.071494\n", + "1 [C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:... 5.464328 8.108090\n", + "2 [C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H... 5.270552 8.087680\n", + "3 [C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])... 8.473006 8.070966\n", + "4 [C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H... 5.579037 8.065533\n", + ".. ... ... ...\n", + "95 [C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([C:4]... 9.295665 8.071316\n", + "96 [O:1]=[C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11... 7.753442 8.085133\n", + "97 [C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H... 10.650215 8.096391\n", + "98 [C:1]1([H:8])([H:9])[C@@:2]2([H:10])[N:3]1[C:4... 10.138945 8.202709\n", + "99 [C:1]([C@@:2]1([C:3]([C:4]([O:5][H:15])([H:13]... 6.979934 8.107012\n", + "\n", + "[100 rows x 3 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_preds = np.concatenate(test_preds, axis=0)\n", + "df_test['preds'] = test_preds\n", + "df_test" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/examples/rigr_featurizer.ipynb b/chemprop/examples/rigr_featurizer.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..868ca7d0b75e05e9cb3d6fe96d6c414a569cd38b --- /dev/null +++ b/chemprop/examples/rigr_featurizer.ipynb @@ -0,0 +1,577 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RIGR: Resonance Invariant Graph Representation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "RIGR is introduced and discussed in our work [RIGR: Resonance Invariant Graph Representation for Molecular Property Prediction](). It is a featurizer implemented as part of Chemprop v2.1.2, designed to impose strict resonance invariance for molecular property prediction tasks. It ensures a single graph representation of different resonance structures of the same molecule, including non-equivalent resonance forms. For CLI users, RIGR is available as a choice for the multi-hot atom featurization scheme. To use RIGR, add the following argument to your training or inference script:\n", + " ```bash\n", + " --multi-hot-atom-featurizer-mode RIGR\n", + " ```\n", + "In this Jupyter notebook, we show how to train and infer models using RIGR which is very similar to the generic training [example](./training.ipynb). 
RIGR can be easily implemented in your existing code by changing the `SimpleMoleculeMolGraphFeaturizer()` to this:\n", + " ```python\n", + " rigr_atom_featurizer = RIGRAtomFeaturizer()\n", + " rigr_bond_featurizer = RIGRBondFeaturizer()\n", + " featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer(atom_featurizer=rigr_atom_featurizer, bond_featurizer=rigr_bond_featurizer)\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/rigr_featurizer.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from typing import Sequence\n", + "\n", + "from lightning import pytorch as pl\n", + "import numpy as np\n", + "import pandas as pd\n", + "from rdkit import Chem\n", + "from rdkit.Chem.rdchem import Atom, Bond, Mol\n", + "\n", + "from chemprop import data, featurizers, models, nn\n", + "from chemprop.featurizers.atom import RIGRAtomFeaturizer\n", + "from chemprop.featurizers.bond import RIGRBondFeaturizer\n", + "from chemprop.featurizers.molecule import ChargeFeaturizer\n", + "from chemprop.utils import make_mol" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\" # path to your data .csv file\n", + "num_workers = 0 # number of workers for dataloader. 0 means using main process for data loading\n", + "smiles_column = 'smiles' # name of the column containing SMILES strings\n", + "target_columns = ['lipo'] # list of names of the columns containing targets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
[HTML table rendering omitted: 100 rows × 2 columns (smiles, lipo); the equivalent text/plain output follows.]
" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Featurization and Make Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "RIGR uses only the subset of atom and bond features from Chemprop that remain invariant across different resonance forms. The tables below indicate which atom and bond features are present and absent in RIGR.\n", + "\n", + "### Atom Features\n", + "\n", + "| **Feature** | **Description** | **Present in RIGR?** |\n", + "|------------------------|---------------------------------------------------------------------------------|:--------------------:|\n", + "| Atomic number | The choice for atom type denoted by atomic number | ☑️ |\n", + "| Degree | Number of direct neighbors of the atom | ☑️ |\n", + "| Formal charge | Integer charge assigned to the atom | ☐ |\n", + "| Chiral tag | The choices for an atom's chiral tag (See `rdkit.Chem.rdchem.ChiralType`) | ☐ |\n", + "| Number of H | Number of bonded hydrogen atoms | ☑️ |\n", + "| Hybridization | Atom's hybridization type (See `rdkit.Chem.rdchem.HybridizationType`) | ☐ |\n", + "| Aromaticity | Indicates whether the atom is aromatic or not | ☐ |\n", + "| Atomic mass | The atomic mass of the atom | ☑️ |\n", + "\n", + "\n", + "### Bond Features\n", + "\n", + "| **Feature** | **Description** | **Present in RIGR?** |\n", + "|-----------------------|------------------------------------------------------------------------------------------------------|:--------------------:|\n", + "| Bond type | The known bond types: single, double, or triple bond | ☐ |\n", + "| Conjugation | Indicates whether the bond is conjugated or not | ☐ |\n", + "| Ring | Indicates whether the bond is a part of a ring | ☑️ |\n", + "| Stereochemistry | Stores the known bond stereochemistries (See [BondStereo](https://www.rdkit.org/docs/source/rdkit.Chem.rdchem.html#rdkit.Chem.rdchem.BondStereo.values)) | ☐ |" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The return type of make_split_indices has changed in v2.1 - see help(make_split_indices)\n" + ] + } + ], + "source": [ + "mols = [make_mol(smi, add_h=True, keep_h=True) for smi in smis]\n", + "\n", + "charge_featurizer = ChargeFeaturizer()\n", + "x_ds = [charge_featurizer(mol) for mol in mols]\n", + "\n", + "all_data = [data.MoleculeDatapoint(mol, name=smi, y=y, x_d=x_d) for mol, smi, 
y, x_d in zip(mols, smis, ys, x_ds)]\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "rigr_atom_featurizer = RIGRAtomFeaturizer()\n", + "rigr_bond_featurizer = RIGRBondFeaturizer()\n", + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer(atom_featurizer=rigr_atom_featurizer, bond_featurizer=rigr_bond_featurizer)\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0], featurizer)\n", + "scaler = train_dset.normalize_targets()\n", + "\n", + "val_dset = data.MoleculeDataset(val_data[0], featurizer)\n", + "val_dset.normalize_targets(scaler)\n", + "\n", + "test_dset = data.MoleculeDataset(test_data[0], featurizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataloader" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = data.build_dataloader(train_dset, num_workers=num_workers)\n", + "val_loader = data.build_dataloader(val_dset, num_workers=num_workers, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, num_workers=num_workers, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=54, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=352, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=301, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0): RMSE(task_weights=[[1.0]])\n", + " (1): MAE(task_weights=[[1.0]])\n", + " (2): MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mp = nn.BondMessagePassing(\n", + " d_v=featurizer.atom_fdim,\n", + " d_e=featurizer.bond_fdim,\n", + ")\n", + "agg = nn.MeanAggregation()\n", + "output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)\n", + "ffn = nn.RegressionFFN(\n", + " input_dim=mp.output_dim + train_dset.d_xd,\n", + " output_transform=output_transform,\n", + ")\n", + "batch_norm = True\n", + "metric_list = [nn.metrics.RMSE(), nn.metrics.MAE()] # Only the first metric is used for training and early stopping\n", + "mpnn = models.MPNN(mp, agg, ffn, batch_norm, metric_list)\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "## Trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/akshatz/anaconda3/envs/chemprop/lib/python3.12/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/akshatz/anaconda3/envs/chemprop/lib/python3.12 ...\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_checkpointing=True, # Use `True` if you want to save model checkpoints. The checkpoints will be saved in the `checkpoints` folder.\n", + " enable_progress_bar=True,\n", + " accelerator=\"auto\",\n", + " devices=1,\n", + " max_epochs=20, # number of epochs to train for\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start Training" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n", + "/home/akshatz/anaconda3/envs/chemprop/lib/python3.12/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/akshatz/chemprop/examples/checkpoints exists and is not empty.\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/akshatz/anaconda3/envs/chemprop/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=63` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 212 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | BatchNorm1d | 600 | train\n", + "3 | predictor | RegressionFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "303 K Trainable params\n", + "0 Non-trainable params\n", + "303 K Total params\n", + "1.214 Total estimated model params size (MB)\n", + "25 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/mae 0.6620886325836182 │\n", + "│ test/rmse 0.9359426498413086 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mae \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.6620886325836182 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/rmse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9359426498413086 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(mpnn, test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/examples/shapley_value_with_customized_featurizers.ipynb b/chemprop/examples/shapley_value_with_customized_featurizers.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..2c10b2e5865523784ac51eee827eb8c8a6f9ff8b --- /dev/null +++ b/chemprop/examples/shapley_value_with_customized_featurizers.ipynb @@ -0,0 +1,1242 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Shapley value analysis for Chemprop models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook demonstrates how to perform Shapley (SHAP) value analysis for a chemprop model. 
In addition, it also serves as an example on how to customize chemprop featurizers.\n", + "\n", + "* Example 1: Shapley value analysis to explain importance of default chemprop atom and bond features\n", + "* Example 2: Shapley value analysis to explain importance of particular atom/node and bond/edge\n", + "* Example 3: Shapley value analysis to explain importance of extra features (not yet implemented, will be done in the future release, this is likely eaiser to be achieved by modifying MoleculeDatapoint and dataloader functions)\n", + "\n", + "Disclaimer: This notebook is for feature demonstration purposes only. The models used in this notebook are not trained models, and the computed Shapley values are provided solely for illustrative purposes. \n", + "\n", + "Note: This notebook is developed for Chemprop v2. \n", + "\n", + "For Chemprop v1 SHAP implementation checkout: https://github.com/oscarwumit/chemprop_developing/tree/shap_v1\n", + "\n", + "This notebook requires the SHAP package, do \"pip install shap\" if you don't have it installed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/shapley_value_with_customized_featurizers.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " !pip install shap\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Citation for SHAP implementation in Chemprop\n", + "\n", + "Li & Wu, et al. “When Do Quantum Mechanical Descriptors Help Graph Neural Networks to Predict Chemical Properties?” Journal of the American Chemical Society, vol. 146, no. 33, Aug. 2024, pp. 23103–20. https://doi.org/10.1021/jacs.4c04670.\n", + "\n", + "Bibtex format\n", + "
\n",
+    "@article{li_and_wu_qm_des_2024, \n",
+    "\ttitle = {When {Do} {Quantum} {Mechanical} {Descriptors} {Help} {Graph} {Neural} {Networks} to {Predict} {Chemical} {Properties}?}, \n",
+    "\tvolume = {146}, \n",
+    "\tcopyright = {https://doi.org/10.15223/policy-029}, \n",
+    "\tissn = {0002-7863, 1520-5126}, \n",
+    "\turl = {https://pubs.acs.org/doi/10.1021/jacs.4c04670}, \n",
+    "\tdoi = {10.1021/jacs.4c04670}, \n",
+    "\tlanguage = {en}, \n",
+    "\tnumber = {33}, \n",
+    "\turldate = {2025-01-13}, \n",
+    "\tjournal = {Journal of the American Chemical Society}, \n",
+    "\tauthor = {Li, Shih-Cheng and Wu, Haoyang and Menon, Angiras and Spiekermann, Kevin A. and Li, Yi-Pei and Green, William H.}, \n",
+    "\tmonth = aug, \n",
+    "\tyear = {2024}, \n",
+    "\tpages = {23103--23120}, \n",
+    "} \n",
+    "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Customize Chemprop featurizers for SHAP analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Import dependencies and classes\n", + "import sys\n", + "\n", + "from copy import deepcopy\n", + "from lightning import pytorch as pl\n", + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import torch\n", + "\n", + "from dataclasses import InitVar, dataclass\n", + "from typing import List, Sequence, Tuple, Union, Optional\n", + "from rdkit import Chem\n", + "from rdkit.Chem import Mol, Draw\n", + "from rdkit.Chem.rdchem import Atom, Bond, BondType\n", + "\n", + "from chemprop.featurizers.atom import MultiHotAtomFeaturizer \n", + "from chemprop.featurizers.bond import MultiHotBondFeaturizer \n", + "from chemprop.featurizers.molgraph.molecule import SimpleMoleculeMolGraphFeaturizer \n", + "\n", + "from chemprop.data.molgraph import MolGraph \n", + "from chemprop.featurizers.base import GraphFeaturizer \n", + "from chemprop.featurizers.molgraph.mixins import _MolGraphFeaturizerMixin \n", + "\n", + "from chemprop import data, featurizers, models \n", + "\n", + "import shap # do \"pip install shap\" if you don't have it installed\n", + "\n", + "import logging\n", + "\n", + "# Set logging level to WARNING to suppress INFO logs\n", + "logging.getLogger(\"lightning.pytorch.utilities.rank_zero\").setLevel(logging.WARNING)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### CustomMultiHotAtomFeaturizer" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "class CustomMultiHotAtomFeaturizer(MultiHotAtomFeaturizer):\n", + " \"\"\"A custom MultiHotAtomFeaturizer that allows for selective feature ablation.\n", + " \n", + " Parameters\n", + " ----------\n", + " keep_features : List[bool], optional\n", + " a list of booleans to indicate which atom features to keep. If None, all features are kept. For any element that is False, the corresponding feature's encoding is set to all zeros. 
Useful for ablation and SHAP analysis.\n", + " \"\"\"\n", + " \n", + " def __init__(self,\n", + " atomic_nums: Sequence[int],\n", + " degrees: Sequence[int],\n", + " formal_charges: Sequence[int],\n", + " chiral_tags: Sequence[int],\n", + " num_Hs: Sequence[int],\n", + " hybridizations: Sequence[int],\n", + " keep_features: List[bool] = None):\n", + " super().__init__(atomic_nums, degrees, formal_charges, chiral_tags, num_Hs, hybridizations)\n", + " \n", + " if keep_features is None:\n", + " keep_features = [True] * (len(self._subfeats) + 2)\n", + " self.keep_features = keep_features\n", + "\n", + " def __call__(self, a: Atom | None) -> np.ndarray:\n", + " x = np.zeros(self._MultiHotAtomFeaturizer__size)\n", + " if a is None:\n", + " return x\n", + " \n", + " feats = [\n", + " a.GetAtomicNum(),\n", + " a.GetTotalDegree(),\n", + " a.GetFormalCharge(),\n", + " int(a.GetChiralTag()),\n", + " int(a.GetTotalNumHs()),\n", + " a.GetHybridization(),\n", + " ]\n", + " \n", + " i = 0\n", + " for feat, choices, keep in zip(feats, self._subfeats, self.keep_features[:len(feats)]):\n", + " j = choices.get(feat, len(choices))\n", + " if keep:\n", + " x[i + j] = 1\n", + " i += len(choices) + 1\n", + " \n", + " if self.keep_features[len(feats)]:\n", + " x[i] = int(a.GetIsAromatic())\n", + " if self.keep_features[len(feats) + 1]:\n", + " x[i + 1] = 0.01 * a.GetMass()\n", + "\n", + " return x\n", + "\n", + " def zero_mask(self) -> np.ndarray:\n", + " \"\"\"Featurize the atom by setting all bits to zero.\"\"\"\n", + " return np.zeros(len(self))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Atom features all: [1. 0. 0. 0. 0. 0. 0. 1. 0.\n", + " 1. 0. 0. 1. 0. 0. 0. 0. 0.\n", + " 0. 1. 0. 0. 0. 1. 0. 0.12011]\n", + "Atom features some: [1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.\n", + " 0. 0.]\n", + "Atom features none: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 
0.]\n" + ] + } + ], + "source": [ + "# Example usage\n", + "atomic_nums = [6, 7, 8]\n", + "degrees = [1, 2, 3]\n", + "formal_charges = [-1, 0, 1]\n", + "chiral_tags = [0, 1, 2]\n", + "num_Hs = [0, 1, 2]\n", + "hybridizations = [1, 2, 3]\n", + "\n", + "keep_features_all = [True] * 8\n", + "keep_features_some = [True, True, False, True, False, True, True, False]\n", + "keep_features_none = [False] * 8\n", + "\n", + "featurizer_all = CustomMultiHotAtomFeaturizer(\n", + " atomic_nums=atomic_nums,\n", + " degrees=degrees,\n", + " formal_charges=formal_charges,\n", + " chiral_tags=chiral_tags,\n", + " num_Hs=num_Hs,\n", + " hybridizations=hybridizations,\n", + " keep_features=keep_features_all\n", + ")\n", + "\n", + "featurizer_some = CustomMultiHotAtomFeaturizer(\n", + " atomic_nums=atomic_nums,\n", + " degrees=degrees,\n", + " formal_charges=formal_charges,\n", + " chiral_tags=chiral_tags,\n", + " num_Hs=num_Hs,\n", + " hybridizations=hybridizations,\n", + " keep_features=keep_features_some\n", + ")\n", + "\n", + "featurizer_none = CustomMultiHotAtomFeaturizer(\n", + " atomic_nums=atomic_nums,\n", + " degrees=degrees,\n", + " formal_charges=formal_charges,\n", + " chiral_tags=chiral_tags,\n", + " num_Hs=num_Hs,\n", + " hybridizations=hybridizations,\n", + " keep_features=keep_features_none\n", + ")\n", + "\n", + "mol = Chem.MolFromSmiles('CCO')\n", + "atom = mol.GetAtomWithIdx(0) # Get the first atom\n", + "\n", + "features = featurizer_all(atom)\n", + "print(\"Atom features all:\", features)\n", + "\n", + "features = featurizer_some(atom)\n", + "print(\"Atom features some:\", features)\n", + "\n", + "features = featurizer_none(atom)\n", + "print(\"Atom features none:\", features)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### CustomMultiHotBondFeaturizer" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class CustomMultiHotBondFeaturizer(MultiHotBondFeaturizer):\n", + " \"\"\"A custom MultiHotBondFeaturizer that allows for selective feature ablation.\n", + " \n", + " Parameters\n", + " ----------\n", + " keep_features : List[bool], optional\n", + " a list of booleans to indicate which bond features to keep except for nullity. If None, all features are kept. For any element that is False, the corresponding feature's encoding is set to all zeros. 
Useful for ablation and SHAP analysis.\n", + " \"\"\"\n", + " \n", + " def __init__(self,\n", + " bond_types: Sequence[BondType] | None = None,\n", + " stereos: Sequence[int] | None = None,\n", + " keep_features: List[bool] = None):\n", + " super().__init__(bond_types, stereos)\n", + " \n", + " self._MultiHotBondFeaturizer__size = 1 + len(self.bond_types) + 2 + (len(self.stereo) + 1)\n", + "\n", + " if keep_features is None:\n", + " keep_features = [True] * 4 \n", + " self.keep_features = keep_features \n", + "\n", + " def __len__(self) -> int:\n", + " return self._MultiHotBondFeaturizer__size\n", + "\n", + " def __call__(self, b: Bond) -> np.ndarray:\n", + " x = np.zeros(len(self), int)\n", + "\n", + " if b is None:\n", + " x[0] = 1\n", + " return x\n", + " i = 1\n", + " bond_type = b.GetBondType()\n", + " bt_bit, size = self.one_hot_index(bond_type, self.bond_types)\n", + " if self.keep_features[0] and bt_bit != size:\n", + " x[i + bt_bit] = 1\n", + " i += size - 1\n", + "\n", + " if self.keep_features[1]:\n", + " x[i] = int(b.GetIsConjugated())\n", + " if self.keep_features[2]:\n", + " x[i + 1] = int(b.IsInRing())\n", + " i += 2\n", + "\n", + " if self.keep_features[3]:\n", + " stereo_bit, _ = self.one_hot_index(int(b.GetStereo()), self.stereo)\n", + " x[i + stereo_bit] = 1\n", + "\n", + " return x\n", + "\n", + " def zero_mask(self) -> np.ndarray:\n", + " \"\"\"Featurize the bond by setting all bits to zero.\"\"\"\n", + " return np.zeros(len(self), int)\n", + "\n", + " @classmethod\n", + " def one_hot_index(cls, x, xs: Sequence) -> tuple[int, int]:\n", + " \"\"\"Returns a tuple of the index of ``x`` in ``xs`` and ``len(xs) + 1`` if ``x`` is in ``xs``.\n", + " Otherwise, returns a tuple with ``len(xs)`` and ``len(xs) + 1``.\"\"\"\n", + " n = len(xs)\n", + " return xs.index(x) if x in xs else n, n + 1" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bond features all: [0 1 0 0 0 0 0 1 0 0 0 0 0 0]\n", + "Bond features some: [0 1 0 0 0 0 0 0 0 0 0 0 0 0]\n", + "Bond features none: [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n" + ] + } + ], + "source": [ + "# Example usage\n", + "bond_types = [BondType.SINGLE, BondType.DOUBLE, BondType.TRIPLE, BondType.AROMATIC]\n", + "stereos = [0, 1, 2, 3, 4, 5]\n", + "keep_features_all = [True] * 4\n", + "keep_features_some = [True, False, True, False]\n", + "keep_features_none = [False] * 4\n", + "\n", + "featurizer_all = CustomMultiHotBondFeaturizer(\n", + " bond_types=bond_types,\n", + " stereos=stereos,\n", + " keep_features=keep_features_all\n", + ")\n", + "\n", + "featurizer_some = CustomMultiHotBondFeaturizer(\n", + " bond_types=bond_types,\n", + " stereos=stereos,\n", + " keep_features=keep_features_some\n", + ")\n", + "\n", + "featurizer_none = CustomMultiHotBondFeaturizer(\n", + " bond_types=bond_types,\n", + " stereos=stereos,\n", + " keep_features=keep_features_none\n", + ")\n", + "\n", + "mol = Chem.MolFromSmiles('CCO')\n", + "bond = mol.GetBondWithIdx(0) # Get the first bond\n", + "\n", + "features = featurizer_all(bond)\n", + "print(\"Bond features all:\", features)\n", + "\n", + "features = featurizer_some(bond)\n", + "print(\"Bond features some:\", features)\n", + "\n", + "features = featurizer_none(bond)\n", + "print(\"Bond features none:\", features)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### CustomSimpleMoleculeMolGraphFeaturizer" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + 
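+    "As with the atom featurizer, the bond featurizer keeps the output length fixed and only blanks the ablated segments; the `keep_features` list here covers bond type, conjugation, ring membership, and stereochemistry (nullity is always encoded). A small sanity check one might run (a sketch, assuming the class defined in this notebook) is to confirm that ablation never changes the vector length:\n",
+    "\n",
+    "```python\n",
+    "# Sketch: vector length should be independent of the ablation mask.\n",
+    "f_all = CustomMultiHotBondFeaturizer(keep_features=[True] * 4)\n",
+    "f_none = CustomMultiHotBondFeaturizer(keep_features=[False] * 4)\n",
+    "assert len(f_all) == len(f_none)  # same dimensionality, different content\n",
+    "```\n",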
"metadata": {}, + "outputs": [], + "source": [ + "@dataclass\n", + "class CustomSimpleMoleculeMolGraphFeaturizer(SimpleMoleculeMolGraphFeaturizer):\n", + " \"\"\"A custom SimpleMoleculeMolGraphFeaturizer with additional feature control.\"\"\"\n", + " \n", + " keep_atom_features: Optional[List[bool]] = None\n", + " keep_bond_features: Optional[List[bool]] = None\n", + " keep_atoms: Optional[List[bool]] = None\n", + " keep_bonds: Optional[List[bool]] = None\n", + "\n", + " def __post_init__(self, extra_atom_fdim: int = 0, extra_bond_fdim: int = 0):\n", + " super().__post_init__(extra_atom_fdim, extra_bond_fdim)\n", + "\n", + " if isinstance(self.atom_featurizer, CustomMultiHotAtomFeaturizer) and self.keep_atom_features is not None:\n", + " self.atom_featurizer.keep_features = self.keep_atom_features\n", + " if isinstance(self.bond_featurizer, CustomMultiHotBondFeaturizer) and self.keep_bond_features is not None:\n", + " self.bond_featurizer.keep_features = self.keep_bond_features\n", + "\n", + " def __call__(\n", + " self,\n", + " mol: Chem.Mol,\n", + " atom_features_extra: np.ndarray | None = None,\n", + " bond_features_extra: np.ndarray | None = None,\n", + " ) -> MolGraph:\n", + " n_atoms = mol.GetNumAtoms()\n", + " n_bonds = mol.GetNumBonds()\n", + "\n", + " if self.keep_atoms is None:\n", + " self.keep_atoms = [True] * n_atoms\n", + " if self.keep_bonds is None:\n", + " self.keep_bonds = [True] * n_bonds\n", + "\n", + " if atom_features_extra is not None and len(atom_features_extra) != n_atoms:\n", + " raise ValueError(\n", + " \"Input molecule must have same number of atoms as `len(atom_features_extra)`!\"\n", + " f\"got: {n_atoms} and {len(atom_features_extra)}, respectively\"\n", + " )\n", + " if bond_features_extra is not None and len(bond_features_extra) != n_bonds:\n", + " raise ValueError(\n", + " \"Input molecule must have same number of bonds as `len(bond_features_extra)`!\"\n", + " f\"got: {n_bonds} and {len(bond_features_extra)}, respectively\"\n", + " )\n", + " if n_atoms == 0:\n", + " V = np.zeros((1, self.atom_fdim), dtype=np.single)\n", + " else:\n", + " V = np.array([self.atom_featurizer(a) if self.keep_atoms[a.GetIdx()] else self.atom_featurizer.zero_mask()\n", + " for a in mol.GetAtoms()], dtype=np.single)\n", + "\n", + " if atom_features_extra is not None:\n", + " V = np.hstack((V, atom_features_extra))\n", + "\n", + " E = np.empty((2 * n_bonds, self.bond_fdim))\n", + " edge_index = [[], []]\n", + "\n", + " i = 0\n", + " for u in range(n_atoms):\n", + " for v in range(u + 1, n_atoms):\n", + " bond = mol.GetBondBetweenAtoms(u, v)\n", + " if bond is None:\n", + " continue\n", + "\n", + " x_e = self.bond_featurizer(bond) if self.keep_bonds[bond.GetIdx()] else self.bond_featurizer.zero_mask()\n", + "\n", + " if bond_features_extra is not None:\n", + " x_e = np.concatenate((x_e, bond_features_extra[bond.GetIdx()]), dtype=np.single)\n", + "\n", + " E[i: i + 2] = x_e\n", + " edge_index[0].extend([u, v])\n", + " edge_index[1].extend([v, u])\n", + " i += 2\n", + "\n", + " rev_edge_index = np.arange(len(E)).reshape(-1, 2)[:, ::-1].ravel()\n", + " edge_index = np.array(edge_index, int)\n", + " return MolGraph(V, E, edge_index, rev_edge_index)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Molecule graph: MolGraph(V=array([[1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],\n", + " [1., 0., 0., 0., 0., 0., 
0., 1., 0., 0., 0., 0., 1., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],\n", + " [0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]], dtype=float32), E=array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), edge_index=array([[0, 1, 1, 2],\n", + " [1, 0, 2, 1]]), rev_edge_index=array([1, 0, 3, 2]))\n" + ] + } + ], + "source": [ + "# Example usage\n", + "atom_featurizer = CustomMultiHotAtomFeaturizer(\n", + " atomic_nums=[6, 7, 8],\n", + " degrees=[1, 2, 3],\n", + " formal_charges=[-1, 0, 1],\n", + " chiral_tags=[0, 1, 2],\n", + " num_Hs=[0, 1, 2],\n", + " hybridizations=[1, 2, 3],\n", + " keep_features=[True, True, False, True, False, True, True, False]\n", + ")\n", + "\n", + "bond_featurizer = CustomMultiHotBondFeaturizer(\n", + " bond_types=[BondType.SINGLE, BondType.DOUBLE, BondType.TRIPLE, BondType.AROMATIC],\n", + " stereos=[0, 1, 2, 3, 4, 5],\n", + " keep_features=[True, False, True, False]\n", + ")\n", + "\n", + "featurizer = CustomSimpleMoleculeMolGraphFeaturizer(\n", + " atom_featurizer=atom_featurizer,\n", + " bond_featurizer=bond_featurizer,\n", + " keep_atom_features=[True, True, False, True, False, True, True, False],\n", + " keep_bond_features=[True, False, True, False],\n", + ")\n", + "\n", + "# Example molecule (RDKit Mol object required)\n", + "from rdkit import Chem\n", + "mol = Chem.MolFromSmiles('CCO')\n", + "\n", + "mol_graph = featurizer(mol)\n", + "print(\"Molecule graph:\", mol_graph)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### SHAP analysis to interpret Chemprop model prediction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 1: Shapley value analysis to explain importance of default chemprop atom and bond features" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing molecule: Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14\n" + ] + } + ], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "\n", + "# load chemprop model checkpoint file\n", + "checkpoint_path = chemprop_dir / \"tests\" / \"data\" / \"example_model_v2_regression_mol.ckpt\" \n", + "mpnn = models.MPNN.load_from_checkpoint(checkpoint_path)\n", + "\n", + "# load data\n", + "test_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n", + "smiles_column = 'smiles'\n", + "df_test = pd.read_csv(test_path)\n", + "smis = df_test[smiles_column]\n", + "test_data = [data.MoleculeDatapoint.from_smi(smi) for smi in smis]\n", + "\n", + "# pick a test molecule for demonstration \n", + "test_mol = smis.iloc[0]\n", + "print(f\"Testing molecule: {test_mol}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# initialize the featurizer\n", + "atom_featurizer = CustomMultiHotAtomFeaturizer.v2() # chemprop v2 default atom featurizer settings\n", + "bond_featurizer = CustomMultiHotBondFeaturizer()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# A helper function to get predictions from a molecule with ability to keep or remove specific atom and bond features\n", + "def 
get_predictions(keep_atom_features: Optional[List[bool]], keep_bond_features: Optional[List[bool]], mol: str) -> float:\n", + " featurizer = CustomSimpleMoleculeMolGraphFeaturizer(\n", + " atom_featurizer=atom_featurizer,\n", + " bond_featurizer=bond_featurizer,\n", + " keep_atom_features=keep_atom_features,\n", + " keep_bond_features=keep_bond_features\n", + " )\n", + " test_data = [data.MoleculeDatapoint.from_smi(mol)]\n", + " test_dset = data.MoleculeDataset(test_data, featurizer=featurizer)\n", + " test_loader = data.build_dataloader(test_dset, shuffle=False, batch_size=1)\n", + "\n", + " with torch.inference_mode():\n", + " trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_progress_bar=False,\n", + " accelerator=\"cpu\",\n", + " devices=1\n", + " )\n", + " test_preds = trainer.predict(mpnn, test_loader)\n", + " return test_preds[0][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop_delete/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction with keep_features set 0: tensor([2.2535])\n", + "Prediction with keep_features set 1: tensor([2.2319])\n", + "Prediction with keep_features set 2: tensor([2.1880])\n" + ] + } + ], + "source": [ + "# example prediction with different keep_atom_features and keep_bond_features\n", + "\n", + "# keep all atom and bond features\n", + "keep_atom_features_0 = [True]*8\n", + "keep_bond_features_0 = [True]*4\n", + "\n", + "# keep some atom and bond features\n", + "keep_atom_features_1 = [True, True, False, True, True, False, True, True]\n", + "keep_bond_features_1 = [True, True, False, True]\n", + "\n", + "# remove all atom and bond features\n", + "keep_atom_features_2 = [False]*8\n", + "keep_bond_features_2 = [False]*4\n", + "\n", + "pred_0 = get_predictions(keep_atom_features_0, keep_bond_features_0, test_mol)\n", + "pred_1 = get_predictions(keep_atom_features_1, keep_bond_features_1, test_mol)\n", + "pred_2 = get_predictions(keep_atom_features_2, keep_bond_features_2, test_mol)\n", + "\n", + "print(f\"Prediction with keep_features set 0: {pred_0}\") # expected 2.2535\n", + "print(f\"Prediction with keep_features set 1: {pred_1}\") # expected 2.2319\n", + "print(f\"Prediction with keep_features set 2: {pred_2}\") # expected 2.1880" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# An example wrapper class for use as the model input in SHAP explainer\n", + "# The wrapper needs to be initialized first with the molecule to be explained, and then can be called with a boolean list representing the features to keep\n", + "# The wrapper is needed because SHAP explainer requires a callable model with a single input argument, adapt X as needed\n", + "class MoleculeModelWrapper:\n", + " def __init__(self, mol: str):\n", + " self.mol = mol\n", + " \n", + " def __call__(self, X):\n", + " preds = []\n", + " for keep_features in X:\n", + " try:\n", + " # unpacking X, indices corresponds to feature orders from default chemprop featurizer, adapt as needed\n", + " keep_atom_features = keep_features[:8] # 8 atom features\n", + " 
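+    "The helper above rebuilds a featurizer, dataset, and dataloader for every call, which is simple but slow when SHAP needs many evaluations. A possible (hypothetical) optimization, not used in this notebook, is to construct the `pl.Trainer` once and reuse it across calls:\n",
+    "\n",
+    "```python\n",
+    "# Sketch only: reuse a single Trainer instance across repeated predictions.\n",
+    "_shared_trainer = pl.Trainer(logger=False, enable_progress_bar=False, accelerator=\"cpu\", devices=1)\n",
+    "\n",
+    "def get_predictions_fast(keep_atom_features, keep_bond_features, mol):\n",
+    "    featurizer = CustomSimpleMoleculeMolGraphFeaturizer(\n",
+    "        atom_featurizer=atom_featurizer,\n",
+    "        bond_featurizer=bond_featurizer,\n",
+    "        keep_atom_features=keep_atom_features,\n",
+    "        keep_bond_features=keep_bond_features,\n",
+    "    )\n",
+    "    dset = data.MoleculeDataset([data.MoleculeDatapoint.from_smi(mol)], featurizer=featurizer)\n",
+    "    loader = data.build_dataloader(dset, shuffle=False, batch_size=1)\n",
+    "    with torch.inference_mode():\n",
+    "        return _shared_trainer.predict(mpnn, loader)[0][0]\n",
+    "```\n",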
keep_bond_features = keep_features[8:] # 4 bond features\n", + " except:\n", + " print(f\"Invalid input: {keep_features}\")\n", + " raise\n", + " pred = get_predictions(keep_atom_features, keep_bond_features, self.mol)\n", + " preds.append([pred.item()])\n", + " return np.array(preds)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# An example masker function for use with SHAP explainer\n", + "# The masker function takes in a binary mask and the input data X, and returns the masked input data. This simulates the effect of masking out certain features.\n", + "def binary_masker(binary_mask, x):\n", + " masked_x = deepcopy(x)\n", + " masked_x[binary_mask == 0] = 0\n", + " return np.array([masked_x])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the model wrapper with the test molecule\n", + "model_wrapper = MoleculeModelWrapper(test_mol)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[2.25354147]])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test the model wrapper with a random feature choice\n", + "keep_features = [1] * 12 # 8 atom features + 4 bond features\n", + "feature_choice = np.array([keep_features])\n", + "model_wrapper(feature_choice) # expected 2.25354171" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the SHAP explainer with the model wrapper and masker\n", + "explainer = shap.PermutationExplainer(model_wrapper, masker=binary_masker)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop_delete/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. 
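+    "The wrapper accepts a 2D array `X` whose rows are binary keep/drop vectors of length 12 (8 atom features followed by 4 bond features) and returns a column of predictions, which is the callable signature SHAP expects. A minimal check of that contract (a sketch, assuming the classes defined above) could be:\n",
+    "\n",
+    "```python\n",
+    "# Sketch: the wrapper maps an (n, 12) binary matrix to an (n, 1) prediction array.\n",
+    "wrapper = MoleculeModelWrapper(test_mol)\n",
+    "X = np.ones((1, 12), dtype=int)   # keep all 8 atom + 4 bond features\n",
+    "out = wrapper(X)\n",
+    "assert out.shape == (1, 1)\n",
+    "```\n",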
Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "PermutationExplainer explainer: 2it [00:28, 28.55s/it] \n" + ] + } + ], + "source": [ + "# Compute SHAP values, using 100 evaluations of different feature choices (notice that feature choices are masked out randomly by the binary masker, so the results may vary between runs)\n", + "explanation = explainer(feature_choice, max_evals=100) " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ".values =\n", + "array([[ 0.00123063, 0.01496077, 0.00213072, -0.01216608, 0.00954816,\n", + " 0.00413817, 0.00643879, -0.00101143, 0.01162252, 0.00842983,\n", + " 0.00846943, 0.01178101]])\n", + "\n", + ".base_values =\n", + "array([[2.18796897]])\n", + "\n", + ".data =\n", + "array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Print the SHAP values\n", + "explanation" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAu0AAAKFCAYAAAB4GddQAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAnCFJREFUeJzs3Xt81NWd//HXJCSTe8KQkIQQMIJICWAKAgGWFlrkEha7KxBh44WIDVAiFQm43GugRmCpLQVsQBtuqwSCdbuSxp+CAmWCKFouBq1hEQ0XJZCLGBhCkt8fAwOTSSBAhhnC+/l4zIOZ8z3nez7fPHbre75z5oyhpqamBhERERERcVseri5ARERERESuTaFdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1Nob0JqamooLy9HW++LiIiINC0K7U3I999/T3BwMN9//72rSxERERGRRqTQLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmFdhERERERN6fQLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmFdhERERERN6fQLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmFdhERERERN6fQLiIiIiLi5pq5ugBxghNn4OxFV1chIiIicuP8jRDs7+oq3I5Ce1OUugq+KXd1FSIiIiI3JqYlvDZJob0OCu1N0VfFUHjK1VWIiIiISCPRmnYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERJq+0h8g5RUIGwv+Y2DAXPjkcMPHHyqCIekQ8B9gegIe/wOcKnPs99scePhFCE8GwyPwmw2NUr5Cu4iIiIg0bdXVMGwBvL4TUofCoifguzLoPxe+PH798UXF8JPZUHgSXkyCtIdhy1546AW4UGnfd/br8FEh/DimUS/BaaE9JycHg8FQ7yM3N9dZUwMwffp0MjIynDpHYygpKWH8+PHEx8djMpkwGAx06dLF1WWJiIiI3Dn6z4Gxf6z/eE4+mL+A1akw71GYNBQ+SAdPD5iXff3zv7gZfjgP216AycNg5kjYOBX2fQWr37fve+RPcOLPsP7ZW7kiB07/RdQBAwaQkJDg0N61a1enzpuVlUVERAQzZsxw6jy36tixY6xcuZLg4GA6dOjA3r17XV2SiIiISNOSkw/hIfBI/JW2sGBI7APrd4ClEoxe9Y/fvBv+9UFoE3albeAD0KEVbDRDyqAr7fe0bPTy4TaE9ri4ONLS0pw9zW1lsViorKwkICDgls9177338vnnn3P//fcD4OPjc8vnFBEREZGrfHoEut0LHrUWmfS8D1a+C/88Dl3a1j322GnrUpoH2zke63kf5N6eG65usaZ96dKldOrUCR8fH4xGIx06dGD58uV19ouPjyc0NBQvLy8CAwPp06cPO3bssOtnMBgoLi7m4MGDdktyDh48aDs+ZMgQh/NnZGRgMBjIycmxtaWkpGAwGDCbzYwePRqTyYSvry95eXkAVFRUkJqaSnR0NF5eXvj5+dGrVy+2bdvWoGv38/OzBXYRERERcYITJRDZ3LH9ctvxM9cee3Xf2uPPnLXeqXcyp99pr6iooKioyK7N19eXFi1aAPDUU0+RlZVF9+7dmTx5Mp6enmzZsoXU1FROnDjBggULbOMyMzMJDg4mMTGRyMhICgsL2bx5M4MGDcJsNtOtWzcAFi9eTHp6OoGBgUyZMsU2vnXr1jd9HUlJSXh7ezNu3DgMBgNt27bFYrEQHx/PoUOHGDRoEMnJyZSWlpKdnc3QoUN5++23eeihh256ThERERGppfIilFU4tlkqobjcvt0UYL27fu4CGOuIvT7e1n/PXah/vsvH6lo+4+N1pc+1ltc0AqeH9szMTDIzM+3a+vfvz/vvv8/WrVvJysriySefZPXq1bbjGRkZ9OnThyVLljBlyhRbwN+1axchISF255o4cSL9+vVj/vz5/OUvfwEgLS2NhQsXYjKZGm1pTkBAAHv37sXb29vWNm3aNA4cOMC6det47LHHbO1z586lY8eOPPfccxw4cKBR5hcRERG5G1y4cAHvq16bzWb69Olje12w6k06TapjG0XzF7Dh7/ZtR/5EuckHf2MzPC0XHc953hrIC44U0oletuO7d++mR48eeHp6gq+1mh/O
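+    "For context (a hedged note, not original to the notebook): `shap.PermutationExplainer` estimates Shapley values by permuting feature orderings and measuring how the prediction changes as each of the 12 keep/drop flags is toggled through the `binary_masker`, so `max_evals=100` bounds the number of model evaluations and the estimates will vary somewhat between runs. Increasing `max_evals` trades runtime for lower variance in the attributions.\n",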
lOJ/6XhRURHV1dW0OW+9w15eeZ6vDx6hc+fOtnPs2bOHnte4jhvl9NA+fPhwxowZY9fWpk0bAF599VUMBgOTJk1yuBs/bNgw8vPzycvLIykpCcAW2Kurqzlz5gznz5+ndevWtGrViv379zv1Op555hm7wA7w5ptv0qpVK/r37+9Qf69evcjLy+P7778nMDDQqbWJiIiINBW181btoNtpTAJ0qLW0eOpqiGgO035h3x4RQpCPN0S1uLLM5epzXmrrNPBf7IbFx1/1hdVLy2L8y6/cjbet3jhRAqYAgsJa0Dmshd05evbsCayo9zpulNNDe/v27R1C+2WFhYXU1NRcuqi6HTt2zPZ827ZtzJgxg3379mGxWOz6hYWF1R7aqOrahrGoqIgLFy4QHR1d77hjx47RsWNHZ5YmIiIicvdoHmDduaV2W2Rzx/bL4u6BnYes+7Vf/WXUD78EP6N1F5j6RLWAsCD4uI4fYtrzJcQ17n7s9XF6aL+WmpoaDAYDa9eutX78UIfLgf7QoUMkJCTg5+fH+PHjiY2NJSAgAIPBwPPPP8+5c+duuZ6LFy/We6yuu+U1NTW0adOGl156qd5xt7KOXkREREQawcje1m0f39wNIy/d8S4uh01mGP6g/Xr0wyet/7aLuNI2ojeseR++KYboUGvb1v3WXWemDL8tl+DS0B4TE8PevXtp3769/ccQdVi9ejUWi4V169YxatQou2MTJ07Ey8t+8b/BYKj3XP7+/pSWljq0Hz58Az9lC0RGRlJWVkZiYmK9bzpERERExMVG9ob4DpC8DAqKIDQQVuRBVTW8MNq+78/nWf/96qrvZM4cYQ34A+bCr4fB2fOw+H+s20Qm/8x+/LoP4OgpqLi0KmRHASzYZH3++E+h7c3t4+7SLR+ffvppAKZOnUplpeNWOUeOHLE9vxyKa2pq7Pqkp6dTVlbmMNbHx4fy8nKHdoCoqCgKCgrsjp88eZK33nrrhuofMWIEZWVlTJs2rc7jV9cvIiIiIi7i6Qm5s+HRvrB0C0xbC6FB1l84vT/q+uOjQ2H7fOvd9/9cD4vegoRu8O48x11jXtsKc96AjDetr98/aH095w048t1NX4JL77QPHjyYlJQUVq5cSbt27UhISCAqKorjx4+zb98+9uzZY1uykpiYyJIlS5gwYQI7d+7EZDJhNpvJz88nPDycqqoqu3N37dqV3Nxcxo0bR2xsLB4eHowdO5aQkBBSUlJIS0ujR48ejBo1ipKSEjZu3Eh4eHidbwDqk5GRwfbt23n55ZfZuXMn/fr1Izg4mKNHj7Jr1y6MRmODviA7c+ZM253/qqoqvv32W371q18B0L17d8aNG9fgmkRERETuOh/Mv36f5gHw6iTr41q+yqy7PbYNvDO3cWq5CS4N7WDdEjI+Pp5ly5axfv16LBYLQUFBxMTEMGvWLFu/uLg4NmzYwKxZs1i1ahUeHh507tyZd955hwkTJnDy5Em78y5fvpykpCSys7OpqKigpqaGgQMHEhISwtSpUykqKmLdunUsXLiQli1b8uyzz+Lh4cHMmTMbXLvRaCQ/P5958+axadMm2w9CmUwmYmNjSU5ObtB5Vq1aRXFxse31qVOneOWVVwDrGxuFdhEREZG7m6Gm9noTuWOVl5cTHBxMWfsUggpPubocERERkRtzf5R1yUork6srcTsuXdMuIiIiIiLXp9AuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibs7l+7SLE9wTCp7erq5CRERE5MbEtHR1BW5Lob0pWvZLCAxydRUiIiIiN87f6OoK3JJCe1MUaYIghXYRERGRpkJr2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxc81cXYA4wYkzcPaiq6sQERFxP/5GCPZ3dRUiN0yhvSlKXQXflLu6ChEREfcS0xJem6TQLnckhfam6KtiKDzl6ipEREREpJFoTbuIiIiIiJtTaBcRERERcXMK7SIiIiIibk6hXURERETEzSm0i4iIiIi4OYV2ERERERE3p9AuIiIicqtKf4CUVyBsLPiPgQFz4ZPDDR9/qAiGpEPAf4DpCXj8D3CqzLHfb3Pg4RchPBkMj8BvNjTaJYh7U2gXERERuRXV1TBsAby+E1KHwqIn4Lsy6D8Xvjx+/fFFxfCT2VB4El5MgrSHYcteeOgFuFBp33f26/BRIfw4xjnXIm7LaaE9JycHg8FQ7yM3N9dZUwMwffp0MjIynDpHY8jNzaVfv35ERETg4+ODj48Pbdq0Yfz48Zw6pR9IEhERcbn+c2DsH+s/npMP5i9gdSrMexQmDYUP0sHTA+ZlX//8L26GH87Dthdg8jCYORI2ToV9X8Hq9+37HvkTnPgzrH/2Vq5I7kBO/0XUAQMGkJCQ4NDetWtXp86blZVFREQEM2bMcOo8t+rgwYOcP3+e4cOHExUVRXV1NR999BGvvfYaf/vb3/jss88IDAx0dZkiIiJSn5x8CA+BR+KvtIUFQ2IfWL8DLJVg9Kp//Obd8K8PQpuwK20DH4AOrWCjGVIGXWm/p2Wjly93BqeH9ri4ONLS0pw9zW1lsViorKwkICDgls81ffp0pk+f7tA+ZcoUfv/735OVlcXkyZNveR4RERFxkk+PQLd7waPWAoae98HKd+Gfx6FL27rHHjttXUrzYDvHYz3vg9y9jV+v3JHcYk370qVL6dSpEz4+PhiNRjp06MDy5cvr7BcfH09oaCheXl4EBgbSp08fduzYYdfPYDBQXFzMwYMH7ZbkHDx40HZ8yJAhDufPyMjAYDCQk5Nja0tJScFgMGA2mxk9ejQmkwlfX1/y8vIAqKioIDU1lejoaLy8vPDz86NXr15s27btlv4mMTHWtWpnzpy5pfOIiIiIk50ogcjmju2X245f47/lJ0rs+9Yef+as9U693PWcfqe9oqKCoqIiuzZfX19atGgBwFNPPUVWVhbdu3dn8uTJeHp6smXLFlJTUzlx4gQLFiywjcvMzCQ4OJjExEQiIyMpLCxk8+bNDBo0CLPZTLdu3QBYvHgx6enpBAYGMmXKFNv41q1b3/R1JCUl4e3tzbhx4zAYDLRt2xaLxUJ8fDyHDh1i0KBBJCcnU1paSnZ2NkOHDuXtt9/moYceatD5y8vLKS8v5/vvv2f79u0sWLCAZs2a8W//9m83XbOIiIjcoMqLUFbh2GaphOJy+3ZTgPXu+rkLYKwjUvl4W/89d6H++S4fq2v5jI/XlT7XWl4jdwWn32nPzMwkOjra7jFy5EgAtm7dSlZWFk8++SQff/wxixYtIiMjg/3799O7d2+WLFnC6dOnbefatWsXZrOZFStWMGfOHNasWcN7771HVVUV8+fPt/VLS0vDaDRiMplIS0uzPUJCQm76OgICAjhw4ACLFy9m0aJF9OjRg9mzZ3PgwAGysrLYsmUL6enpLF261LYO/bnnnmvw+Sd
MmEB0dDSdOnVi4sSJ+Pj4sHr1auLi4m66ZhEREbFXXV3NP//5T7s2s9l85cWuz63bNl79MH8BG/7u2P51sfWcPs04e7rUdory8nLrp/vnLwVyX2/7Oa6e09ca7As/+5yqqirb8YKCAs6Xfm/rU1RUxNdff207/v333ztcW71zXLJ7926HOUpKSmyva89huw7N4ZQ5bpTT77QPHz6cMWPG2LW1adMGgFdffRWDwcCkSZMc7sYPGzaM/Px88vLySEpKArCF7urqas6cOcP58+dp3bo1rVq1Yv/+/U69jmeeeQZvb2+7tjfffJNWrVrRv39/h/p79epFXl4e33//fYO+SJqWlsbw4cM5c+YMf//739mzZw/ffvtto16DiIjI3c7Dw4MOHTrYtfXp0+fKiwfugXfn2Q+auhoimsO0X9i3R4RYz9mqBQHfX1nCEhQUROfOneHD96wNrUz0qbWm3TbnpWUx7f1N4OlpO96pUycoedd6N9/o5bBaoK5sYXcddbyOj4+3e92pUye717XnsF2H5nDKHDfK6aG9ffv2DqH9ssLCQmpqaujZs2e9448dO2Z7vm3bNmbMmMG+ffuwWCx2/cLCwmoPbVRdunRxaCsqKuLChQtER0fXO+7YsWN07Njxuufv1q2bbXnPpEmTWL9+PY8//jgGg8FuiY+IiIg4UfMA684ttdsimzu2XxZ3D+w8ZN2v/eovo374JfgZrbvA1CeqBYQFwcd1/BDTni8hTvuxi5XTQ/u11NTUYDAYWLt2LZ5Xvbu82uVAf+jQIRISEvDz82P8+PHExsYSEBCAwWDg+eef59y5c7dcz8WLF+s9Vtc72pqaGtq0acNLL71U77ibXUf/2GOPkZqayquvvqrQLiIi4s5G9rZu+/jmbhh56W5qcTlsMsPwB+3Xox8+af23XcSVthG9Yc378E0xRIda27but+46M2X47bkGcXsuDe0xMTHs3buX9u3bO3wMUdvq1auxWCysW7eOUaNG2R2bOHEiXl72X9AwGAz1nsvf35/S0lKH9sOHb+DnhoHIyEjKyspITEys903Hrbhw4QLl5eXX7ygiIiKuM7I3xHeA5GVQUAShgbAiD6qq4YXR9n1/fmnpzVeZV9pmjrAG/AFz4dfD4Ox5WPw/1m0ik39mP37dB3D0FFRcWnGwowAWbLI+f/yn0Fb7uDdVLt3y8emnnwZg6tSpVFY6bmd05MgR2/PLobimpsauT3p6OmVlZQ5jfXx86g28UVFRFBQU2B0/efIkb7311g3VP2LECMrKypg2bVqdx6+uvz7/93//V2f7woULOXfunNN/hEpERERukacn5M6GR/vC0i0wbS2EBll/4fT+qOuPjw6F7fOtd9//cz0segsSulnX1tfeNea1rTDnDch40/r6/YPW13PegCPfNfqliftw6Z32wYMHk5KSwsqVK2nXrh0JCQlERUVx/Phx9u3bx549e2xLVhITE1myZAkTJkxg586dmEwmzGYz+fn5hIeH233DF6y/uJqbm8u4ceOIjY3Fw8ODsWPHEhISQkpKCmlpafTo0YNRo0ZRUlLCxo0bCQ8Pr/MNQH0yMjLYvn07L7/8Mjt37qRfv34EBwdz9OhRdu3ahdFovO4XZAcOHEhwcDDdu3enbdu2lJaWsnv3bvLz8zGZTCxZsuTG/7AiIiLSeD6Yf/0+zQPg1UnWx7VcfYf9arFt4J25jVOLNEkuDe1g3RIyPj6eZcuWsX79eiwWC0FBQcTExDBr1ixbv7i4ODZs2MCsWbNYtWoVHh4edO7cmXfeeYcJEyZw8uRJu/MuX76cpKQksrOzqaiooKamhoEDBxISEsLUqVMpKipi3bp1LFy4kJYtW/Lss8/i4eHBzJkzG1y70WgkPz+fefPmsWnTJtsPQplMJmJjY0lOTr7uOR577DH++te/snnzZr7//ns8PT2JiIjg8ccf58UXXyQqqgHv0EVERESkSTPU1F5vInes8vJygoODKWufQlDhKVeXIyIi4l7uj7IuWWllcnUlIjfMpWvaRURERETk+hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0iIiIiIm5OoV1ERERExM25fJ92cYJ7QsHT29VViIiIuJeYlq6uQOSmKbQ3Rct+CYFBrq5CRETE/fgbXV2ByE1RaG+KIk0QpNAuIiIi0lRoTbuIiIiIiJtTaBcRERERcXMK7SIiIiIibk6hXURERETEzSm0i4iIiIi4OYV2ERERERE3p9AuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibq6ZqwsQJzhxBs5edHUVIiIizuFvhGB/V1chclsptDdFqavgm3JXVyEiItL4YlrCa5MU2uWuo9DeFH1VDIWnXF2FiIiIiDQSrWkXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmFdhERERERN6fQLiIiIiLi5hTaRURERC4r/QFSXoGwseA/BgbMhU8ON3z8oSIYkg4B/wGmJ+DxP8CpMsd+v82Bh1+E8GQwPAK/2dBolyBNk0K7iIiICEB1NQxbAK/vhNShsOgJ+K4M+s+FL49ff3xRMfxkNhSehBeTIO1h2LIXHnoBLlTa9539OnxUCD+Occ61SJNzQ6E9JycHg8FQ7yM3N9dZdQIwffp0MjIynDpHYygpKWH8+PHEx8djMpkwGAx06dKl3v5Lly5l6NCh3HPPPXh6emIwGDh48OBtrFhEROQu0H8OjP1j/cdz8sH8BaxOhXmPwqSh8EE6eHrAvOzrn//FzfDDedj2AkweBjNHwsapsO8rWP2+fd8jf4ITf4b1z97KFcld5KZ+EXXAgAEkJCQ4tHft2vWWC7qWrKwsIiIimDFjhlPnuVXHjh1j5cqVBAcH06FDB/bu3XvN/qtWreKLL74gJiaGiIgIjh9vwLt5ERERaVw5+RAeAo/EX2kLC4bEPrB+B1gqwehV//jNu+FfH4Q2YVfaBj4AHVrBRjOkDLrSfk/LRi9fmrabCu1xcXGkpaU1di0uZbFYqKysJCAg4JbPde+99/L5559z//33A+Dj43PN/hs3bqRdu3Z4e3szYsQI3nzzzVuuQURERG7Qp0eg273gUWshQs/7YOW78M/j0KVt3WOPnbYupXmwneOxnvdB7rVv4Ilcj9PWtC9dupROnTrh4+OD0WikQ4cOLF++vM5+8fHxhIaG4uXlRWBgIH369GHHjh12/QwGA8XFxRw8eNBuSc7lZSQGg4EhQ4Y4nD8jIwODwUBOTo6tLSUlBYPBgNlsZvTo0ZhMJnx9fcnLywOgoqKC1NRUoqOj8fLyws/Pj169erFt27YGXbufn58tsDfEj370I7y9vRvcX0RERJzgRAlENndsv9x2/My1x17dt/b4M2etd+pFbtJN3WmvqKigqKjIrs3X15cWLVoA8NRTT5GVlUX37t2ZPHkynp6ebNmyhdTUVE6cOMGCBQts4zIzMwkODiYxMZHIyEgKCwvZvHkzgwYNwmw2061bNwAWL15Meno6gYGBTJkyxTa+devWN3MJAC
QlJeHt7c24ceMwGAy0bdsWi8VCfHw8hw4dYtCgQSQnJ1NaWkp2djZDhw7l7bff5qGHHrrpOUVEROQ2qLwIZRWObZZKKC63bzcFWO+un7sAxjqikc+lG2vnLtQ/3+VjdS2f8fG60uday2tEruGm7rRnZmYSHR1t9xg5ciQAW7duJSsriyeffJKPP/6YRYsWkZGRwf79++nduzdLlizh9OnTtnPt2rULs9nMihUrmDNnDmvWrOG9996jqqqK+fPn2/qlpaVhNBoxmUykpaXZHiEhITd98QEBARw4cIDFixezaNEievTowezZszlw4ABZWVls2bKF9PR0li5dymeffUZgYCDPPffcTc8nIiIijWv37t1UVVXZXhcUFFBSUgK7Prdu23j1w/wFbPi7Q/vev7xjHezrDZaLmM1muzk+/8f+K8evnuOSoqIiTpZdugtvqaS8vNx+Q4nzlXbjAYc5ioqK6r6Oq45//fXXttcOc9Rxztqv6/1baQ6XzHGjbupO+/DhwxkzZoxdW5s2bQB49dVXMRgMTJo0yeFu/LBhw8jPzycvL4+kpCQAW+iurq7mzJkznD9/ntatW9OqVSv2799/M+U12DPPPOOwLOXNN9+kVatW9O/f36H+Xr16kZeXx/fff09gYKBTaxMREZHri4+Pt3vdqVMn65MHvODdefadp66GiOYw7Rd2zd3/5UfWJ5HN4UQJffr0sTveMTjc+qSVyX6OS1q3bg0GX+uLEyUEBQXRuXPnKx1OlFjv5l91l732HK1btwZPT8fruPr4VRzmqOOctV/X+7fSHC6Z40bdVGhv3769Q2i/rLCwkJqaGnr27Fnv+GPHjtmeb9u2jRkzZrBv3z4sFotdv7CwsNpDG1Vd2zAWFRVx4cIFoqOj6x137NgxOnbs6MzSRERE5FY0D7Du3FK7LbK5Y/tlcffAzkPW/dqv/jLqh1+Cn9G6C0x9olpAWBB8XMcPMe35EuK0H7vcmpsK7ddSU1ODwWBg7dq1eF71jvFqlwP9oUOHSEhIwM/Pj/HjxxMbG0tAQAAGg4Hnn3+ec+fO3XI9Fy9erPdYXXfLa2pqaNOmDS+99FK9425lHb2IiIi4qZG9rds+vrkbRl66K1pcDpvMMPxB+/Xoh09a/20XcaVtRG9Y8z58UwzRoda2rfutu85MGX57rkGarEYP7TExMezdu5f27ds7fLRQ2+rVq7FYLKxbt45Ro0bZHZs4cSJeXvZf1jAYDPWey9/fn9LSUof2w4dv4KeHgcjISMrKykhMTKz3TYeIiIg0QSN7Q3wHSF4GBUUQGggr8qCqGl4Ybd/355eW3nyVeaVt5ghrwB8wF349DM6eh8X/Y90mMvln9uPXfQBHT0HFpVUGOwpgwSbr88d/Cm21j7vYa/QtH59++mkApk6dSmWl49ZGR44csT2/HIpramrs+qSnp1NWVuYw1sfHh/Lycod2gKioKAoKCuyOnzx5krfeeuuG6h8xYgRlZWVMmzatzuNX1y8iIiJNiKcn5M6GR/vC0i0wbS2EBll/4fT+qOuPjw6F7fOtd9//cz0segsSulnX1tfeNea1rTDnDci49Nss7x+0vp7zBhz5rtEvTe58jX6nffDgwaSkpLBy5UratWtHQkICUVFRHD9+nH379rFnzx7bkpXExESWLFnChAkT2LlzJyaTCbPZTH5+PuHh4Xbf2gXrL67m5uYybtw4YmNj8fDwYOzYsYSEhJCSkkJaWho9evRg1KhRlJSUsHHjRsLDw+t8A1CfjIwMtm/fzssvv8zOnTvp168fwcHBHD16lF27dmE0Ghv0BdmZM2fa7vxXVVXx7bff8qtf/QqA7t27M27cOFvfv/zlL7z77rsAtm8ev/jii7Yv6a5YsaLB9YuIiEg9Pph//T7NA+DVSdbHtVx9h/1qsW3gnbmNU4vIVRo9tIN1S8j4+HiWLVvG+vXrsVgsBAUFERMTw6xZs2z94uLi2LBhA7NmzWLVqlV4eHjQuXNn3nnnHSZMmMDJkyftzrt8+XKSkpLIzs6moqKCmpoaBg4cSEhICFOnTqWoqIh169axcOFCWrZsybPPPouHhwczZ85scO1Go5H8/HzmzZvHpk2bbD8IZTKZiI2NJTk5uUHnWbVqFcXFxbbXp06d4pVXXgGsb2yuDu1/+9vfWLVqld34N954w/ZcoV1ERETk7maoqb02Re5Y5eXlBAcHU9Y+haDCU64uR0REpPHdH2VdrnJp+0WRu0Wjr2kXEREREZHGpdAuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibs4p+7SLi90TCp7erq5CRESk8cW0dHUFIi6h0N4ULfslBAa5ugoRERHn8De6ugKR206hvSmKNEGQQruIiIhIU6E17SIiIiIibk6hXURERETEzSm0i4iIiIi4OYV2ERERERE3p9AuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibk6hXURERETEzSm0i4iIiIi4uWauLkCc4MQZOHvR1VWIiIjcGn8jBPu7ugoRt6DQ3hSlroJvyl1dhYiIyM2LaQmvTVJoF7lEob0p+qoYCk+5ugoRERERaSRa0y4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIjI3af0B0h5BcLGgv8YGDAXPjnc8PGHimBIOgT8B5iegMf/AKfKHPv9NgcefhHCk8HwCPxmQ6NdgtxdFNpFRETk7lJdDcMWwOs7IXUoLHoCviuD/nPhy+PXH19UDD+ZDYUn4cUkSHsYtuyFh16AC5X2fWe/Dh8Vwo9jnHMtctdwWmjPycnBYDDU+8jNzXXW1ABMnz6djIwMp87RWIqKinjssceIiorCaDQSEhLCAw88wMqVK11dmoiIyJ2n/xwY+8f6j+fkg/kLWJ0K8x6FSUPhg3Tw9IB52dc//4ub4YfzsO0FmDwMZo6EjVNh31ew+n37vkf+BCf+DOufvZUrEnH+L6IOGDCAhIQEh/auXbs6dd6srCwiIiKYMWOGU+e5VeXl5fTo0YPTp0/zi1/8gq5du3L69GlycnIYP348RUVFpKenu7pMERGRpiMnH8JD4JH4K21hwZDYB9bvAEslGL3qH795N/zrg9Am7ErbwAegQyvYaIaUQVfa72nZ6OXL3cnpoT0uLo60tDRnT3NbWSwWKisrCQgIuOVzvfbaa5w8eZLp06ezcOFCW/uMGTNo27Yt69atU2gXERFpTJ8egW73gketBQc974OV78I/j0OXtnWPPXbaupTmwXaOx3reB7l7G79eEdxkTfvSpUvp1KkTPj4+GI1GOnTowPLly+vsFx8fT2hoKF5eXgQGBtKnTx927Nhh189gMFBcXMzBgwftluQcPHjQdnzIkCEO58/IyMBgMJCTk2NrS0lJwWAwYDabGT16NCaTCV9fX/Ly8gCoqKggNTWV6OhovLy88PPzo1evXmzbtq1B115WZv3SSnR0tF17WFgYRqMRHx+fBp1HREREGuhEC
UQ2d2y/3Hb8zLXHXt239vgzZ6136kUamdPvtFdUVFBUVGTX5uvrS4sWLQB46qmnyMrKonv37kyePBlPT0+2bNlCamoqJ06cYMGCBbZxmZmZBAcHk5iYSGRkJIWFhWzevJlBgwZhNpvp1q0bAIsXLyY9PZ3AwECmTJliG9+6deubvo6kpCS8vb0ZN24cBoOBtm3bYrFYiI+P59ChQwwaNIjk5GRKS0vJzs5m6NChvP322zz00EPXPO/DDz/M/PnzWbBgAQEBAfTu3ZtTp07x29/+loqKCp5//vmbrllERKTJq7wIZRWObZZKKC63bzcFWO+un7sAxjoikI+39d9zF+qf7/KxupbP+Hhd6XOt5TUiN8Hpd9ozMzOJjo62e4wcORKArVu3kpWVxZNPPsnHH3/MokWLyMjIYP/+/fTu3ZslS5Zw+vRp27l27dqF2WxmxYoVzJkzhzVr1vDee+9RVVXF/Pnzbf3S0tIwGo2YTCbS0tJsj5CQkJu+joCAAA4cOMDixYtZtGgRPXr0YPbs2Rw4cICsrCy2bNlCeno6S5cu5bPPPiMwMJDnnnvuuuft1q0by5Yto7KykuTkZDp27Ei/fv3Iz89n8+bNjB079qZrFhERudN9+umndq/NZrPd64JVb1q3bbz6Yf4CNvzdsf3rYsrLy6kyNgPLRcdznrcG8oIjhXZz7N69m6qqKusLX2uw/+FMqe14UVERX3/9NZy33mEvrzxv+3T/sj179lzzOuzmAAoKCigpKXGc45Ly8nKHOWqfU3O49xw3yul32ocPH86YMWPs2tq0aQPAq6++isFgYNKkSQ5344cNG0Z+fj55eXkkJSUB2EJ3dXU1Z86c4fz587Ru3ZpWrVqxf/9+p17HM888g7e3t13bm2++SatWrejfv79D/b169SIvL4/vv/+ewMDAa567RYsWtGvXjpEjR9K9e3e++eYbVq5cSVJSEm+//TY//elPG/16RERE7gQ//vGP7V736dPH7nWnMQnQ4X77QVNXQ0RzmPYL+/aIEIJ8vCGqxZVlLlef81Jbp4H/YjcsPv6qL6xeWhbjX37lbrztk/wTJWAKICisBZ3DWtido2fPnsCKeq/Dbg6gU6dOdq9rrxYICgqic+fOdm21z6k53HuOG+X00N6+fXuH0H5ZYWEhNTU1l/4PuW7Hjh2zPd+2bRszZsxg3759WCwWu35hYWG1hzaqLl26OLQVFRVx4cIFh/XoVzt27BgdO3as9/j69et54oknWLNmDY8//ritPTk5mc6dOzNx4kQKCgpurXgREZGmqnmAdeeW2m2RzR3bL4u7B3Yesu7XfvWXUT/8EvyM1l1g6hPVAsKC4OM6fohpz5cQp/3YxTmcHtqvpaamBoPBwNq1a/H09Kyzz+VAf+jQIRISEvDz82P8+PHExsYSEBCAwWDg+eef59y5c7dcz8WLF+s9Vtfd8pqaGtq0acNLL71U77jrraNfvHgxRqPRLrAD3HvvvXTp0oWPP/6Yc+fO4evre53qRUREpEFG9rZu+/jmbhh56e5ncTlsMsPwB+3Xox8+af23XcSVthG9Yc378E0xRIda27but+46M2X47bkGueu4NLTHxMSwd+9e2rdv7/AxRG2rV6/GYrGwbt06Ro0aZXds4sSJeHnZf+HDYDDUey5/f39KS0sd2g8fvoGfLwYiIyMpKysjMTGx3jcd13Pq1Clqamqorq7Go9bWU1VVVVRXV9utqRIREZFbNLI3xHeA5GVQUAShgbAiD6qq4YXR9n1/Ps/671eZV9pmjrAG/AFz4dfD4Ox5WPw/1m0ik39mP37dB3D0FFRcWiGwowAWbLI+f/yn0Fb7uEvDuHTLx6effhqAqVOnUlnpuD3SkSNHbM8vh+Kamhq7Punp6bZtE6/m4+NDeXm5QztAVFQUBQUFdsdPnjzJW2+9dUP1jxgxgrKyMqZNm1bn8avrr09MTAwWi4Vly5bZte/fv58DBw7Qpk2bRtkPXkRERC7x9ITc2fBoX1i6BaathdAg6y+c3h91/fHRobB9vvXu+3+uh0VvQUI3eHee464xr22FOW9AxpvW1+8ftL6e8wYc+a7RL02aLpfeaR88eDApKSmsXLmSdu3akZCQQFRUFMePH2ffvn3s2bPHtmQlMTGRJUuWMGHCBHbu3InJZMJsNpOfn094eLjD3eiuXbuSm5vLuHHjiI2NxcPDg7FjxxISEkJKSgppaWn06NGDUaNGUVJSwsaNGwkPD6/zDUB9MjIy2L59Oy+//DI7d+6kX79+BAcHc/ToUXbt2oXRaLzuF2Tnzp3L8OHDee6553j//feJi4vjm2++YePGjVRWVjJv3rwb/8OKiIjczT6Yf/0+zQPg1UnWx7VcfYf9arFt4J25jVOLSAO4NLSDdUvI+Ph4li1bxvr167FYLAQFBRETE8OsWbNs/eLi4tiwYQOzZs1i1apVeHh40LlzZ9555x0mTJjAyZMn7c67fPlykpKSyM7OpqKigpqaGgYOHEhISAhTp06lqKiIdevWsXDhQlq2bMmzzz6Lh4cHM2fObHDtRqOR/Px85s2bx6ZNm2w/CGUymYiNjSU5Ofm65xg8eDB5eXnMmzePbdu28de//hUfHx86duzIjBkzbNtjioiIiMjdy1BTe72J3LHKy8sJDg6mrH0KQYWnXF2OiIjIzbs/yrpcpZXJ1ZWIuAWXrmkXEREREZHrU2gXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmXb/koTnBPKHh6u7oKERGRmxejXwoVuZpCe1O07JcQGOTqKkRERG6Nv9HVFYi4DYX2pijSBEEK7SIiIiJNhda0i4iIiIi4OYV2ERERERE3p9AuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibk6hXURERETEzSm0i4iIiIi4OYV2ERERERE3p9AuIiIiIuLmFNpFRERERNxcM1cXIE5w4gycvejqKkRE5DJ/IwT7u7oKEbmDKbQ3Ramr4JtyV1chIiIAMS3htUkK7SJySxTam6KviqHwlKurEBEREZFGojXtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERO5kpT9AyisQNhb8x8CAufDJ4YaPP1QEQ9Ih4D/A9AQ8/gc4VebY77c58PCLEJ4MhkfgNxsa7RJE5PoU2kVERO5U1dUwbAG8vhNSh8KiJ+C7Mug/F748fv3xRcXwk9lQeBJeTIK0h2HLXnjoBbhQad939uvwUSH8OMY51yIi1+S00J6Tk4PBYKj3kZub66ypAZg+fToZGRlOncMZDh8+jL+/PwaDgeeee87V5YiIiCv1nwNj/1j/8Zx8MH8Bq1Nh3qMwaSh8kA6eHjAv+/rnf3Ez/HAetr0Ak4fBzJGwcSrs+wpWv2/f98if4MSfYf2zt3JFInKTnP7jSgMGDCAhIcGhvWvXrk6dNysri4iICGbMmOHUeRrb2LFjqaqqcnUZIiJyJ8jJh/AQeCT+SltYMCT2gfU7wFIJRq/6x2/e
Df/6ILQJu9I28AHo0Ao2miFl0JX2e1o2evki0nBOD+1xcXGkpaU5e5rbymKxUFlZSUBAQKOed9WqVezatYtf//rX/P73v2/Uc4uISBP06RHodi941PrgvOd9sPJd+Odx6NK27rHHTluX0jzYzvFYz/sgd2/j1ysiN80t1rQvXbqUTp064ePjg9FopEOHDixfvrzOfvHx8YSGhuLl5UVgYCB9+vRhx44ddv0MBgPFxcUcPHjQbknOwYMHbceHDBnicP6MjAwMBgM5OTm2tpSUFAwGA2azmdGjR2MymfD19SUvLw+AiooKUlNTiY6OxsvLCz8/P3r16sW2bdtu6G9w+vRp/vM//5N///d/p2/fvjc0VkRE7lInSiCyuWP75bbjZ6499uq+tcefOWu9Uy8ibsHpd9orKiooKiqya/P19aVFixYAPPXUU2RlZdG9e3cmT56Mp6cnW7ZsITU1lRMnTrBgwQLbuMzMTIKDg0lMTCQyMpLCwkI2b97MoEGDMJvNdOvWDYDFixeTnp5OYGAgU6ZMsY1v3br1TV9HUlIS3t7ejBs3DoPBQNu2bbFYLMTHx3Po0CEGDRpEcnIypaWlZGdnM3ToUN5++20eeuihBp0/JSWF6upq/vSnP7F9+/abrlNERO5QlRehrMKxzVIJxeX27aYA6931cxfAWMd/yn28rf+eu1D/fJeP1bV8xsfrSp9rLa8RkdvG6aE9MzOTzMxMu7b+/fvz/vvvs3XrVrKysnjyySdZvXq17XhGRgZ9+vRhyZIlTJkyxRbwd+3aRUhIiN25Jk6cSL9+/Zg/fz5/+ctfAEhLS2PhwoWYTKZGW5oTEBDA3r178fb2trVNmzaNAwcOsG7dOh577DFb+9y5c+nYsSPPPfccBw4cuO65c3Nz+ctf/sLvf/97wsLCrttfRESaoF2fW7drrM38BWz4u33bkT9Z15j7eoPlouOY85cCua+347HLLh+r6276+crrjxeR28rpy2OGDx/O66+/bve4fPf81VdfxWAwMGnSJIqKiuwew4YN4/z587ZlKIAtsFdXV1NcXExRURGtW7emVatW7N+/36nX8cwzz9gFdoA333yTVq1a0b9/f7vaz58/T69evSgoKOD777+/5nktFgsTJkzgxz/+MZMnT3bmJYiIiItUVtoHY7PZ7Pj6gXvg3Xnw7jwK/pBIVd4c6NoWBsVx9NWxfP/mVNvxoos/8PXXX1uXsZwooby83LYEFLiy9KWVqd459x4/Yte3oKCAkpISW1tViB9ff3vC1t9hjvqu4yq7d++221zBbg6gqKjIeh2aQ3PchXPcKENNTU3NLZ2hHjk5OYwaNYopU6bwu9/9rs4+PXr04OOPP77meRYuXMj06dMB2LZtGzNmzGDfvn1YLBa7fmFhYXz33Xd2ryMiIuq8020wGBg8eLDdGwKw3uGfOXMmmzZtYuTIkYB12cqqVaswm8307t3brr/RaOTChWt89AgcOnSIjh071nv8V7/6Fa+++ip79uwhLi4OaNjfri7l5eUEBwdT1j6FoMJTDR4nIiJOdH+UdUvFqwJ0g/WfY72jvvqZuo+PWgw7D8HxV+2/jJryCvz3Djiz9trLW1qOhf6dYWOtT6XvT4XWLWDrC45jisutP+Q0LxF+M/pGr0hEbpLTl8dcS01NDQaDgbVr1+Lp6Vlnn549ewLW8JuQkICfnx/jx48nNjaWgIAADAYDzz//POfOnbvlei5erOMjxksCAwPrrL9Nmza89NJL9Y671jr6w4cP8+qrrzJ06FBqamr49NNPATh69Chg/XLqp59+SkxMjMOyIBEREUb2tm77+OZuGNnH2lZcDpvMMPxB+8B++KT133YRV9pG9IY178M3xRAdam3but+668yU4bfnGkSkQVwa2mNiYti7dy/t27cnPj7+mn1Xr16NxWJh3bp1jBo1yu7YxIkT8fKyv5NgMBjqPZe/vz+lpaUO7YcP38DPPgORkZGUlZWRmJhY75uOa/n666+prKzkr3/9K3/9618djq9du5a1a9eybNkyJk2adMPnFxGRJm5kb4jvAMnLoKAIQgNhRR5UVcMLte6C/3ye9d+vrvqe2cwR1oA/YC78ehicPQ+L/8e6TWTyz+zHr/sAjp6CikufdO8ogAWbrM8f/ym01T7uIs7k0tD+9NNPk5OTw9SpU/nggw8cgveRI0eIibH+XPLlUFx7NU96ejplZWWEhobatfv4+FBeXuvb9pdERUVRUFBAeXk5QUFBAJw8eZK33nrrhuofMWIEL7/8MtOmTatzGcvV9delc+fOLFu2zKF9//79rFy5kkGDBvHwww8zaNCgOkaLiMhdz9MTcmfDtDWwdIt1t5ce7a3Lae6Puv746FDYPh+eWw3/uR68m8Gw7rBkrOOymte2wvbPrrx+/6D1AfAvP1JoF3Eyl4b2wYMHk5KSwsqVK2nXrh0JCQlERUVx/Phx9u3bx549e2xLVhITE1myZAkTJkxg586dmEwmzGYz+fn5hIeHO/yKaNeuXcnNzWXcuHHExsbi4eHB2LFjCQkJISUlhbS0NHr06MGoUaMoKSlh48aNhIeHU1ZW1uD6MzIy2L59Oy+//DI7d+6kX79+BAcHc/ToUXbt2oXRaLzmF2TDwsLqvIOek5PDypUriY2N1R12EZG72Qfzr9+neQC8Osn6uJavMutuj20D79Sxa83N1CIiTuPS0A7WLSHj4+NZtmwZ69evx2KxEBQURExMDLNmzbL1i4uLY8OGDcyaNYtVq1bh4eFB586deeedd5gwYQInT560O+/y5ctJSkoiOzubiooKampqGDhwICEhIUydOpWioiLWrVvHwoULadmyJc8++yweHh7MnDmzwbUbjUby8/OZN28emzZtsv0glMlkIjY2luTk5Mb5I4mIiIjIXc1pu8fI7afdY0RE3NCt7B4jInKJ0/dpFxERERGRW6PQLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0iIiIiIm7O5fu0ixPcEwqe3q6uQkREAGL0S6EicusU2puiZb+EwCBXVyEiIpf5G11dgYjc4RTam6JIEwQptIuIiIg0FVrTLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmFdhERERERN6fQLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzzVxdgDjBiTNw9qKrqxARuX38jRDs7+oqREScRqG9KUpdBd+Uu7oKEZHbI6YlvDZJoV1EmjSF9qboq2IoPOXqKkRERESkkWhNu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYREZH6lP4AKa9A2FjwHwMD5sInhxs+/lARDEmHgP8A0xPw+B/gVJljv9/mwMMvQngyGB6B32xotEsQkaZBoV1ERKQu1dUwbAG8vhNSh8KiJ+C7Mug/F748fv3xRcXwk9lQeBJeTIK0h2HLXnjoBbhQad939uvwUSH8OMY51yIid7w
bCu05OTkYDIZ6H7m5uc6qE4Dp06eTkZHh1DkaQ0lJCePHjyc+Ph6TyYTBYKBLly7XHLN69Wo6duyI0WgkICCAn/zkJ+zfv/82VSwichfqPwfG/rH+4zn5YP4CVqfCvEdh0lD4IB08PWBe9vXP/+Jm+OE8bHsBJg+DmSNh41TY9xWsft++75E/wYk/w/pnb+WKRKQJu6kfVxowYAAJCQkO7V27dr3lgq4lKyuLiIgIZsyY4dR5btWxY8dYuXIlwcHBdOjQgb17916z/yuvvMKkSZNo27Yt06dPp7S0lLVr1/LTn/6Ujz76iPbt29+mykVExCYnH8JD4JH4K21hwZDYB9bvAEslGL3qH795N/zrg9Am7ErbwAegQyvYaIaUQVfa72nZ6OWLSNNyU6E9Li6OtLS0xq7FpSwWC5WVlQQEBNzyue69914+//xz7r//fgB8fHyuOe+sWbMwmUx88sknNG/eHIBHHnmEn//850yZMoX//d//veWaRETkBn16BLrdCx61PpTueR+sfBf+eRy6tK177LHT1qU0D7ZzPNbzPsi99s0cEZHanLamfenSpXTq1AkfHx+MRiMdOnRg+fLldfaLj48nNDQULy8vAgMD6dOnDzt27LDrZzAYKC4u5uDBg3ZLcg4ePGg7PmTIEIfzZ2RkYDAYyMnJsbWlpKRgMBgwm82MHj0ak8mEr68veXl5AFRUVJCamkp0dDReXl74+fnRq1cvtm3b1qBr9/PzswX263nzzTcpKSlh1KhRtsAO1k8zunTpwtatW7FYLA06l4iINKITJRDZ3LH9ctvxM9cee3Xf2uPPnLXeqRcRaaCbutNeUVFBUVGRXZuvry8tWrQA4KmnniIrK4vu3bszefJkPD092bJlC6mpqZw4cYIFCxbYxmVmZhIcHExiYiKRkZEUFhayefNmBg0ahNlsplu3bgAsXryY9PR0AgMDmTJlim1869atb+YSAEhKSsLb25tx48ZhMBho27YtFouF+Ph4Dh06xKBBg0hOTqa0tJTs7GyGDh3K22+/zUMPPXTTc9ZmNpsB+MlPfuJwrFu3buzfv5+9e/fSp0+fRptTROSuU3kRyioc2yyVUFxu324KsN5dP3cBjHX8Z9LH2/rvuQv1z3f5WF3LZ3y8rvS51vIaEZGr3FRoz8zMJDMz066tf//+vP/++2zdupWsrCyefPJJVq9ebTuekZFBnz59WLJkCVOmTLEF/F27dhESEmJ3rokTJ9KvXz/mz5/PX/7yFwDS0tJYuHAhJpOp0ZbmBAQEsHfvXry9vW1t06ZN48CBA6xbt47HHnvM1j537lw6duzIc889x4EDBxplfoATJ04AEBPjuGPA5TckR44cUWgXEbkVuz63btdYm/kL2PB3+7Yjf7KuMff1BstFxzHnLwVyX2/HY5ddPlbX3fTzldcfLyJSy00tjxk+fDivv/663ePy3fNXX30Vg8HApEmTKCoqsnsMGzaM8+fP25ahALbAXl1dTXFxMUVFRbRu3ZpWrVo5ffeUZ555xi6wg3W5SqtWrejfv79d7efPn6dXr14UFBTw/fffN1oN586dA6yfVNR2eS38Dz/80GjziYg0Zbt376aqqsr2uqCggJKSEnjgHnh3Hqden8S3/z0R3p0HXdty8WedObLySevrS4/8I59bB0c2hxMltk9ELzv890vr0VuZ7Oe4pKioiGPVl+7qnyihvLzctpTzcltlkI/dXfbacxQVFdV9HVcd//rrr22vHeao45y1X9f7t9IcmkNz3JY5bpShpqampqGdc3JyGDVqFFOmTOF3v/tdnX169OjBxx9/fM3zLFy4kOnTpwOwbds2ZsyYwb59+xzWboeFhfHdd9/ZvY6IiKjzTrfBYGDw4MF2bwjAeod/5syZbNq0iZEjRwLWNe2rVq3CbDbTu3dvu/5Go5ELF67xkSdw6NAhOnbseM0+V/Px8eG+++6rs+6RI0eyefNm8vPziY+Ptzs2Z84cFixYwPr160lKSrruPOXl5QQHB1PWPoWgwlMNrk9E5I52f5R1W8VLIbrB+s+x3lFf/Uzdx0cthp2H4Pir9l9GTXkF/nsHnFl77eUtLcdC/86wsdanw/enQusWsPUFxzHF5dYfcpqXCL8ZfWPXIyJN2k0tj7mWmpoaDAYDa9euxdPTs84+PXv2BKzhNyEhAT8/P8aPH09sbCwBAQEYDAaef/55213oW3HxYh0fbV4SGBhYZ/1t2rThpZdeqnfcrayjry0yMhKwLoGpHdovf2+grqUzIiLiZCN7W7d9fHM3jLy0RLG4HDaZYfiD9oH98Enrv+0irrSN6A1r3odviiE61Nq2db9115kpw2/PNYhIk9HooT0mJoa9e/fSvn17hxBa2+rVq7FYLKxbt45Ro0bZHZs4cSJeXvZ3MAwGQ73n8vf3p7S01KH98OEb+LlprCG6rKyMxMTEet90NKY+ffqwbNkyduzYwZgxY+yOffLJJ/j6+tK9e3en1yEiIrWM7A3xHSB5GRQUQWggrMiDqmp4odZd8J/Ps/771VXf95o5whrwB8yFXw+Ds+dh8f9Yt4lM/pn9+HUfwNFTUHHpE+cdBbBgk/X54z+FttrHXeRu1+hbPj799NMATJ06lcpKxy/gHDlyxPb8ciiuvUInPT2dsrIyh7E+Pj6Ul5c7tANERUVRUFBgd/zkyZO89dZbN1T/iBEjKCsrY9q0aXUev7r+xvDII48QEhLCpk2b7NZKbd++nQMHDjBgwACMRmOjzikiIg3g6Qm5s+HRvrB0C0xbC6FB1qU490ddf3x0KGyfb737/p/rYdFbkNDNuna+9rKa17bCnDcg403r6/cPWl/PeQOOfOdwahG5+zT6nfbBgweTkpLCypUradeuHQkJCURFRXH8+HH27dvHnj17bEtWEhMTWbJkCRMmTGDnzp2YTCbMZjP5+fmEh4fbfQEArL+4mpuby7hx44iNjcXDw4OxY8cSEhJCSkoKaWlp9OjRg1GjRlFSUsLGjRsJDw+v8w1AfTIyMti+fTsvv/wyO3fupF+/fgQHB3P06FF27dqF0Whs0BdkZ86cabvzX1VVxbfffsuvfvUrALp37864ceMA6xr6+fPnM3nyZLp168Zjjz1GWVkZa9asISgoiN///vcNrl1ERG7AB/Ov36d5ALw6yfq4lq8y626PbQPv1LFrzc3UIiJ3tUYP7WDdEjI+Pp5ly5axfv16LBYLQUFBxMTEMGvWLFu/uLg4NmzYwKxZs1i1ahUeHh507tyZd955hwkTJnDy5Em78y5fvpykpCSys7OpqKigpqaGgQMHEhISwtSpUykqKmLdunUsXLiQli1b8uyzz+Lh4cHMmTMbXLvRaCQ/P5958+axadMm2w9CmUwmYmNjSU5ObtB5Vq1aRXFxse31qVOneOWVVwDrG5vLoR0gNTUVPz8/Fi5cyKJFi2jWrBndunXjj3/8I/fdd1+DaxcRERGRpumGdo8R96bdY0TkrnSzu8eIiNxBGn1Nu4iIiIiINC6FdhERERERN6fQLiIiIiLi5hTaRURERETcnEK7iIiIiI
ibU2gXEREREXFzTtmnXVzsnlDw9HZ1FSIit0dMS1dXICLidArtTdGyX0JgkKurEBG5ffyNrq5ARMSpFNqbokgTBCm0i4iIiDQVWtMuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibk6hXURERETEzSm0i4iIiIi4OYV2ERERERE3p9AuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXPNXF2AOMGJM3D2oqurEBFxPn8jBPu7ugoREadTaG+KUlfBN+WurkJExLliWsJrkxTaReSuoNDeFH1VDIWnXF2FiIiIiDQSrWkXEREREXFzCu0iIiIiIm5OoV1ERERExM0ptIuIiIiIuDmFdhERERERN6fQLiIiUpfSHyDlFQgbC/5jYMBc+ORww8cfKoIh6RDwH2B6Ah7/A5wqc+z32xx4+EUITwbDI/CbDY12CSLSdCi0i4iI1FZdDcMWwOs7IXUoLHoCviuD/nPhy+PXH19UDD+ZDYUn4cUkSHsYtuyFh16AC5X2fWe/Dh8Vwo9jnHMtItIkOC205+TkYDAY6n3k5uY6a2oApk+fTkZGhlPnaCz/+Mc/GDhwIMHBwXh5eREREcEvf/lLzp496+rSRESapv5zYOwf6z+ekw/mL2B1Ksx7FCYNhQ/SwdMD5mVf//wvboYfzsO2F2DyMJg5EjZOhX1fwer37fse+ROc+DOsf/ZWrkhEmjin/7jSgAEDSEhIcGjv2rWrU+fNysoiIiKCGTNmOHWeW7Vnzx4GDBhAVVUViYmJ3HvvvezevZvXXnuNTz75hI8++ggPD30gIiJyW+XkQ3gIPBJ/pS0sGBL7wPodYKkEo1f94zfvhn99ENqEXWkb+AB0aAUbzZAy6Er7PS0bvXwRaXqcHtrj4uJIS0tz9jS3lcViobKykoCAgFs+17PPPsu5c+f4n//5H4YPH25rf+aZZ1i2bBm/+93vmtzfT0TE7X16BLrdC7VvmvS8D1a+C/88Dl3a1j322GnrUpoH2zke63kf5O5t/HpFpMlzi1u4S5cupVOnTvj4+GA0GunQoQPLly+vs198fDyhoaF4eXkRGBhInz592LFjh10/g8FAcXExBw8etFuSc/DgQdvxIUOGOJw/IyMDg8FATk6OrS0lJQWDwYDZbGb06NGYTCZ8fX3Jy8sDoKKigtTUVKKjo/Hy8sLPz49evXqxbdu2Bl37P/7xDyIjI+0CO1jDPMC6desadB4REWlEJ0ogsrlj++W242euPfbqvrXHnzlrvVMvInIDnH6nvaKigqKiIrs2X19fWrRoAcBTTz1FVlYW3bt3Z/LkyXh6erJlyxZSU1M5ceIECxYssI3LzMwkODiYxMREIiMjKSwsZPPmzQwaNAiz2Uy3bt0AWLx4Menp6QQGBjJlyhTb+NatW9/0dSQlJeHt7c24ceMwGAy0bdsWi8VCfHw8hw4dYtCgQSQnJ1NaWkp2djZDhw7l7bff5qGHHrrmeSsrKzEajQ7tQUFBAHzxxRdUV1driYyIyM2qvAhlFY5tlkooLrdvNwVY766fuwDGOv4T6eNt/ffchfrnu3ysruUzPl5X+lxreY2ISC1OT4KZmZlER0fbPUaOHAnA1q1bycrK4sknn+Tjjz9m0aJFZGRksH//fnr37s2SJUs4ffq07Vy7du3CbDazYsUK5syZw5o1a3jvvfeoqqpi/vz5tn5paWkYjUZMJhNpaWm2R0hIyE1fR0BAAAcOHGDx4sUsWrSIHj16MHv2bA4cOEBWVhZbtmwhPT2dpUuX8tlnnxEYGMhzzz133fO2adOGoqIi/u///s+u/S9/+QtgXYpz8uTJm65bRKQpu3DBPjzv3r2bqqoq2+uCggK+z/vIum3j1Q/zF7Dh747tXxdjNpvB1xssFwGsry87b52vyruZ3RwlJSW219+Wl1qfXLqbXl5ebvukl/PWtvx/2C+RsZujnuu4eo6ioiK+/vpr22u7Oeo5p+bQHJrDvea4UYaampqaWzpDPXJychg1ahTDhw9nzJgxdsfatGlD3759GTNmDNnZ2Xz44YdERkba9VmzZg2zZ89m/fr1JCUl2R2rrq7mzJkznD9/HoC+ffvSrFkzDh++sn9uWFgYERERHDhwwKE2g8HA4MGDbUtcLsvIyGDmzJls2rTJ9sYiJSWFVatWkZmZSUpKil3/du3acf78eT788EOHOcaPH09eXh6lpaUEBgbW+3f63e9+x9SpU7nvvvvIyMigY8eObN26lblz53L27Fmqqqr4/PPPuf/+++s9x2Xl5eUEBwdT1j6FoMJT1+0vInJHuz/KujtLK9O1+5Wchb219lefuhoimsO0X9i3/8uPrHfT75sE90VC7mz746+9B0+vgP0vX3tNe+tfwsLHYfq/2x97/A/WNe2n1zqOKy63vnGYlwi/GX3taxKRu47Tl8e0b9/eIbRfVlhYSE1NDT179qx3/LFjx2zPt23bxowZM9i3bx8Wi8WuX1hYWO2hjapLly4ObUVFRVy4cIHo6Oh6xx07doyOHTvWe/y5557j1KlT/OEPf7C9UWjWrBlPPfUU27Zto7Cw0LaUSEREbkLzAOvOLbXbIps7tl8Wdw/sPGTdr/3q5Ykffgl+RusuMPWJagFhQfBxHT/EtOdLiNN+7CJy45we2q+lpqYGg8HA2rVr8fT0rLPP5UB/6NAhEhIS8PPzY/z48cTGxhIQEIDBYOD555/n3Llzt1zPxYsX6z1W193ympoa2rRpw0svvVTvuIaso8/IyGDOnDns2rWLc+fO0atXL8LDwwkKCiIkJITQ0NCGXYCIiDSOkb2t2z6+uRtG9rG2FZfDJjMMf9B+PfrhS0sY20VcaRvRG9a8D98UQ/Sl/w3fut+668wU+40HREQawqWhPSYmhr1799K+fXvi4+Ov2Xf16tVYLBbWrVvHqFGj7I5NnDgRLy/7L/QYDIZ6z+Xv709paalD+9XLaxoiMjKSsrIyEhMT633T0VB+fn52X1p99913+f777+vc415ERJxsZG+I7wDJy6CgCEIDYUUeVFXDC7WWrvx8nvXfrzKvtM0cYQ34A+bCr4fB2fOw+H+sS2qSf2Y/ft0HcPQUVFz6BHlHASzYZH3++E+hrfZxFxEXb/n49NNPAzB16lQqKx23vzpy5Ijt+eVQXHsJfnp6OmVlZQ5jfXx8KC8vd2gHiIqKoqCgwO74yZMneeutt26o/hEjRlBWVsa0adPqPH51/Tfi7Nmz/PrXv6ZZs2bMmzfvps4hIiK3wNPTup790b6wdAtMWwuhQdY19PdHXX98dChsn2+9+/6f62HRW5DQDd6d57hrzGtbYc4bkPGm9fX7B62v57wBR75r9EsTkTuTS++0Dx48mJSUFFauXEm7du1ISEggKiqK48ePs2/fPvbs2WNbspKYmMiSJUuYMGECO3fuxGQyYTabyc/PJzw83O4bvmD9xdXc3FzGjRtHbGwsHh4ejB07lpCQEFJSUkhLS6NHjx6MGjWKkpISNm7cSHh4eJ1vAOqTkZHB9u3befnll9m5cyf9+
vUjODiYo0ePsmvXLoxGI/v377/mOcxmM0888QQ///nPiY6O5uTJk7z55pt8++23ZGRkXHO9v4iI3KQP5l+/T/MAeHWS9XEtV99hv1psG3hnbuPUIiJ3PZeGdrBuCRkfH8+yZctYv349FouFoKAgYmJimDVrlq1fXFwcGzZsYNasWaxatQoPDw86d+7MO++8w4QJExy2RVy+fDlJSUlkZ2dTUVFBTU0NAwcOJCQkhKlTp1JUVMS6detYuHAhLVu25Nlnn8XDw4OZM2c2uHaj0Uh+fj7z5s1j06ZNth+EMplMxMbGkpycfN1zREZGEh4ezqZNmygvL8fPz4+uXbvy5z//uc4fgBIRERGRu4/TtnyU209bPorIXaWhWz6KiDQB+plNERERERE3p9AuIiIiIuLmFNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibs7l+7SLE9wTCp7erq5CRMS5Ylq6ugIRkdtGob0pWvZLCAxydRUiIs7nb3R1BSIit4VCe1MUaYIghXYRERGRpkJr2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJurpmrCxAnOHEGzl50dRUicrP8jRDs7+oqRETEjSi0N0Wpq+CbcldXISI3I6YlvDZJoV1EROwotDdFXxVD4SlXVyEiIiIijURr2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiNxNSn+AlFcgbCz4j4EBc+GTww0ff6gIhqRDwH+A6Ql4/A9wqsyx329z4OEXITwZDI/AbzY02iWIiNyNFNpFRO4W1dUwbAG8vhNSh8KiJ+C7Mug/F748fv3xRcXwk9lQeBJeTIK0h2HLXnjoBbhQad939uvwUSH8OMY51yIicpdxWmjPycnBYDDU+8jNzXXW1ABMnz6djIwMp87RmL744guGDx9OaGgoXl5ehISE8OCDD5Kfn+/q0kTkTtF/Doz9Y/3Hc/LB/AWsToV5j8KkofBBOnh6wLzs65//xc3ww3nY9gJMHgYzR8LGqbDvK1j9vn3fI3+CE3+G9c/eyhWJiMglTv9xpQEDBpCQkODQ3rVrV6fOm5WVRUREBDNmzHDqPI1h27ZtPPzww/j6+vLII4/Qtm1bTp8+zaeffsrx4w24+yUi0hA5+RAeAo/EX2kLC4bEPrB+B1gqwehV//jNu+FfH4Q2YVfaBj4AHVrBRjOkDLrSfk/LRi9fRORu5vTQHhcXR1pamrOnua0sFguVlZUEBATc8rnOnj1LUlISYWFhfPTRR4SGhjZChSIidfj0CHS7Fzxqfcja8z5Y+S788zh0aVv32GOnrUtpHmzneKznfZC7t/HrFRERG7dY07506VI6deqEj48PRqORDh06sHz58jr7xcfH25aQBAYG0qdPH3bs2GHXz2AwUFxczMGDB+2W5Bw8eNB2fMiQIQ7nz8jIwGAwkJOTY2tLSUnBYDBgNpsZPXo0JpMJX19f8vLyAKioqCA1NZXo6Gi8vLzw8/OjV69ebNu2rUHXvmLFCk6ePMns2bMJDQ2loqKCioqKBv/tREQa7EQJRDZ3bL/cdvzMtcde3bf2+DNnrXfqRUTEKZx+p72iooKioiK7Nl9fX1q0aAHAU089RVZWFt27d2fy5Ml4enqyZcsWUlNTOXHiBAsWLLCNy8zMJDg4mMTERCIjIyksLGTz5s0MGjQIs9lMt27dAFi8eDHp6ekEBgYyZcoU2/jWrVvf9HUkJSXh7e3NuHHjMBgMtG3bFovFQnx8PIcOHWLQoEEkJydTWlpKdnY2Q4cO5e233+ahhx665nn/9re/AdCiRQtiY2M5dOgQNTU13HPPPcyfP5/HHnvspmsWkSas8iKUVTi2WSqhuNy+3RRgvbt+7gIY6/iffR9v67/nLtQ/3+VjdS2f8fG60uday2tEROSmOf1Oe2ZmJtHR0XaPkSNHArB161aysrJ48skn+fjjj1m0aBEZGRns37+f3r17s2TJEk6fPm07165duzCbzaxYsYI5c+awZs0a3nvvPaqqqpg/f76tX1paGkajEZPJRFpamu0REhJy09cREBDAgQMHWLx4MYsWLaJHjx7Mnj2bAwcOkJWVxZYtW0hPT2fp0qV89tlnBAYG8txzz133vF999RUATzzxBAEBAfz+979n7ty5lJeX88QTT/DGG2/cdM0icme6cME+PO/evZuqqirb64KCAr7P+8i6bePVD/MXsOHvju1fF2M2m8HXGywXAayvLztvna/Ku5ndHCUlJbbX35aXWp9cupteXl5u+/SS89a2/H/YL5Gxm6Oe67h6jqKiIr7++mvba7s56jmn5tAcmkNz3Klz3ChDTU1NzS2doR45OTmMGjWK4cOHM2bMGLtjbdq0oW/fvowZM4bs7Gw+/PBDIiMj7fqsWbOG2bNns379epKSkuyOVVdXc+bMGc6fPw9A3759adasGYcPX9lrOCwsjIiICA4cOOBQm8FgYPDgwbYlLpdlZGQwc+ZMNm3aZHtjkZKSwqpVq8jMzCQlJcWuf7t27Th//jwffvihwxzjx48nLy+P0tJSAgMD6/07RURE8O2339KpUycOHDiAx6W1pp988gk9evSgXbt2/POf/6x3/NXKy8sJDg6mrH0KQYWnGjRGRNzM/VHW3Vlama7dr+Qs7K21v/rU1RDRHKb9wr79X35kvZt+3yS4LxJyZ9sff+09eHoF7H/52mvaW/8SFj4O0//d/tjjf7CuaT+91nFccbn1jcO8RPjN6Gtfk4iI1Mvpy2Pat2/vENovKywspKamhp49e9Y7/tixY7bn27ZtY8aMGezbtw+LxWLXLywsrPbQRtWlSxeHtqKiIi5cuEB0dHS9444dO0bHjh3rPe7tbf1YevTo0bbADtCtWzd+9KMfUVBQQGlp6S19SiAiTVDzAOvOLbXbIps7tl8Wdw/sPGTdr/3qL6N++CX4Ga27wNQnqgWEBcHHdfwQ054vIU77sYuIOJPTQ/u11NTUYDAYWLt2LZ6ennX2uRzoDx06REJCAn5+fowfP57Y2FgCAgIwGAw8//zznDt37pbruXjxYr3H6rpbXlNTQ5s2bXjppZfqHXe9dfQtW7bkm2++ISoqyuFYWFgYNTU1FBcXK7SLyK0b2du67eObu2FkH2tbcTlsMsPwB+3Xox8+af23XcSVthG9Yc378E0xRF/a6WrrfuuuM1OG355rEBG5S7k0tMfExLB3717at29PfHz8NfuuXr0ai8XCunXrGDVqlN2xiRMn4uVl/+Ung8FQ77n8/f0pLS11aL96eU1DREZGUlZWRmJiYr1vOq7nxz/+MXv37uXo0aMOx7799ls8PDyIiIioY6SIyA0a2RviO0DyMigogtBAWJEHVdXwQq2lKz+fZ/33q8wrbTNHWAP+gLnw62Fw9jws/h/rkprkn9mPX/cBHD0FFZc+Fd1RAAs2
WZ8//lNoq33cRURuhEu3fHz66acBmDp1KpWVjluFHTlyxPb8ciiuvQQ/PT2dsrIyh7E+Pj6Ul5c7tANERUVRUFBgd/zkyZO89dZbN1T/iBEjKCsrY9q0aXUev7r++qSkpODh4cF///d/2335bPv27Xz++ec88MADjbIfvIgInp7W9eyP9oWlW2DaWggNsq6hv9/x0z4H0aGwfb717vt/rodFb0FCN3h3nuOuMa9thTlvQMab1tfvH7S+nvMGHPmu0S9NRKSpc+md9sGDB5OSksLKlStp164dCQkJREVFcfz4cfbt28eePXtsS1YSExNZsmQJEyZMYOfOnZhMJsxmM/n5+YSHh9t9wxesv7iam5vLuHHjiI2NxcPDg7FjxxISEkJKSgppaWn06NGDUaNGUVJSwsaNGwkPD6/zDUB9MjIy2L59Oy+//DI7d+6kX79+BAcHc/ToUXbt2oXRaGT//v3XPEePHj14/PHHWbNmDQ888AD/9m//xpkzZ/jv//5vvL29+f3vf3/Df1cRuUt9MP/6fZoHwKuTrI9rufoO+9Vi28A7cxunFhERaTCXhnawbgkZHx/PsmXLWL9+PRaLhaCgIGJiYpg1a5atX1xcHBs2bGDWrFmsWrUKDw8POnfuzDvvvMOECRM4efKk3XmXL19OUlIS2dnZVFRUUFNTw8CBAwkJCWHq1KkUFRWxbt06Fi5cSMuWLXn22Wfx8PBg5syZDa7daDSSn5/PvHnz2LRpk+0HoUwmE7GxsSQnJzfoPKtXryYmJoasrCz+67/+C29vb3784x+zaNEi+vTp0+B6RERERKRpctqWj3L7actHkSagoVs+iojIXcWla9pFREREROT6FNpFRERERNycQruIiIiIiJtTaBcRERERcXMK7SIiIiIibk6hXURERETEzbl8n3ZxgntCwdPb1VWIyM2IaenqCkRExA0ptDdFy34JgUGurkJEbpa/0dUViIiIm1Fob4oiTRCk0C4iIiLSVGhNu4iIiIiIm1NoFxERERFxcwrtIiIiIiJuTqFdRERERMTNKbSLiIiIiLg5hXYRERERETen0C4iIiIi4uYU2kVERERE3JxCu4iIiIiIm1NoFxERERFxcwrtIiIiIiJurpmrCxAnOHEGzl50dRVyN/A3QrC/q6sQERFp8hTam6LUVfBNuaurkKYupiW8NkmhXURE5DZQaG+KviqGwlOurkJEREREGonWtIuIiIiIuDmFdhERERERN6fQLiIiIiLi5hTaRURERETcnEK7iIiIiIibU2gXEREREXFzCu0i4n5Kf4CUVyBsLPiPgQFz4ZPDrq5KRETEZRTaRcS9VFfDsAXw+k5IHQqLnoDvyqD/XPjyuKurExERcQmnhfacnBwMBkO9j9zcXGdNDcD06dPJyMhw6hyN4eDBg/X+jdq0aePq8kQaX/85MPaP9R/PyQfzF7A6FeY9CpOGwgfp4OkB87JvX50iIiJuxOm/iDpgwAASEhIc2rt27erUebOysoiIiGDGjBlOnaex9O3bl3/7t3+zazOZTK4pRsSVcvIhPAQeib/SFhYMiX1g/Q6wVILRy2XliYiIuILTQ3tcXBxpaWnOnua2slgsVFZWEhAQ0Gjn7NSpU5P7O4nclE+PQLd7waPWB4E974OV78I/j0OXtq6pTURExEXcYk370qVL6dSpEz4+PhiNRjp06MDy5cvr7BcfH09oaCheXl4EBgbSp08fduzYYdfPYDBQXFzssPTk4MGDtuNDhgxxOH9GRgYGg4GcnBxbW0pKCgaDAbPZzOjRozGZTPj6+pKXlwdARUUFqampREdH4+XlhZ+fH7169WLbtm03/Hc4e/Ys5eXlNzxOpEk5UQKRzR3bL7cdP3N76xEREXEDTr/TXlFRQVFRkV2br68vLVq0AOCpp54iKyuL7t27M3nyZDw9PdmyZQupqamcOHGCBQsW2MZlZmYSHBxMYmIikZGRFBYWsnnzZgYNGoTZbKZbt24ALF68mPT0dAIDA5kyZYptfOvWrW/6OpKSkvD29mbcuHEYDAbatm2LxWIhPj6eQ4cOMWjQIJKTkyktLSU7O5uhQ4fy9ttv89BDDzXo/GvXruXVV1+lpqYGk8nEv//7v7N06VL8/PxuumYRl6u8CGUVjm2WSiiu9QbVFGC9u37uAhjr+J8mH2/rv+cuOKdWERERN+b0O+2ZmZlER0fbPUaOHAnA1q1bycrK4sknn+Tjjz9m0aJFZGRksH//fnr37s2SJUs4ffq07Vy7du3CbDazYsUK5syZw5o1a3jvvfeoqqpi/vz5tn5paWkYjUZMJhNpaWm2R0hIyE1fR0BAAAcOHGDx4sUsWrSIHj16MHv2bA4cOEBWVhZbtmwhPT2dpUuX8tlnnxEYGMhzzz133fN6enrStWtXpkyZQmZmJvPnz+eee+7htddeo2/fvlRWVt50zSK3S3l5ue2TrMvMZjPs+ty6bePVD/MXsOHvDu1fbs2npKQEfL3BcpGioiK+/vpr2/kqzpRan/h6289Re86r7N69m6qqKtvrgoIC6xyX1J6j3uvQHJpDc2gOzaE5GnmOG2WoqampuaUz1CMnJ4dRo0YxfPhwxowZY3esTZs29O3blzFjxpCdnc2HH35IZGSkXZ81a9Ywe/Zs1q9fT1JSkt2x6upqzpw5w/nz5wHrlzibNWvG4cNX9nEOCwsjIiKCAwcOONRmMBgYPHiwbYnLZRkZGcycOZNNmzbZ3likpKSwatUqMjMzSUlJsevfrl07zp8/z4cffugwx/jx48nLy6O0tJTAwMDr/bkcDB8+nLfffpv/+q//YurUqQ0aU15eTnBwMGXtUwgqPHXDc4rckPujYNsL0OoaX5guOQt7a+2vPnU1RDSHab+wb/+XH1nvpt83Ce6LhNzZ9sdfew+eXgH7X9aadhERues4fXlM+/btHUL7ZYWFhdTU1NCzZ896xx87dsz2fNu2bcyYMYN9+/ZhsVjs+oWFhTVOwfXo0qWLQ1tRUREXLlwgOjq63nHHjh2jY8eONzzfSy+9xNtvv82WLVsaHNpF3E7zABj4gGNbZHPH9svi7oGdh6z7tV/9ZdQPvwQ/I3Ro5bRyRURE3JXTQ/u11NTUYDAYWLt2LZ6ennX2uRzoDx06REJCAn5+fowfP57Y2FgCAgIwGAw8//zznDt37pbruXjxYr3H6rpbXlNTQ5s2bXjppZfqHXez6+g7duyIh4cHpaWlNzVe5I41srd128c3d8PIPta24nLYZIbhD2q7RxERuSu5NLTHxMSwd+9e2rdvT3x8/DX7rl69GovFwrp16xg1apTdsYkTJ+LlZf8fcoPBUO+5/P396wzDVy+vaYjIyEjKyspITEys903Hzdq/fz/V1dW2L+yK3DVG9ob4DpC8DAqKIDQQVuRBVTW8MNrV1YmIiLiES7d8fPrppwGYOnVqnV+4PHLkiO355VBcewl+eno6ZWVlDmN9fHzq3T4xKiqKgoICu+MnT57krbfeuqH6R4wYQVlZGdOmTavz+NX11+fq5T+XVVVV2b7EOnz48BuqSeSO5+l
[... base64-encoded PNG image data omitted ...]",
+      "text/plain": [
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot the SHAP values\n", + "shap.plots.bar(explanation, max_display=15)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA2MAAAI4CAYAAADnDzGJAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAnSpJREFUeJzs3XtclHXe//HXCMMgoICAiIDH0NQOrLneoGW6q20Juq6Zp9LVX6W10nGR9bZWRV3XzdJsN3UtCw+3h9Ry7/JQ4rFEvEVMDQ0PYYqaioIoynl+f0yMDMjBRC/F9/PxuB4x3+v7vb6fGZmJz3wPl8lqtVoRERERERGRW6qO0QGIiIiIiIjcjZSMiYiIiIiIGEDJmIiIiIiIiAGUjImIiIiIiBhAyZiIiIiIiIgBlIyJiIiIiIgYQMmYiIiIiIiIAZSMiYiIiIiIGEDJmIiIiACQnJyMyWQiOTnZ6FBERO4KSsZEREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKBkTEREROz8/f2xWCxGhyEiclcwWa1Wq9FBiIiIiPH27t2LKfsKrYOb4WI2Gx2OiNRW7hbwdDc6ituCs9EBiIiIyO3ByckJb1d3XF6YC2lnjA5HRGqj5g1h3iglYz9TMiYiIiKO0s5A6gmjoxARqfW0ZkxERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkRERERERAygZExERERERO58WTkwYjb4DQP3QdBtHCQfqX77A+nw+ETwGAwNhsKQmXD2Qvl6f1sBvaeA/3Aw9YUJS39xyErGRERERETkzlZcDBGTYfHXEPUEvDUUzlyAruPg0Mmq26dnQJc34fBPMOVpiO4Nq3dBj1jIL3Cs++Zi2HkYftX8hsNWMnYb2717N+vXr2fJkiXMnTuXxYsXGx0Sqamp9OrVC5PJdM3D1dX1pva/adMmXn31VTZv3nxT+7lRly9fZvny5fTt25e2bdvi5uaGyWRiwoQJRocmIiIicufp+lcY9s+Kz6/YDgmpEBcF4wfAqCdg80RwqgPjl1V9/SkrIScXNsbCyxEwth988mfYcxTiNjnWTZsDpz6CRa/eyDMCwPmGryA3zc6dO7FYLPj6+pKfn290OOW88sorBAUFOZQ1atTopva5fft2Zs6ciZeXF127dr2pfd2IrKwsVq9ezapVqwgICKBJkyakpqYaHZaIiIhI7bRiO/h7Qd+wq2V+ntC/EyzaCnkFYDFX3H5lIkR2gCZ+V8u6PwitGsMnCTDisavlzRrWWNhKxm5jAwcOpH79+gAsX76cgoKCKlrcWs888wwdOnQwOowaU1hYSE5ODp6enjd8LV9fX/72t7/x7rvv4urqysSJE/n73/9eA1GKiIiISDm706B9C6hTZuJfxxCYux4OnoT7m1677YlztimNHVqWP9cxBNbsqvl4f6ZpirexkkTsTmS1Wvn22295+umnCQwMxGw24+HhQY8ePdi9e7dD3X379jF06FDuvfdePD09cXZ2xt/fn+eee45z584BcPHiRf785z/zxhtvABAbG2ufGtmsWTP7eZPJRFxcnMP1k5KS6NSpEyaTyV62efNmWrduTdOmTVm8eDEdO3bE09MTLy8vAPLz81m9ejU9evTAx8cHZ2dnfH19GTZsGGfOnKny+bu4uBAYGGi/noiIiIjcRKcyIcC7fHlJ2cnzlbctXbds+/OXbCNrN4FGxmq53Nzcatd1cXGhTtlvEypx9uxZ0tPT7Y+dnZ3x8vLC1dWV9evX89JLL5GWlkavXr1o06YNR48e5csvv6RTp058/fXX9lG15ORkPv30U7p3707v3r2xWq0kJiYSFxdHYmIiycnJuLq60q9fP44fP87y5cvp0aMHjz1mGy6+5557qh1zWZmZmbzyyit06NCBV155BScnJ/Lz85k3bx5jx44FoF+/fvj7+/Ptt9+ybNkyvv76a5KTk2tkBE1EREREyigohAuXy5flFUBGtmN5Aw/baNiVfLBcI7VxdbH990olS35Kzl1rGqOr+WqdyqY5/kIaGavlFixYUO3jp59+uq5r9+zZk+DgYPsREBBAXFwcaWlpfPjhhxw8eJDPP/+clStXMnnyZBYsWMCMGTNwd3cnOjrafp1BgwZx9uxZVq1axVtvvcW0adNYunQpgwcPJiUlhVWrVmE2mwkPDyc0NBSATp06ER0dTXR0NH369PnFr8/FixcZOXIka9euZcqUKUyaNImkpCTmzp2Ls7Mz+/bt44MPPmDy5MksWbKE0aNHc/ToUWbMmPGL+/wl9u/fT2Zmpv1xeno6x44dsz/Ozs7mu+++c2iTkJBQ6ePExESKiorUh/pQH+rD/vjy5TJ//IiI3CRlZ0o5fF5t+962PX3pIyEVln5TvvxYhq1NXRdOHz/hcM2EhATIzbefr/Azsa4tYTt/6nS5z92z6Sft7e3XrEEmq9VqrdEryk1RsmZs8ODB19Wu9MhVVfz8/LBYLJXWSU1NJTo6mi+++ILJkyfTokUL+zk3Nzc6dOhASkoKI0aMwNXVlfj4+HLtx48fT2JiIhcvXqRu3br2c1arldzcXC5cuEBhYSFLliwhJiaG119/nXfeeQeAKVOm8MYbbzB+/HiHnQkvXrzIhAkTmD59Oh9//DHDhg2zn0tKSuLll19m+/btlPy6b968mZEjR3Lw4EEyMzPt0wmtVitz5swhKiqKoUOHMmnSJIf4t27dSnR0NM2aNav2mzE3N9e+Zqxs3CIit5OUlBS8r1hp/My/IfVE1Q1ERK5X60DbjoWNG1RcJ/MS7Cpzf7A/x0Ejbxj9e8fyh9vYRr9CRkFIAKx50/H8vHh4bhbsnVH5mrGg5+EfQyDmD47nhsy0rRk7t6B8u4xsW0I4vj9MGFjx86mEpinWcmV3O6xJv/vd7665gce2bds4efIkBQUFBAcHV9g+IyOD4OBgTpw4wXvvvcfy5cs5duyYwzcWgMO3uDXNz8/PYV1Xbm4ux44do7i4mLi4uHLrz0qUTiJFRER
EpAZ5e9h2MixbFuBdvrxEaDP4+oDtfmOll93sOARuFtuuiBUJ9AG/+pB0jRtE/98hCL3x+4lVRMlYLXc9U04sFgtOTk411ve9997LuHHjrnnO09MTPz8/zpw5w+TJk5kzZw733XcfEyZMICgoCIvFwq5du3jnnXcoLi6uVn+lN+gozWq1lkvwSri5uZWrW6Jv377069fvmu2aNGlSrZhERERE5BboF27b3v7TROjXyVaWkQ3LE6BXB8f1Xkd+XprTstQtmZ4Mh/mb4HgGBPvayjbste3C+Fqvmxa2krFabtGiRdWuGxkZSePGlXxrUE3169enYcOGZGVlMWDAgEo3BUlOTiYxMRFXV1d27NhhT44KCgo4dOhQufoVJVwWiwV3d3cAzp933C0nOzubjIyMasVet25dAgMDMZlMODs7M2jQoGq1ExERERED9QuHsFYw/F+wPx1868GsdVBUDLFlphD+drztv0f/fbVs7JO2xK3bOHglAi7lwrT/2KY2Dv+NY/uFm+HHs3A5z/Z4636YvNz285BHoWn170OmZKyW69mzZ7Xr+vj41EifrVq1IiwsjJUrVzJ9+nSHzTrANlp38eJF/P39qVOnDnXq1MFkMjmMgCUnJ7N69epy1/bw8ADKJ1wuLi4EBQXh5OREfHw8r7/+OmBLxOLj40lLS6tW7CaTidDQUO677z4+/fRTEhMTCQsLc6hz+fJlcnJy8PPzq+AqIiIiInJLOTnZ1ouNng/vrbbtfvjreyDuJds6taoE+8KWSfB6HIxZBC7OEPEQvDOs/C6K8zbAlpSrjzd9ZzvAtoZNyVjtcPDgQS5dugTY1jIVFRWRnJwM2JKSVq1aVXmNm7lmrCItWrTghRde4MCBA4wePZpVq1YRHh6O2Wzmhx9+YNeuXQQFBbFp0yaaNWtG+/btSU5OJiwsjN69e3Pu3Dm2bt1KXl7eNa/dsGFDFi1aRP369fH19aVly5b06tWLhx56iPDwcNauXUvPnj0JDQ1l9+7dfPPNNwQGBlZ7M5OOHTsyatQoxo4dyyOPPEKvXr1o27YtV65c4fvvv2f37t2MGDGiyo04tmzZwrJlyyguLub7778HYPXq1Zw5c4Z69erx9NNP88ADD1z36ysiIiJy19k8qeo63h7w4SjbUZnSI2KltWsCX157ic11x1JNSsZuY6mpqZw6dcqhLCkpCYCAgIBqJWNG6d69O8uXL2fGjBnEx8fz3nvvYTKZ8PHxoUOHDowaZXuTNGrUiDfeeANnZ2f+85//MG3aNOrXr0/v3r0JDw9n5MiRDtd9+OGH+ctf/sLs2bOZNm0a+fn5NG3alF69ehEaGkpsbCwTJ05k69atbNiwgQcffJB3332XefPmVTsZc3Fx4dlnn+Wee+5h5syZJCQk8Pnnn+Pi4oK/vz8RERH079+/yuts27aN2bNnO5QlJSXZ/w3btGmjZExERETkLqat7UVERATQ1vYicgtUZ2v7u4hu+iwiIiIiImIAJWMiIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIA3fRZREREHDVvaHQEIlJb6fPFgZIxERERAaCoqIhzuVfwnTMCF7PZ6HBEpLZytxgdwW1DyZiIiIgAUFhYSM9+v2fDhg20a9fO6HBERGo9rRkTERERu9OnT5OXl2d0GCIidwUlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiIiIiIGUDImIiIiIiJiACVjIiIiAoDFYsHf39/oMERE7homq9VqNToIEbk7Xci1klNodBQiUqIgv4DjP3xPfRd44IEHjA5HRKTWczY6ABG5e+UUwrNfFpN2Qd8JidwOmns6Mel+H5wKsowORUTkrqBkTEQMlXbBSup5o6MQERt9MSIicitpzZiIiIiIiIgBlIyJiIiIiIgYQMmYiIiIiIiIAZSMiYiIiIiIGEDJmIiIiIiIiAGUjImIiIiIiBhAW9uL1FJZuVZithbz2SErlwugY4CJd7rWob2/qVrtD5yz8tqmYr45YcXFCSJamJjetQ5+bo7t/5ZYzI5TVnacsnLmMowPNzGhs9PNeEoiIiIitYpGxkRqoWKrlYhPi1h8wErUr+rw1qN1OHPZStdlRRzKrPo+QukXrXRZWsThLCtTHqlDdIc6rP7BSo/lReQXObZ/85tidv5k5VcNq5fkiYiIiIiNkrHb2O7du1m/fj1Llixh7ty5LF682OiQiIqKwmQyXfMwm81cvnz5pvWdlZXFiBEjWLp06U3ro6ZMnz6dhx56iAYNGmA2m6lXrx6tW7dm8uTJ5OXl3fT+V6RaSTgJcY/XYXynOoz6VR02D3DCyQTjtxVX2X5KYjE5BbCxvxMvt6/D2LA6fNKrDnvOQtx3jslY2vNOnHrRmUU99XEiIiIicj00TfE2tnPnTiwWC76+vuTn5xsdjoPnnnuO1q1bO5Q5OTnh4uJy0/rMysrigw8+ID8/n4EDB960fmpCUlIS9erVY/DgwTRq1IhLly6xfv16/vrXv7J161a+/PJLTKabN5K04qAVfzfo2+pqH35uJvq3NrFov5W8QisW54r7X3nISmQLE03qX63TvWkdWnkX80mqlREPXq3bzFMjYiIiIiK/hJKx29jAgQOpX78+AMuXL6egoMDgiK6KiIigT58+RodRozIzM/H09KROnRsf4bnWKObUqVMJDw8nPj6elJQU7rvvvhvupyK7z1hp72+iTpmEr2OAibl7rRzMhPv9rt32xEXb2q8OjconWR0DTKz5oeppjiIiIiJSNc0ruo2VJGJ3otzcXF599VWCg4Mxm824ubnRsWNHvvrqK4d6xcXFvPzyy9x33314eXnh7OyMj48PvXv35ujRo/Z6a9asoXnz5gDMnz/fPjXS19fXft5kMjF+/PhysURGRmIymbh48aK9LDQ0FF9fX/bv38+jjz6Kh4cHDRo0ICcnB4Aff/yRvn374uvri7OzM97e3kRERHDkyJEbel2Cg4OxWq1kZGTc0HWqcioHAtzLl5eUnbxUcUJ1Ksexbtn253Mhr1AJmYiIiMiN0shYLZebm1vtui4uLtUeFcrKyiI9Pd2hzMvLCw8PDwoKCggLCyMlJYXu3bszbNgwsrOzWbZsGZGRkaxYsYLevXsDkJ+fz7x58+jSpQuPPfYY9erVIzk5mTVr1rB7925SUlKoX78+DzzwAGPHjmXKlCmEhYXx5JNPAuDm5lbt51dWbm4uXbp0oV27drz22mtkZGRgNps5fPgwHTt2pKCggD59+hASEsKRI0f45JNP6Ny5M0lJSQQFBVWrj7Nnz5KXl8fZs2f57LPP+Oyzz2jUqBEdO3asdpzFViv5RdWra3ECk8nElULbz2W5/vyOv1JY8TVKzl2zvdPVOhZ9eoiIiIjcEI2M1XILFiyo9vHTTz9V+7rDhw8nODjY4Zg0aRIAsbGx7Nmzh1mzZrF27VomTZ
rEzJkz+f777/H09GTMmDFYrbaRFYvFwqlTp1i7di3Tp08nNjaWzz//nKlTp5Kens6cOXMACAoK4vnnnwegdevWREdHEx0dzZ/+9Kdf/Nrk5OTw+OOPs2XLFiZNmsTs2bNxdXVl+PDhFBYWkpCQwMKFCxk3bhzz589n3bp1nD17ljfffLPafXTu3Jng4GDat2/P5MmTadeuHf/7v/97XUnk1uNW6r5bVK0j9bytTV1nOH7qjMN1EhISyC28ej4xMZGioqtZ3v79+8nMzKTuz0nWqbPnOXbsmP18dnY26acz7O1LrlmZivooUfqciNyeyr7Pyz6u6n2enp5e7rPku+++Ux/qQ32oj1rZx/UyWUv+KpbbWsmascGDB19Xu7KjV5Xx8/PDYrFUWicqKor333+fmJgYQkNDHc6FhobSpk0bQkJCyMnJYceOHeU2qRg1ahRffPEF6enpBAQEOJwrLCzk/Pnz5Ofnc+nSJdq0aUPfvn1ZuXIlAEePHqV58+b88Y9/JC4uzqHtmjVriIiIYNy4ccTGxjqci4yMZPXq1WRnZ1OvXj17rHv27OHw4cO0bNnSXjcrK4sGDRrQo0cP5s2bV+75h4eH4+rqyqFDhyp9nUqsX7+ejIwMTpw4wdq1azlz5gz/+Mc/6NmzZ7XaA/yUY2VdWvXepn8IMeFpMRHyYSEh3ibWPOk4vDVvXzHPfVnM3j86cb/ftTfeOHHRStC/i/hHlzrEdHT8vmbImiLW/GDlXFT5YbGMy1b8ZhVd133GTl6y8ptPriaRImKs1g1g0cOnqVuQRbt27YwOR0Sk1tNEo1quutPprld4eHiFG3gcO3aM/Px8mjRpUmH7EydO2JOxOXPmMGPGDI4cOVJupCQ7O7vGYi6rXr16NG3a1KFs7969WK1WvvrqK4KDg6/ZrmSdWnX06NHD/nN0dDTPP/88f/jDH9iyZQthYWHVukYjdxPD7ru+HQtDG5r4Ot1KsdXqsInHjlNW3JyhlXfFbQPrmfCrC0k/lU8A/++UlVDdT0xERESkRigZq+Wu575fFosFJ6fqjWhUxmq1EhQUxFtvvVVhnZCQEAA+/vhjXnzxRVq2bMmYMWNo3rw5bm5uFBUVMWTIEIqLq74nFlDpNvEVTYVzcXHB2dnxLVAyUNytWzf7tMiyXF1dqxXTtbz88st8+OGHvP/++9VOxn6Jfq1MrDho5dODVvq1tr02GZetLE+10qulyWFb+yNZtufc0utq2ZOtTMxPsXI820rwz9vbb/ixmIOZ8NpDSsZEREREaoKSsVpu0aJF1a4bGRlJ48aNb7jPwMBAzp8/T9++fauc9hgXF4fZbOabb76hUaNG9vJ9+/aVq1tZwuXv7w/A+fPl57uVnvtblXbt2mEymcjLy2PQoEHVblddJclx6fnJN0O/VibCAmD4umL2nwPfujDr22KKrBDb2XHq4W8/sSWrR0dc/TgY+191WH6wiG6fFPFK+zpcKoBpO4u53xeGlxmlW5hSzI/ZcPnnHRa3psPk7bYkekhbE011HzIRERGRa1IyVstdz9okHx+fGulz4MCBTJ06lddff53333+/3Pm0tDT7NvUlI3GlR8CsVisxMTHl2nl4eADXTmRatWqFk5MT27Ztw2q12hO39evXc+DAgWrH7uvrS1hYGImJiaxcudK+a2Pp2I4fP17pFMzCwkJOnTp1zWmOf/vb3wD4r//6r2rH9Es41bGtFxu9pZj3dhdzpQB+3QjinnCidYOqk6Pg+ia2DHDi9c3FjNlajIsTRLQw8U7XOuVuFj1vXzFbSi1N3HTcyqbjtsTs4cA6SsZEREREKqBk7DZ28OBBLl26BNi2YS8qKiI5ORmwJSatWrWq8ho3a81YZSZOnEh8fDyzZs0iMTGRLl264OXlxbFjx9i2bRtOTk6kpKQA0L9/fzZt2kSXLl146qmnKCoqYt26dfb7fZXm4+NDQEAAmzZtIiYmhsaNG1O3bl1GjhyJh4cHvXr1YtWqVXTt2pVu3bpx+PBhVq1aRdOmTR3uWVaVhQsXEhYWxoABA+jRowft27fHarWSlpbGxo0beeKJJ8ptIFLapUuXaNmyJZ07d6Zt27YEBATw008/8dVXX3Ho0CHuv/9+oqOjr/dlvW7eriY+/J0TH/6u8nqlR8RKa+dr4st+VU9b3TxQHyMiIiIiv4T+irqNpaamcurUKYeypKQkAAICAqqVjBnBbDazfft2JkyYwLJly5g1axYA3t7etG3blqFDh9rrvvDCC2RmZjJr1izefvtt3N3d6dy5M7Nnzy63uQbYpl1GRUUxc+ZM8vPz8fHxYeTIkYDtZtDPPPMMGzduJDExkRYtWrBgwQI++uij60rGWrZsyb59+4iJiSE+Pp74+HjMZjO+vr488sgjPPvss5W2d3NzY8CAAezYsYOkpCQuX76Mq6srTZo04S9/+Qvjxo2jbt261Y5HRERERGonbW0vIobR1vYitxdtbS8icmvpps8iIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiIiIiIG0E2fRcRQzT1NgG53KHI7sL0fRUTkVlEyJiKGcXeGeb/TAL3I7aIgv4DjP5zDxcXoSERE7g5KxkTEMJ6uJjyNDkJE7FJSDtEv8jHWrFljdCgiIncFfSUtIiIiAOTl5XH69GmjwxARuWsoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERERERAygZExERETt/f38sFovRYYiI3BVMVqvVanQQIiIiYry9e/diyr5C6+BmuJjNRocjcvdwt4Cnu9FRiAGcjQ5AREREbg9OTk54u7rj8sJcSDtjdDgid4fmDWHeKCVjdyklYyIiIuIo7QyknjA6ChGRWk9rxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkREREREaqOsHBgxG/yGgfsg6DYOko9Uv/2BdHh8IngMhgZDYchMOHuhfL2/rYDeU8B/OJj6woSlNfYUajslYyIiIiIitU1xMURMhsVfQ9QT8NZQOHMBuo6DQyerbp+eAV3ehMM/wZSnIbo3rN4FPWIhv8Cx7puLYedh+FXzm/NcajElY7fY7t27Wb9+PUuWLGHu3LksXrzY6JCIiorCZDJd8zCbzVy+fPmm9Z2VlcWIESNYuvT2/wbl22+/5emnn+bBBx/E3d0dk8nEsGHDKqz/2muv8eijj+Lv74/JZMLX1/fWBSsiIiK1W9e/wrB/Vnx+xXZISIW4KBg/AEY9AZsnglMdGL+s6utPWQk5ubAxFl6OgLH94JM/w56jELfJsW7aHDj1ESx69Uae0V3J2egA7jY7d+7EYrHg6+tLfn6+0eE4eO6552jdurVDmZOTEy4uLjetz6ysLD744APy8/MZOHDgTeunJmzatIklS
5bQsGFDQkJC2LNnT6X13333Xdzd3bnnnnvIycm5RVGKiIiIYEvG/L2gb9jVMj9P6N8JFm2FvAKwmCtuvzIRIjtAE7+rZd0fhFaN4ZMEGPHY1fJmDWs8/LuFkrFbbODAgdSvXx+A5cuXU1BQUEWLWyciIoI+ffoYHUaNyszMxNPTkzp1bnwQeNCgQQwcOJCAgAC2bdvGww8/XGn9PXv2cP/992MymWjWrBmXLl264RhEREREqmV3GrRvAWX/BuoYAnPXw8GTcH/Ta7c9cc42pbFDy/LnOobAml01H+9dStMUb7GSROxOlJuby6uvvkpwcDBmsxk3Nzc6duzIV1995VCvuLiYl19+mfvuuw8vLy+cnZ3x8fGhd+/eHD161F5vzZo1NG9um1s8f/58+9TIkul8a9aswWQyMX78+HKxREZGYjKZuHjxor0sNDQUX19f9u/fz6OPPoqHhwcNGjSwj0r9+OOP9O3bF19fX5ydnfH29iYiIoIjR6q3kLVRo0YEBARU+/V64IEHMJlM1a4vIiIiUmNOZUKAd/nykrKT5ytvW7pu2fbnL9lG1uSGaWTsDpSbm1vtui4uLtUeFcrKyiI9Pd2hzMvLCw8PDwoKCggLCyMlJYXu3bszbNgwsrOzWbZsGZGRkaxYsYLevXsDkJ+fz7x58+jSpQuPPfYY9erVIzk5mTVr1rB7925SUlKoX78+DzzwAGPHjmXKlCmEhYXx5JNPAuDm5lbt51dWbm4uXbp0oV27drz22mtkZGRgNps5fPgwHTt2pKCggD59+hASEsKRI0f45JNP6Ny5M0lJSQQFBf3ifkVERERumoJCuHC5fFleAWRkO5Y38LCNhl3JB8s1/tR3/Xn5yZVKlsuUnLvWNEZX89U6lU1zlGrRyNgdaMGCBdU+fvrpp2pfd/jw4QQHBzsckyZNAiA2NpY9e/Ywa9Ys1q5dy6RJk5g5cybff/89np6ejBkzBqvVCoDFYuHUqVOsXbuW6dOnExsby+eff87UqVNJT09nzpw5AAQFBfH8888D0Lp1a6Kjo4mOjuZPf/rTL35tcnJyePzxx9myZQuTJk1i9uzZuLq6Mnz4cAoLC0lISGDhwoWMGzeO+fPns27dOs6ePcubb775i/u8Ffbv309mZqb9cXp6OseOHbM/zs7O5rvvvnNok5CQUOnjxMREioqK1If6UB/qw/74Zm7YJCJVq/B9vu172/b0pY+EVFj6TbnyQxu2297ndV0gr7D8+/x8lu2Hui6OfZSy5+D3th9+Hv1y+CzJtZVl5l5dD1+2j7y8vFrxmfhL+rheJmvJX9Byy5WsGRs8ePB1tSs7elUZPz8/LBZLpXWioqJ4//33iYmJITQ01OFcaGgobdq0ISQkhJycHHbs2FFu6t2oUaP44osvSE9PLzeNr7CwkPPnz5Ofn8+lS5do06YNffv2ZeXKlQAcPXqU5s2b88c//pG4uDiHtmvWrCEiIoJx48YRGxvrcC4yMpLVq1eTnZ1NvXr17LHu2bOHw4cP07Ll1TnOWVlZNGjQgB49ejBv3rxyzz88PBxXV1cOHTpU6etUWsmasWvFfS0la8YyMjKq3YeIyK2WkpKC9xUrjZ/5N6SeMDockbtD60DbjoWNG1RcJ/MS7CqzrOLPcdDIG0b/3rH84Ta20a+QURASAGvKfOE8Lx6emwV7Z1S+ZizoefjHEIj5g+O5ITNta8bOLSjfLiPblhSO7w8Tbu+N2W4XmqZ4B7pZ0+nCw8Mr3MDj2LFj5Ofn06RJkwrbnzhxwp6MzZkzhxkzZnDkyBGHbyDA9i3DzVKvXj2aNnX8YNm7dy9Wq5WvvvqK4ODga7bTtvMiIiJy2/L2sO1kWLYswLt8eYnQZvD1Adv9xkovWdlxCNwstl0RKxLoA371Ieka6+r/7xCE6n5iNUXJ2B3oeqaRWCwWnJycbrhPq9VKUFAQb731VoV1QkJCAPj444958cUXadmyJWPGjKF58+a4ublRVFTEkCFDKC4urlaflW1+UTbBK+Hi4oKzs+Ovdcngb7du3ezTIstydXWtVkwiIiIid4R+4bbt7T9NhH6dbGUZ2bA8AXp1cFzvdeTnZS0tG10tezIc5m+C4xkQ/POX1hv22nZhfK3XrXkOdwElY3egRYsWVbtuZGQkjRtX8s1HNQUGBnL+/Hn69u1b5bTHuLg4zGYz33zzDY0aXX1T79u3r1zdyhIuf39/AM6fL7/bT+n5vFVp164dJpOJvLw8Bg0aVO12IiIiInesfuEQ1gqG/wv2p4NvPZi1DoqKIbbMFMLf/rxz9dF/Xy0b+6Qtces2Dl6JgEu5MO0/tqmNw3/j2H7hZvjxLFzOsz3euh8mL7f9PORRaKr7kFVEydgdqGfPntWu6+PjUyN9Dhw4kKlTp/L666/z/vvvlzuflpZm36a+ZCSu9AiY1WolJiamXDsPDw8Ah8WTJVq1aoWTkxPbtm3DarXaE7f169dz4MCBasfu6+tLWFgYiYmJrFy50r5rY+nYjh8/XukUTBEREZE7ipOTbb3Y6Pnw3mrb7oe/vgfiXrKtU6tKsC9smQSvx8GYReDiDBEPwTvDyu+iOG8DbEm5+njTd7YDbGvYlIxVSMnYLXbw4EH7zX9zc3MpKioiOTkZsCUmrVq1qvIaRmzBPnHiROLj45k1axaJiYl06dIFLy8vjh07xrZt23ByciIlxfYm7N+/P5s2baJLly489dRTFBUVsW7dOvv9vkrz8fEhICCATZs2ERMTQ+PGjalbty4jR47Ew8ODXr16sWrVKrp27Uq3bt04fPgwq1atomnTpg73LKvKwoULCQsLY8CAAfTo0YP27dtjtVpJS0tj48aNPPHEE1VuxHHhwgX++7//G4AzZ84AsGvXLvvuj08++SS//e1v7fVnzJhh3xQkKyuLgoICe93g4GD7tURERESu2+ZJVdfx9oAPR9mOypQeESutXRP4clzNxCLXpGTsFktNTeXUqVMOZUlJSQAEBARUKxkzgtlsZvv27UyYMIFly5Yxa9YsALy9vWnbti1Dhw61133hhRfIzMxk1qxZvP3227i7u9O5c2dmz55dbnMNsE27jIqKYubMmeTn5+Pj48PIkSMB282gn3nmGTZu3EhiYiItWrRgwYIFfPTRR9eVjLVs2ZJ9+/YRExNDfHw88fHxmM1mfH19eeSRR3j22WervEZmZiazZ892KPvuu+/sW576+fk5JGPz589nz549DvVL2rdp00bJmIiIiMhdTlvbi4iICKCt7UUMUZ2t7aXW0k2fRUREREREDKBkTERERERExABKxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKCbPouIiIij5g2NjkDk7qH3211NyZiIiIgAUFRUxLncK/jOGYGL2Wx0OCJ3D3eL0RGIQZSMiYiICACFhYX07Pd7NmzYQLt27YwOR0Sk1tOaMREREbE7ffo0eXl5RochInJXUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIA
JWMiIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiJ2/v7+WCwWo8MQEbkrmKxWq9XoIERERMR4e/fuxZR9hdbBzXAxm40OR6R2c7eAp7vRUYjBnI0OQERERG4PTk5OeLu64/LCXEg7Y3Q4IrVX84Ywb5SSMVEyJiIiImWknYHUE0ZHISJS62nNmIiIiIiIiAGUjImIiIiIiBhAyZiIiIiIiIgBlIyJiIiIiIgYQMmYiIiIiIiIAZSMiYiIiIiIGEDJmIiIiIhIbZGVAyNmg98wcB8E3cZB8pHqtz+QDo9PBI/B0GAoDJkJZy+Ur/e3FdB7CvgPB1NfmLC0xp7C3UTJmIiIiIhIbVBcDBGTYfHXEPUEvDUUzlyAruPg0Mmq26dnQJc34fBPMOVpiO4Nq3dBj1jIL3Cs++Zi2HkYftX85jyXu4SSsdvY7t27Wb9+PUuWLGHu3LksXrzY6JBITU2lV69emEymax6urq43tf9Nmzbx6quvsnnz5pvaT024dOkS8+fPp3Pnzvj6+uLi4oK/vz99+vRhz549RocnIiIid5quf4Vh/6z4/IrtkJAKcVEwfgCMegI2TwSnOjB+WdXXn7IScnJhYyy8HAFj+8Enf4Y9RyFuk2PdtDlw6iNY9OqNPKO7nrPRAUjFdu7cicViwdfXl/z8fKPDKeeVV14hKCjIoaxRo0Y3tc/t27czc+ZMvLy86Nq1603t60bk5OTw7rvvMmnSJDw9PRk0aBDe3t58++23xMfHs27dOrZv386vfvUro0MVERGR2mLFdvD3gr5hV8v8PKF/J1i0FfIKwGKuuP3KRIjsAE38rpZ1fxBaNYZPEmDEY1fLmzWs8fDvRkrGbmMDBw6kfv36ACxfvpyCgoIqWtxazzzzDB06dDA6jBpTWFhITk4Onp6eN3ytgwcP8tVXX5Gfn8/69et58MEHAbhw4QITJkzg3XffZf78+UrGREREpObsToP2LaBOmclvHUNg7no4eBLub3rttifO2aY0dmhZ/lzHEFizq+bjFU1TvJ2VJGJ3IqvVyrfffsvTTz9NYGAgZrMZDw8PevTowe7dux3q7tu3j6FDh3Lvvffi6emJs7Mz/v7+PPfcc5w7dw6Aixcv8uc//5k33ngDgNjYWPvUyGbNmtnPm0wm4uLiHK6flJREp06dMJlM9rLNmzfTunVrmjZtyuLFi+nYsSOenp54eXkBkJ+fz+rVq+nRowc+Pj44Ozvj6+vLsGHDOHPmTJXPv6CggNzcXAAaN25sL3dzc7P34e7ufl2vqYiIiEilTmVCgHf58pKyk+crb1u6btn25y/ZRtakRmlkrJYrSQiqw8XFhTplv0mpxNmzZ0lPT7c/dnZ2xsvLC1dXV9avX89LL71EWloavXr1ok2bNhw9epQvv/ySTp068fXXX9tH1ZKTk/n000/p3r07vXv3xmq1kpiYSFxcHImJiSQnJ+Pq6kq/fv04fvw4y5cvp0ePHjz2mG2o/J577ql2zGVlZmbyyiuv0KFDB1555RWcnJzIz89n3rx5jB07FoB+/frh7+/Pt99+y7Jly/j6669JTk6udAQtKCiItm3bsnPnTvr378/48ePx9vZm/fr1LFy4EH9/f1544YVfHLeIiIjUcgWFcOFy+bK8AsjIdixv4GEbDbuSD5Zr/Hnv6mL775VKlr2UnLvWNEZX89U6lU1zlOumkbFabsGCBdU+fvrpp+u6ds+ePQkODrYfAQEBxMXFkZaWxocffsjBgwf5/PPPWblyJZMnT2bBggXMmDEDd3d3oqOj7dcZNGgQZ8+eZdWqVbz11ltMmzaNpUuXMnjwYFJSUli1ahVms5nw8HBCQ0MB6NSpE9HR0URHR9OnT59f/PpcvHiRkSNHsnbtWqZMmcKkSZNISkpi7ty5ODs7s2/fPj744AMmT57MkiVLGD16NEePHmXGjBmVXrdx48ZER0fz+9//nu3bt9OtWzdCQ0MZPXo0np6eJCcnExwcXO049+/fT2Zmpv1xeno6x44dsz/Ozs7mu+++c2iTkJBQ6ePExESKiorUh/pQH+rD/vjy5TJ/+InITfXTTz9V/D7f9r1te/rSR0IqLP2mXHlR2mlbm7ouZJ4+W+59fubYCfv5Cj9L6v6csOUVlPssOfnDj/b2UP6zBGxLMa75PEr3Ucnj2/Ez8Zf0cb1MVqvVekNXkFuiZM3Y4MGDr6td6ZGrqvj5+WGxWCqtk5qaSnR0NF988QWTJ0+mRYsW9nNubm506NCBlJQURowYgaurK/Hx8eXajx8/nsTERC5evEjdunXt56xWK7m5uVy4cIHCwkKWLFlCTEwMr7/+Ou+88w4AU6ZM4Y033mD8+PFMmDDB3vbixYtMmDCB6dOn8/HHHzNs2DD7uaSkJF5++WW2b99Oya/75s2bGTlyJAcPHiQzM9M+ddBqtTJnzhyioqIYOnQokyZNcoh/69atREdH06xZsyrffD/88APvvPMO3377LY8//jj16tVj69atrFu3jjZt2rBx48YaWZ8mIlJTUlJS8L5ipfEz/4bUE0aHI1J7tQ607VjYuEHFdTIvwa4y9wf7cxw08obRv3csf7iNbfQrZBSEBMCaNx3Pz4uH52bB3hmVrxkLeh7+MQRi/uB4bshM25qxcwvKt8vItiWF4/vDhIEVPx+5Jk1TrOXK7nZYk373u99dcwOPbdu2cfLkSQoKCiod/cnIyCA4OJgTJ07w3nvvsXz5co4dO+bwjQVQ7puXmuTn52dPxMA2rfPYsWMUFxcTFxdXbv1ZidJJ5LUcOHCA6OhovvnmG1JTU+27TA4dOpTY2Fjee+89pk2bxuTJk2vqqYiIiEht4u1h28mwbFmAd/nyEqHN4OsDtvuNlV56suMQuFlsuyJWJNAH/OpD0jVuEP1/hyBU9xO7GZSM1XLXM+XEYrHg5ORUY33fe++9jBs37prnPD098fPz48yZM0yePJk5c+Zw3333MWHCBIKCgrBYLOzatYt33nmH4uLiavVXeoOO0qxWa7kEr4Sbm1u5uiX69u1Lv379rtmuSZMmlcaSlJTE+vXr+d3vfuew3X+DBg14+OGHmTt3Llu2bKn0GiIiIiLXpV+4bXv7TxOhXydbWUY2LE+AXh0c13sd+Xl5SstStyV6Mhzmb4LjGRDsayvbsNe2C+NrvW7Nc7jLKBmr5RYtWlTtupGRkQ47//1S9evXp2HDhmRlZTFgwIBKNwVJTk4mMTERV1dXduzYYU+OCgoKOHToULn6FSVcFovFvjvh+fOOOwVlZ2eTkZFRrdjr1q1LYGAgJpMJZ2dnBg0aVK12ZZ0+fZqCgoJrJoHFxcUUFxdTWFj4i64tIiIick39wiGsFQz/F+xPB996MGsdFBVDbJkphL8db/vv0X9fLRv7pC1x6zYOXomAS7kw7T+2qY3Df+PYfuFm+PEsXM6zPd66HyYvt/085FFoqvuQVYeSsVquZ8+e1a7r4+NTI322atWKsLAwVq5cyfTp0x026wDbaN3Fixfx9/enTp061KlTB5PJ5DA
ClpyczOrVq8td28PDAyifcLm4uBAUFISTkxPx8fG8/vrrgC0Ri4+PJy0trVqxm0wmQkNDue+++/j0009JTEwkLCzMoc7ly5fJycnBz8+vgqtA06ZNcXd3Z8uWLaSlpdG8uW1o//Tp02zYsIH8/Hx+/etfVysmERERkWpxcrKtFxs9H95bbdv98Nf3QNxLtnVqVQn2hS2T4PU4GLMIXJwh4iF4Z1j5XRTnbYAtKVcfb/rOdoBtDZuSsWpRMnYbO3jwIJcuXQJsa5mKiopITk4GbElJq1atqrzGzVwzVpEWLVrwwgsvcODAAUaPHs2qVasIDw/HbDbzww8/sGvXLoKCgti0aRPNmjWjffv2JCcnExYWRu/evTl37hxbt24lLy/vmtdu2LAhixYton79+vj6+tKyZUt69erFQw89RHh4OGvXrqVnz56Ehoaye/duvvnmGwIDA6u9mUnHjh0ZNWoUY8eO5ZFHHqFXr160bduWK1eu8P3337N7925GjBjhsIFIWb/+9a/p1asXS5cu5aGHHrLfwHvHjh18/fXX+Pj4lEtSRURERCq1eVLVdbw94MNRtqMypUfESmvXBL689jKT645FqqRk7DaWmprKqVOnHMqSkpIACAgIqFYyZpTu3buzfPlyZsyYQXx8PO+99x4mkwkfHx86dOjAqFG2D4hGjRrxxhtv4OzszH/+8x+mTZtG/fr16d27N+Hh4YwcOdLhug8//DB/+ctfmD17NtOmTSM/P5+mTZvSq1cvQkNDiY2NZeLEiWzdupUNGzbw4IMP8u677zJv3rxqJ2MuLi48++yz3HPPPcycOZOEhAQ+//xzXFxc8Pf3JyIigv79+1d6jWbNmvH222/TokULVq5cyccff0xBQQGenp48+eSTTJ06tcp1ZyIiIiJSu2lrexEREQG0tb3ILVOdre3lrqCbPouIiIiIiBhAyZiIiIiIiIgBlIyJiIiIiIgYQMmYiIiIiIiIAZSMiYiIiIiIGEDJmIiIiIiIiAGUjImIiIiIiBhAN30WERERR80bGh2BSO2m95j8TMmYiIiIAFBUVMS53Cv4zhmBi9lsdDgitZu7xegI5DagZExEREQAKCwspGe/37NhwwbatWtndDgiIrWe1oyJiIiI3enTp8nLyzM6DBGRu4KSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERsfP398disRgdhojIXcFktVqtRgchIiIixtu7dy+m7Cu0Dm6Gi9lsdDgiN87dAp7uRkchUiFnowMQERGR24OTkxPeru64vDAX0s4YHY7IjWneEOaNUjImtzUlYyIiIuIo7QyknjA6ChGRWk9rxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkRERERErkdWDoyYDX7DwH0QdBsHyUeq3/5AOjw+ETwGQ4OhMGQmnL1Qvt7fVkDvKeA/HEx9YcLSGnsKcntQMiYiIiIiUl3FxRAxGRZ/DVFPwFtD4cwF6DoODp2sun16BnR5Ew7/BFOehujesHoX9IiF/ALHum8uhp2H4VfNb85zEcMpGbuN7d69m/Xr17NkyRLmzp3L4sWLjQ6JqKgoTCbTNQ+z2czly5dvWt9ZWVmMGDGCpUvvvG+Fli1bZn+dVq1aZXQ4IiIiUpGuf4Vh/6z4/IrtkJAKcVEwfgCMegI2TwSnOjB+WdXXn7IScnJhYyy8HAFj+8Enf4Y9RyFuk2PdtDlw6iNY9OqNPCO5jTkbHYBUbOfOnVgsFnx9fcnPzzc6HAfPPfccrVu3dihzcnLCxcXlpvWZlZXFBx98QH5+PgMHDrxp/dS0S5cu8dJLL2GxWMjLyzM6HBEREbkRK7aDvxf0Dbta5ucJ/TvBoq2QVwAWc8XtVyZCZAdo4ne1rPuD0KoxfJIAIx67Wt6sYY2HL7cXJWO3sYEDB1K/fn0Ali9fTkFBQRUtbp2IiAj69OljdBg1KjMzE09PT+rUqdkB4xdffJHCwkL69evH//zP/9TotUVEROQW250G7VtA2b8XOobA3PVw8CTc3/TabU+cs01p7NCy/LmOIbBmV83HK7c1TVO8jZUkYnei3NxcXn31VYKDgzGbzbi5udGxY0e++uorh3rFxcW8/PLL3HfffXh5eeHs7IyPjw+9e/fm6NGj9npr1qyheXPbfOn58+fbp/z5+vraz5tMJsaPH18ulsjISEwmExcvXrSXhYaG4uvry/79+3n00Ufx8PCgQYMG5OTkAPDjjz/St29ffH19cXZ2xtvbm4iICI4cuY7FucDWrVtZvHgxY8eOxcvL67raioiIyG3oVCYEeJcvLyk7eb7ytqXrlm1//pJtZE3uGhoZq+Vyc3OrXdfFxaXao0JZWVmkp6c7lHl5eeHh4UFBQQFhYWGkpKTQvXt3hg0bRnZ2NsuWLSMyMpIVK1bQu3dvAPLz85k3bx5dunThscceo169eiQnJ7NmzRp2795NSkoK9evX54EHHmDs2LFMmTKFsLAwnnzySQDc3Nyq/fzKys3NpUuXLrRr147XXnuNjIwMzGYzhw8fpmPHjhQUFNCnTx9CQkI4cuQIn3zyCZ07dyYpKYmgoKAqr19YWMjw4cN58MEHef3113n55Zd/cawiIiJyExQUwoXL5cvyCiAj27G8gYdtNOxKPliu8Se0689LNa5UsrSk5Ny1pjG6mq/WqWyao9QqGhmr5RYsWFDt46effqr2dYcPH05wcLDDMWnSJABiY2PZs2cPs2bNYu3atUyaNImZM2fy/fff4+npyZgxY7BarQBYLBZOnTrF2rVrmT59OrGxsXz++edMnTqV9PR05syZA0BQUBDPP/88AK1btyY6Opro6Gj+9Kc//eLXJicnh8cff5wtW7YwadIkZs+ejaurK8OHD6ewsJCEhAQWLlzIuHHjmD9/PuvWrePs2bO8+eab1br+2LFjOX78OHPmzLnhqY/79+8nMzPT/jg9PZ1jx47ZH2dnZ/Pdd985tElISKj0cWJiIkVFRepDfagP9WF/fDM3YRIx2jXfH1v327anL30kpMLSb8qVX0xJs70H67pAXmH5a+b+nGjVdXHso9R78IdTJ2w//Dz65fAezLWVpfxwqNK474TPkru5j+tlspb8VSy3tZI1Y4MHD76udmVHryrj5+eHxWKptE5UVBTvv/8+MTExhIaGOpwLDQ2lTZs2hISEkJOTw44dOzCZTA51Ro0axRdffEF6ejoBAQEO5woLCzl//jz5+flcunSJNm3a0LdvX1auXAnA0aNHad68OX/84x+Ji4tzaLtmzRoiIiIYN24csbGxDuciIyNZvXo12dnZ1KtXzx7rnj17OHz4MC1bXp23nZWVRYMGDejRowfz5s0r9/zDw8NxdXXl0KFD5c6Vtn//fn71q1/xxz/+kblz5zq8dp999lmtW28nIrVDSkoK3l
esNH7m35B6wuhwRG5M60DbjoWNG1RcJ/MS7CqzBOHPcdDIG0b/3rH84Ta20a+QURASAGvKfDk7Lx6emwV7Z1S+ZizoefjHEIj5g+O5ITNta8bOLSjfLiPblhSO7w8T7pxNzKRqmqZYy1VnOt0vER4eXmFCcezYMfLz82nSpEmF7U+cOGFPxubMmcOMGTM4cuSIw7cVYPtG4mapV68eTZs6flju3bsXq9XKV199RXBw8DXblaxTq8zQoUPx8/Nj+vTpNRKriIiI3ATeHradDMuWBXiXLy8R2gy+PmC731jpmS87DoGbxbYrYkUCfcCvPiRdYw36/x2CUN1P7G6jZKyWu54pJxaLBScnpxvu02q1EhQUxFtvvVVhnZCQEAA+/vhjXnzxRVq2bMmYMWNo3rw5bm5uFBUVMWTIEIqLi6vVZ9kRuNLKJnglXFxccHZ2fAuUDBR369bNPi2yLFdX10pjmT9/Prt27WL8+PEOI2jnzp0DbJuD7N69m/vuuw+zWXPCRURE7ij9wm3b23+aCP062coysmF5AvTq4Lje68jPS0BaNrpa9mQ4zN8ExzMg+OcveDfste3C+FqvW/Mc5LahZKyWW7RoUbXrRkZG0rhxJd/mVFNgYCDnz5+nb9++VU57jIuLw2w2880339Co0dUPqn379pWrW1nC5e/vD8D58+V3MCo997cq7dq1w2QykZeXx6BBg6rdrrSSHRdjY2PLTZkEePXVVwE4cOAA99577y/qQ0RERAzSLxzCWsHwf8H+dPCtB7PWQVExxJaZQvjbn3d5Pvrvq2Vjn7Qlbt3GwSsRcCkXpv3HNrVx+G8c2y/cDD+ehcs/36d0636YvNz285BHoanuQ3anUzJWy/Xs2bPadX18fGqkz4EDBzJ16lRef/113n///XLn09LS7NvUl4zElR4Bs1qtxMTElGvn4eEB4LDQskSrVq1wcnJi27ZtWK1We+K2fv16Dhw4UO3YfX19CQsLIzExkZUrV9p3bSwd2/HjxyudgtmvXz97cljaypUr2bRpE6NGjaJNmzYEBgZWOy4RERG5TTg52daLjZ4P76227X7463sg7iXbOrWqBPvClknwehyMWQQuzhDxELwzrPwuivM2wJaUq483fWc7wLaGTcnYHU/J2G3s4MGDXLp0CbBtw15UVERycjJgS0xatWpV5TVu1pqxykycOJH4+HhmzZpFYmIiXbp0wcvLi2PHjrFt2zacnJxISbF9sPTv359NmzbRpUsXnnrqKYqKili3bp39fl+l+fj4EBAQwKZNm4iJiaFx48bUrVuXkSNH4uHhQa9evVi1ahVdu3alW7duHD58mFWrVtG0aVOHe5ZVZeHChYSFhTFgwAB69OhB+/btsVqtpKWlsXHjRp544olyG4iU9sADD/DAAw+UKz9w4ACbNm2ie/fu2sBDRETkdrV5UtV1vD3gw1G2ozKlR8RKa9cEvhxXM7HIHU3J2G0sNTWVU6dOOZQlJSUBEBAQUK1kzAhms5nt27czYcIEli1bxqxZswDw9vambdu2DB061F73hRdeIDMzk1mzZvH222/j7u5O586dmT17drnNNcA27TIqKoqZM2eSn5+Pj48PI0eOBGxrtZ555hk2btxIYmIiLVq0YMGCBXz00UfXlYy1bNmSffv2ERMTQ3x8PPHx8ZjNZnx9fXnkkUd49tlnb+wFEhERERFBW9uLiIjIz7S1vdQq1dnaXsRguumziIiIiIiIAZSMiYiIiIiIGEDJmIiIiIiIiAGUjImIiIiIiBhAyZiIiIiIiIgBlIyJiIiIiIgYQPcZExEREUfNGxodgciN0++x3AGUjImIiAgARUVFnMu9gu+cEbiYzUaHI3Lj3C1GRyBSKSVjIiIiAkBhYSE9+/2eDRs20K5dO6PDERGp9bRmTEREROxOnz5NXl6e0WGIiNwVlIyJiIiIiIgYQMmYiIiIiIiIAZSMiYiIiIiIGEDJmIiIiIiIiAGUjImIiIiIiBhAyZiIiIiIiIgBlIyJiIiIiIgYQMmYiIiIiIiIAZSMiYiIiIiIGEDJmIiIiIiIiAGUjImIiIiIiBhAyZiIiIjY+fv7Y7FYjA5DROSuYLJarVajgxARERHj7d27F1P2FVoHN8PFbDY6HLkbuVvA093oKERuGWejAxAREZHbg5OTE96u7ri8MBfSzhgdjtxtmjeEeaOUjMldRcmYiIiIOEo7A6knjI5CRKTW05oxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxEREREandsnJgxGzwGwbug6DbOEg+Uv32B9Lh8YngMRgaDIUhM+HshfL1/rYCek8B/+Fg6gsTltbYU5DaScmYiIiIiNRexcUQMRkWfw1RT8BbQ+HMBeg6Dg6drLp9egZ0eRMO/wRTnobo3rB6F/SIhfwCx7pvLoadh+FXzW/Oc5FaR8nYLbZ7927Wr1/PkiVLmDt3LosXLzY6JKKiojCZTNc8zGYzly9fvml9Z2VlMWLECJYuvf2/Ofr22295+umnefDBB3F3d8dkMjFs2LAK6xcXFzN27FgCAwMxm834+PgwYMAAMjIybl3QIiIitV3Xv8Kwf1Z8fsV2SEiFuCgYPwBGPQGbJ4JTHRi/rOrrT1kJObmwMRZejoCx/eCTP8OeoxC3ybFu2hw49REsevVGnpHcRZyNDuBus3PnTiwWC76+vuTn5xsdjoPnnnuO1q1bO5Q5OTnh4uJy0/rMysrigw8+ID8/n4EDB960fmrCpk2bWLJkCQ0bNiQkJIQ9e/ZUWn/IkCEsXryYsLAwRo4cSWpqKsuWLSMlJYVdu3ZhsVhuUeQiIiJ3sRXbwd8L+oZdLfPzhP6dYNFWyCsAi7ni9isTIbIDNPG7Wtb9QWjVGD5JgBGPXS1v1rDGw5faTcnYLTZw4EDq168PwPLlyykoKKiixa0TERFBnz59jA6jRmVmZuLp6UmdOjc+CDxo0CAGDhxIQEAA27Zt4+GHH66wbnJyMkuWLCEsLIxt27bZ+2/VqhUTJkzgnXfeYezYsTcck4iIiFRhdxq0bwFl/xboGAJz18PBk3B/02u3PXHONqWxQ8vy5zqGwJpdNR+v3FU0TfEWK0nE7kS5ubm8+uqrBAcHYzabcXNzo2PHjnz11VcO9YqLi3n55Ze577778PLywtnZGR8fH3r37s3Ro0ft9dasWUPz5rY51fPnz7dPjfT19bWfN5lMjB8/vlwskZGRmEwmLl68aC8LDQ3F19eX/fv38+ijj+Lh4UGDBg3IyckB4Mcff6Rv3774+vri7OyMt7c3ERERHDlSvQW8jRo1IiAgoFp1586di9Vq5aWXXnJIBP/yl79gsVhYvnx5ta4jIiIiN+hUJgR4ly8vKTt5vvK2peuWbX/+km1kTeQX0sjYHSg3N7fadV1cXKo9KpSVlUV6erpDmZeXFx4eHhQUFBAWFkZKSgrdu3dn2LBhZGdns2zZMiIjI1mxYgW9e/cGID8/n3nz5tGlSxcee+wx6tWrR3JyMmvWrGH37
t2kpKRQv359HnjgAcaOHcuUKVMICwvjySefBMDNza3az6+s3NxcunTpQrt27XjttdfIyMjAbDZz+PBhOnbsSEFBAX369CEkJIQjR47wySef0LlzZ5KSkggKCvrF/ZaVnJyMyWSie/fuDuWurq7cc889HD58mIKCAszmSqZFiIiIiKOCQrhwuXxZXgFkZDuWN/CwjYZdyQfLNf7kdf15GcaVSpaNlJy71jRGV/PVOpVNcxSphEbG7kALFiyo9vHTTz9V+7rDhw8nODjY4Zg0aRIAsbGx7Nmzh1mzZrF27VomTZrEzJkz+f777/H09GTMmDFYrVYALBYLp06dYu3atUyfPp3Y2Fg+//xzpk6dSnp6OnPmzAEgKCiI559/HoDWrVsTHR1NdHQ0f/rTn37xa5OTk8Pjjz/Oli1bmDRpErNnz8bV1ZXhw4dTWFhIQkICCxcuZNy4ccyfP59169Zx9uxZ3nzzzV/c57WcPXvWPjJXlr+/P5cuXSIzM7Pa19u/f79D/fT0dI4dO2Z/nJ2dzXfffefQJiEhodLHiYmJFBUVqQ/1oT7Uh/3xzdywSeR6VPi7u+172/b0pY+EVFj6TbnyXZ99aWtc1wXyCsu9P77/du/V81z7/fHThZ9HzfIKyr8Hcwsc2kP592B6evpt9z5XHze3j+tlspb8BS23XMmascGDB19Xu7KjV5Xx8/OrcqOIqKgo3n//fWJiYggNDXU4FxoaSps2bQgJCSEnJ4cdO3ZgMpkc6owaNYovvviC9PT0ctP4CgsLOX/+PPn5+Vy6dIk2bdrQt29fVq5cCcDRo0dp3rw5f/zjH4mLi3Nou2bNGiIiIhg3bhyxsbEO5yIjI1m9ejXZ2dnUq1fPHuuePXs4fPgwLVtendudlZVFgwYN6NGjB/PmzSv3/MPDw3F1deXQoUOVvk6llawZu1bcAAEBAeTl5XHu3Llyr1dERARr1qwpF6eIiNFSUlLwvmKl8TP/htQTRocjd5vWgbYdCxuX/yLTLvMS7CqzvODPcdDIG0b/3rH84Ta20a+QURASAGvKfPE6Lx6emwV7Z1S+ZizoefjHEIj5g+O5ITNta8bOLSjfLiPblhSO7w8Tbu8NysRYmqZ4B6rJ6XSlhYeHV7iBx7Fjx8jPz6dJkyYVtj9x4oQ9GZszZw4zZszgyJEjDt9AgO1bhpulXr16NG3q+IG6d+9erFYrX331FcHBwddsV7JOraa4urqSk5NDUVERzs6Ob7OSaaYlSaSIiIhUk7eHbSfDsmUB3uXLS4Q2g68P2O43Vnrpxo5D4Gax7YpYkUAf8KsPSddYX/5/hyBU9xOTG6Nk7A50PdNILBYLTk5ON9yn1WolKCiIt956q8I6ISEhAHz88ce8+OKLtGzZkjFjxtC8eXPc3NwoKipiyJAhFBcXV6vPsiNKpZVN8Eq4uLiUS35KBn+7detmnxZZlqura7Viqi4/Pz9+/PFHzp8/T8OGjtvcnj59Gg8PD7y9r7EYWERERGpWv3Db9vafJkK/TrayjGxYngC9Ojiu9zry8/KOlo2ulj0ZDvM3wfEMCP75y9sNe227ML7W69Y8B6m1lIzdgRYtWlTtupGRkTRuXMk3PtUUGBjI+fPn6du3b5XTHuPi4jCbzXzzzTc0anT1w2zfvn3l6laWcPn7+wNw/nz5XY5Kz+etSrt27TCZTOTl5TFo0KBqt7sR7du3Z+fOncTHxztMQ83NzeXw4cO0adNGm3eIiIjcCv3CIawVDP8X7E8H33owax0UFUNsmSmEv/15B+ej/75aNvZJW+LWbRy8EgGXcmHaf2xTG4f/xrH9ws3w41m4nGd7vHU/TP55B+Uhj0JT3YdMHCkZuwP17Nmz2nV9fHxqpM+BAwcydepUXn/9dd5///1y59PS0uzb1JeMxJUeAbNarcTExJRr5+HhAXDNzSxatWqFk5MT27Ztw2q12hO39evXc+DAgWrH7uvrS1hYGImJiaxcudK+a2Pp2I4fP17pFMzr9eyzzzJ37lz++c9/MnDgQPuOlv/4xz/Iy8srF4OIiIjcJE5OtvVio+fDe6ttux/++h6Ie8m2Tq0qwb6wZRK8HgdjFoGLM0Q8BO8MK7+L4rwNsCXl6uNN39kOsK1hUzImZSgZu8UOHjzIpUuXANsoSVFREcnJyYAtMWnVqlWV17hZa8YqM3HiROLj45k1axaJiYl06dIFLy8vjh07xrZt23ByciIlxfbh079/fzZt2kSXLl146qmnKCoqYt26dfb7fZXm4+NDQEAAmzZtIiYmhsaNG1O3bl1GjhyJh4cHvXr1YtWqVXTt2pVu3bpx+PBhVq1aRdOmTR3uWVaVhQsXEhYWxoABA+jRowft27fHarWSlpbGxo0beeKJJ665EUdpFy5c4L//+78BOHPmDAC7du2y7/745JNP8tvf/haAX//61wwYMIClS5fSuXNnevbsyffff8+yZcto27Yto0ePrnbsIiIiUonNk6qu4+0BH46yHZUpPSJWWrsm8OW4molFpBQlY7dYamoqp06dcihLSkoCbDvwVScZM4LZbGb79u1MmDCBZcuWMWvWLAC8vb1p27YtQ4cOtdd94YUXyMzMZNasWbz99tu4u7vTuXNnZs+eXW5zDbBNu4yKimLmzJnk5+fj4+PDyJEjAdvNoJ955hk2btxIYmIiLVq0YMGCBXz00UfXlYy1bNmSffv2ERMTQ3x8PPHx8ZjNZnx9fXnkkUd49tlnq7xGZmYms2fPdij77rvv7Fue+vn52ZOxkufVpEkTFi5cyMSJE/Hw8OAPf/gDs2bNqnKqp4iIiIjUftraXkRERABtbS8Gq87W9iK1jG76LCIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBtB9xkRERMRR84ZGRyB3I/3eyV1IyZiIiIgAUFRUxLncK/jOGYGL2Wx0OHI3crcYHYHILaVkTERERAAoLCykZ7/fs2HDBtq1a2d0OCIitZ7WjImIiIjd6dOnycvLMzoMEZG7gpIxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjERERGx8/f3x2KxGB2GiMhdwWS1Wq1GByEiIiLG27t3L6bsK7QOboaL2Wx0OHK3cbeAp7vRUYjcUs5GByAiIiK3BycnJ7xd3XF5YS6knTE6HLmbNG8I80YpGZO7jpIxERERcZR2BlJPGB2FiEitpzVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiK1V1YOjJgNfsPAfRB0GwfJR6rf/kA6PD4RPAZDg6EwZCacvVC+3t9WQO8p4D8cTH1hwtIaewpSeykZExEREZHaqbgYIibD4q8h6gl4ayicuQBdx8Ghk1W3T8+ALm/C4Z9gytMQ3RtW
74IesZBf4Fj3zcWw8zD8qvnNeS5SKykZu43t3r2b9evXs2TJEubOncvixYuNDomoqChMJtM1D7PZzOXLl29a31lZWYwYMYKlS++Mb5o+++wzfvWrX+Hm5obFYqFly5b87W9/o7i42OjQREREaoeuf4Vh/6z4/IrtkJAKcVEwfgCMegI2TwSnOjB+WdXXn7IScnJhYyy8HAFj+8Enf4Y9RyFuk2PdtDlw6iNY9OqNPCO5yzgbHYBUbOfOnVgsFnx9fcnPzzc6HAfPPfccrVu3dihzcnLCxcXlpvWZlZXFBx98QH5+PgMHDrxp/dSEWbNmERUVhZ+fH88++yz16tVj9erVvPnmmxw9epQPPvjA6BBFRERqvxXbwd8L+oZdLfPzhP6dYNFWyCsAi7ni9isTIbIDNPG7Wtb9QWjVGD5JgBGPXS1v1rDGw5faT8nYbWzgwIHUr18fgOXLl1NQUFBFi1snIiKCPn36GB1GjcrMzMTT05M6dW5swLigoID//u//pl69euzcuZMmTZoA8Le//Y2wsDA+/vhjnnvuOf7rv/6rJsIWERGRiuxOg/YtoOz/2zuGwNz1cPAk3N/02m1PnLNNaezQsvy5jiGwZlfNxyt3HU1TvI2VJGJ3otzcXF599VWCg4Mxm824ubnRsWNHvvrqK4d6xcXFvPzyy9x33314eXnh7OyMj48PvXv35ujRo/Z6a9asoXlz2xzs+fPn26dG+vr62s+bTCbGjx9fLpbIyEhMJhMXL160l4WGhuLr68v+/ft59NFH8fDwoEGDBuTk5ADw448/0rdvX3x9fXF2dsbb25uIiAiOHKl6wW9ycjLZ2dl07tzZnogBmEwmhg8fTlFREXPnzq3+iykiIiK/zKlMCPAuX15SdvJ85W1L1y3b/vwl28iayA3QyFgtl5ubW+26Li4u1R4VysrKIj093aHMy8sLDw8PCgoKCAsLIyUlhe7duzNs2DCys7NZtmwZkZGRrFixgt69ewOQn5/PvHnz6NKlC4899hj16tUjOTmZNWvWsHv3blJSUqhfvz4PPPAAY8eOZcqUKYSFhfHkk08C4ObmVu3nV1Zubi5dunShXbt2vPbaa2RkZGA2mzl8+DAdO3akoKCAPn36EBISwpEjR/jkk0/o3LkzSUlJBAUFVXjdK1euAFC3bt1y59zd3QHbekARERG5DgWFcOFy+bK8AsjIdixv4GEbDbuSD5Zr/Lnr+vOyiiuVLAMpOXetaYyu5qt1KpvmKFIFjYzVcgsWLKj28dNPP1X7usOHDyc4ONjhmDRpEgCxsbHs2bOHWbNmsXbtWiZNmsTMmTP5/vvv8fT0ZMyYMVitVgAsFgunTp1i7dq1TJ8+ndjYWD7//HOmTp1Keno6c+bMASAoKIjnn38egNatWxMdHU10dDR/+tOffvFrk5OTw+OPP86WLVuYNGkSs2fPxtXVleHDh1NYWEhCQgILFy5k3LhxzJ8/n3Xr1nH27FnefPPNSq/7wAMPUKdOHXbt2lVuaumXX34JwNmzZ68r1v3795OZmWl/nJ6ezrFjx+yPs7Oz+e677xzaJCQkVPo4MTGRoqIi9aE+1If6sD++mZswiVyPa/3uXly307Y9fekjIRWWflO+/FiG7f1Q1wXyCoEy749cW6JV5OLs0Efp98fp7CzbDz+Pfjm8B3NtZdu/dZyqeCe8z9XHze3jepmsJX8Vy22tZM3Y4MGDr6td2dGryvj5+WGxWCqtExUVxfvvv09MTAyhoaEO50JDQ2nTpg0hISHk5OSwY8cOTCaTQ51Ro0bxxRdfkJ6eTkBAgMO5wsJCzp8/T35+PpcuXaJNmzb07duXlStXAnD06FGaN2/OH//4R+Li4hzarlmzhoiICMaNG0dsbKzDucjISFavXk12djb16tWzx7pnzx4OHz5My5ZX54JnZWXRoEEDevTowbx588o9//DwcFxdXTl06FClr1Pfvn357LPP+O1vf8sbb7yBl5cX//M//8M///lP8vPz8fPz48yZM5VeQ0TkVktJScH7ipXGz/wbUk8YHY7cTVoH2nYsbNyg4jqZl2BXmeUCf46DRt4w+veO5Q+3sY1+hYyCkABYU+aL1Hnx8Nws2Duj8jVjQc/DP4ZAzB8czw2ZaVszdm5B+XYZ2baEcHx/mHB7bzgmxtM0xVqusul0NyI8PLzCDTyOHTtGfn6+w3qpsk6cOGFPxubMmcOMGTM4cuSIw7cVYPtG4mapV68eTZs6fgDv3bsXq9XKV199RXBw8DXblaxTq8z//M//8PTTT/O///u/bNiwwd7f3/72N0aPHn1D0ytFRETuSt4etp0My5YFeJcvLxHaDL4+YLvfWOmlGDsOgZvFtitiRQJ9wK8+JF1jvfj/HYJQ3U9MbpySsVrueqacWCwWnJycbrhPq9VKUFAQb731VoV1QkJCAPj444958cUXadmyJWPGjKF58+a4ublRVFTEkCFDqn1PrrIjcKWVTfBKuLi44Ozs+BYoGSju1q2bfVpkWa6urlXGU7duXT799FPOnDnDjh07MJvNdOnSxT6iVlmiKiIiIjWkX7hte/tPE6FfJ1tZRjYsT4BeHRzXex35eblGy0ZXy54Mh/mb4HgGBP/8ZeyGvbZdGF/rdWueg9RqSsZquUWLFlW7bmRkJI0bV/INUTUFBgZy/vx5+vbtW+W0x7i4OMxmM9988w2NGl398Nu3b1+5upUlXP7+/gCcP19+V6TSc3+r0q5dO0wmE3l5eQwaNKja7SrSsGFDevW6+mFdcsPqxx57rKImIiIiUlP6hUNYKxj+L9ifDr71YNY6KCqG2DJTCH/7847MR/99tWzsk7bErds4eCUCLuXCtP/YpjYO/41j+4Wb4cezcDnP9njrfpi83PbzkEehqe5DJuUpGavlevbsWe26Pj4+NdLnwIEDmTp1Kq+//jrvv/9+ufNpaWn2bepLRuJKj4BZrVZiYmLKtfPw8ABwWGhZolWrVjg5ObFt2zasVqs9cVu/fj0HDhyoduy+vr6EhYWRmJjIypUr7bs2lo7t+PHjv2hk6/vvv+f999+nUaNGvPTSS9fdXkRERK6Tk5Ntvdjo+fDeatvuh7++B+Jesq1Tq0qwL2yZBK/HwZhF4OIMEQ/BO8PK76I4bwNsSbn6eNN3tgNsa9iUjMk1KBm7jR08eJBLly4Btm3Yi4qKSE5OBmyJSatWraq8xs1aM1aZiRMnEh8fz6xZs0hMTKRLly54eXlx7Ngxtm3bhpOTEykptg+r/v37s2nTJrp06cJTTz1FUVER69ats9/vqzQfHx8CAgLYtGkTMTExNG7cmLp16zJy5Eg8PDzo1asXq1atomvXrnTr1o3Dhw+zatUqmjZt6nDPsqosXLiQsLAwBgwYQI8ePWjfvj1Wq5W0tDQ2btzIE088UW4DkbL+/ve/89lnnxEeHk7Dhg1JTU1l5cqV1KlThwULFuDp6Xk9L6mIiIhcy+ZJVdfx9oAPR9mOypQeESutXRP4clzNxCJShpKx21hqaiqnTp1yKEtKSgIgICCgWsmYEcxmM9u3b2fChAksW7a
MWbNmAeDt7U3btm0ZOnSove4LL7xAZmYms2bN4u2338bd3Z3OnTsze/bscptrgG3aZVRUFDNnziQ/Px8fHx9GjhwJ2G4G/cwzz7Bx40YSExNp0aIFCxYs4KOPPrquZKxly5bs27ePmJgY4uPjiY+Px2w24+vryyOPPMKzzz5b5TUefPBBli5dyscff8yVK1fw9PSke/fuTJ06lTZt2lQ7FhERERGpvbS1vYiIiADa2l4MVJ2t7UVqId30WURERERExABKxkRERERERAygZExERERERMQASsZEREREREQMoGRMRERERETEAErGREREREREDKD7jImIiIij5g2NjkDuNvqdk7uUkjEREREBoKioiHO5V/CdMwIXs9nocORu424xOgKRW07JmIiIiABQWFhIz36/Z8OGDbRr187ocEREaj2tGRMRERG706dPk5eXZ3QYIiJ3BSVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiIiIiIGUDImIiIiIiJiACVjIiIiIiIiBlAyJiIiIiIiYgAlYyIiIiIiIgZQMiYiIiIiImIAJWMiIiJi5+/vj8ViMToMEZG7gslqtVqNDkJERESMt3fvXkzZV2gd3AwXs9nocORmcbeAp7vRUYgI4Gx0ACIiInJ7cHJywtvVHZcX5kLaGaPDkZuheUOYN0rJmMhtQsmYiIiIOEo7A6knjI5CRKTW05oxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERERERAygZExEREZGakZUDI2aD3zBwHwTdxkHykeq3P5AOj08Ej8HQYCgMmQlnL5Sv97cV0HsK+A8HU1+YsLTGnoLIraRkTERERERuXHExREyGxV9D1BPw1lA4cwG6joNDJ6tun54BXd6Ewz/BlKchujes3gU9YiG/wLHum4th52H4VfOb81xEbhElY7ex3bt3s379epYsWcLcuXNZvHix0SGRmppKr169MJlM1zxcXV1vav+bNm3i1VdfZfPmzTe1n5qSmZnJP//5T9q3b0+9evVwcXGhUaNGPPXUU+Tn5xsdnoiISPV1/SsM+2fF51dsh4RUiIuC8QNg1BOweSI41YHxy6q+/pSVkJMLG2Ph5QgY2w8++TPsOQpxmxzrps2BUx/Boldv5BmJGM7Z6ACkYjt37sRiseDr63tb/uH+yiuvEBQU5FDWqFGjm9rn9u3bmTlzJl5eXnTt2vWm9nWjjh8/zrhx45g/fz73338/f/rTn3B1dSUtLY309HQKCwtxcXExOkwREZGasWI7+HtB37CrZX6e0L8TLNoKeQVgMVfcfmUiRHaAJn5Xy7o/CK0awycJMOKxq+XNGtZ4+CJGUDJ2Gxs4cCD169cHYPny5RQUFFTR4tZ65pln6NChg9Fh1JjCwkJycnLw9PS84WtduXKFhQsXsmjRIoYPH86HH36IyWSqgShFRERuU7vToH0LqFNm4lXHEJi7Hg6ehPubXrvtiXO2KY0dWpY/1zEE1uyq+XhFbgOapngbK0nE7kRWq5Vvv/2Wp59+msDAQMxmMx4eHvTo0YPdu3c71N23bx9Dhw7l3nvvxdPTE2dnZ/z9/Xnuuec4d+4cABcvXuTPf/4zb7zxBgCxsbH2qZHNmjWznzeZTMTFxTlcPykpiU6dOjkkQ5s3b6Z169Y0bdqUxYsX07FjRzw9PfHy8gIgPz+f1atX06NHD3x8fHB2dsbX15dhw4Zx5syZKp///v37Wb16NSaTiWnTplFUVMTFixcpLCy8gVdVRETkNnYqEwK8y5eXlJ08X3nb0nXLtj9/yTayJlLLaGSslsvNza12XRcXF+qU/TarEmfPniU9Pd3+2NnZGS8vL1xdXVm/fj0vvfQSaWlp9OrVizZt2nD06FG+/PJLOnXqxNdff20fVUtOTubTTz+le/fu9O7dG6vVSmJiInFxcSQmJpKcnIyrqyv9+vXj+PHjLF++nB49evDYY7bpCvfcc0+1Yy4rMzOTV155hQ4dOvDKK6/g5OREfn4+8+bNY+zYsQD069cPf39/vv32W5YtW8bXX39NcnJypSNox48fJyUlhXvuuYepU6cSFxfH2bNncXZ25tFHH2X27NmEhIT84rhFRERuqoJCuHC5fFleAWRkO5Y38LCNhl3JB8s1/rR0/XlK/pVKllyUnLvWNEZX89U6lU1zFLkDaWSslluwYEG1j59++um6rt2zZ0+Cg4PtR0BAAHFxcaSlpfHhhx9y8OBBPv/8c1auXMnkyZNZsGABM2bMwN3dnejoaPt1Bg0axNmzZ1m1ahVvvfUW06ZNY+nSpQwePJiUlBRWrVqF2WwmPDyc0NBQADp16kR0dDTR0dH06dPnF78+Fy9eZOTIkaxdu5YpU6YwadIkkpKSmDt3Ls7Ozuzbt48PPviAyZMns2TJEkaPHs3Ro0eZMWNGpdc9efIkFy5c4NixY0yfPp3IyEimT59OREQEW7ZsoXPnztf1eu/fv5/MzEz74/T0dI4dO2Z/nJ2dzXfffefQJiEhodLHiYmJFBUVqQ/1oT7Uh/3x5ctl/viWWqmw1O9Ehb9X2763bU9f+khIhaXflCs/tGG77feqrgvkFZb/vTqfZfuhrotjH6XsOfi97YefR78cfndzbWWZuTn2+mX7yMvLqxXvQfVx5/dxvUxWq9V6Q1eQW6JkzdjgwYOvq13pkauq+Pn5YbFYKq2TmppKdHQ0X3zxBZMnT6ZFixb2c25ubnTo0IGUlBRGjBiBq6sr8fHx5dqPHz+exMRELl68SN26de3nrFYrubm5XLhwgcLCQpYsWUJMTAyvv/4677zzDgBTpkzhjTfeYPz48UyYMMHe9uLFi0yYMIHp06fz8ccfM2zYMPu5pKQkXn75ZbZv307Jr/vmzZsZOXIkBw8eJDMz0z490Wq1MmfOHKKiohg6dCiTJk1yiH/r1q1ER0fTrFmzSt98b731Fn/5y18AeOONN5g8eTJgS9LGjh3L/PnziYmJ4R//+Eelr7eIyK2UkpKC9xUrjZ/5N6SeMDocuRlaB9p2K2zcoPJ6mZdgV5n7g/05Dhp5w+jfO5Y/3MY2+hUyCkICYM2bjufnxcNzs2DvjMrXjAU9D/8YAjF/cDw3ZKZtzdi5BeXbZWTbksLx/WHCwMqfk8htSNMUa7myux3WpN/97nfX3MBj27ZtnDx5koKCAoKDgytsn5GRQXBwMCdOnOC9995j+fLlHDt2zOEbC8DhG4ua5ufnZ0/EwDat89ixYxQXFxMXF1du/VmJ0klkVedLJ4aNGzfmN7/5DQsXLrxjtucXEZG7kLeHbSfDsmUB3uXLS4Q2g68P2O43VnrZw45D4Gax7YpYkUAf8KsPSde4QfT/HYJQ3U9MaiclY7Xc9Uw5sVgsODk51Vjf9957L+PGjbvmOU9PT/z8/Dhz5gyTJ09mzpw53HfffUyYMIGgoCAsFgu7du3inXfeobi4uFr9VbRbodVqLZfglXBzcytXt0Tfvn3p16/fNds1ad
Kk0lgaNmyIi4sL+fn55bb7r1+/Ph4eHjc1yRQREbnl+oXbtrf/NBH6dbKVZWTD8gTo1cFxvdeRn6fqtyz1/8gnw2H+JjieAcG+trINe227ML7W69Y8B5FbTMlYLbdo0aJq142MjKRx40q+taqm+vXr07BhQ7KyshgwYEClm4IkJyeTmJiIq6srO3bssCdHBQUFHDp0qFz9ihIui8WCu7s7AOfPO+7WlJ2dTUZGRrVir1u3LoGBgZhMJpydnRk0aFC12pXVsGFDmjZtyqFDh0hPT+fee++1n8vKyuLixYvcf//9v+jaIiIit6V+4RDWCob/C/ang289mLUOioohtswUwt+Ot/336L+vlo190pa4dRsHr0TApVyY9h/b1Mbhv3Fsv3Az/HgWLufZHm/dD5OX234e8ig01X3I5M6gZKyW69mzZ7Xr+vj41EifrVq1IiwsjJUrVzJ9+nSHzTrANlp38eJF/P39qVOnDnXq1MFkMjmMgCUnJ7N69epy1/bw8ADKJ1wuLi4EBQXh5OREfHw8r7/+OmBLxOLj40lLS6tW7CaTidDQUO677z4+/fRTEhMTCQsLc6hz+fJlcnJy8PPzq+AqEBISQlhYGIcOHWL27NnMnDkTgB9//JF169ZhtVqv699GRETktufkZFsvNno+vLfatvvhr++BuJdsa9WqEuwLWybB63EwZhG4OEPEQ/DOsPK7KM7bAFtSrj7e9J3tANsaNiVjcodQMnYbO3jwIJcuXQJsa5mKiopITk4GbElJq1atqrzGzVwzVpEWLVrwwgsvcODAAUaPHs2qVasIDw/HbDbzww8/sGvXLoKCgti0aRPNmjWjffv2JCcnExYWRu/evTl37hxbt24lLy/vmtdu2LAhixYton79+vj6+tKyZUt69erFQw89RHh4OGvXrqVnz56Ehoaye/duvvnmGwIDA6u9mUnHjh0ZNWoUY8eO5ZFHHqFXr160bduWK1eu8P3337N7925GjBjhsIFIWUFBQTzzzDNs376d9957jyNHjtCuXTt27tzJli1baNOmDS+//PIvfYlFRERuvc2Tqq7j7QEfjrIdlSk9IlZauybw5bWXOFx3LCJ3ACVjt7HU1FROnTrlUJaUlARAQEBAtZIxo3Tv3p3ly5czY8YM4uPjee+99zCZTPj4+NChQwdGjbJ9SDdq1Ig33ngDZ2dn/vOf/zBt2jTq169P7969CQ8PZ+TIkQ7Xffjhh/nLX/7C7NmzmTZtGvn5+TRt2pRevXoRGhpKbGwsEydOZOvWrWzYsIEHH3yQd999l3nz5lU7GXNxceHZZ5/lnnvuYebMmSQkJPD555/j4uKCv78/ERER9O/fv1qvwYIFC5g6dSrbtm1j3bp1eHl58dxzzzF16lT7KJ+IiIiI3J20tb2IiIgA2tr+rlDdre1F5JbQTZ9FREREREQMoGRMRERERETEAErGREREREREDKBkTERERERExABKxkRERERERAygZExERERERMQAus+YiIiIOGre0OgI5GbRv63IbUXJmIiIiABQVFTEudwr+M4ZgYvZbHQ4crO4W4yOQER+pmRMREREACgsLKRnv9+zYcMG2rVrZ3Q4IiK1ntaMiYiIiN3p06fJy8szOgwRkbuCkjEREREREREDKBkTERERERExgJIxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERERExgJIxERERERERAygZExERERERMYCSMREREbHz9/fHYrEYHYaIyF3BZLVarUYHISIiIsbbu3cvpuwrtA5uhovZbHQ4dxZ3C3i6Gx2FiNxhnI0OQERERG4PTk5OeLu64/LCXEg7Y3Q4d47mDWHeKCVjInLdlIyJiIiIo7QzkHrC6ChERGo9rRkTERERERExgJIxERERERERAygZExERERERMYCSMREREREREQMoGRMRERERETGAkjEREREREREDKBkTERERuV1l5cCI2eA3DNwHQbdxkHzE6KhEpIYoGRMRERG5HRUXQ8RkWPw1RD0Bbw2FMxeg6zg4dNLo6ESkBigZu43t3r2b9evXs2TJEubOncvixYuNDomoqChMJtM1D7PZzOXLl29a31lZWYwYMYKlS5fetD5qyowZMyp8nXr37m10eCIicjvo+lcY9s+Kz6/YDgmpEBcF4wfAqCdg80RwqgPjl926OEXkpnE2OgCp2M6dO7FYLPj6+pKfn290OA6ee+45Wrdu7VDm5OSEi4vLTeszKyuLDz74gPz8fAYOHHjT+qlJzzzzDA8++KBDWdu2bQ2KRkRE7igrtoO/F/QNu1rm5wn9O8GirZBXABazYeGJyI1TMnYbGzhwIPXr1wdg+fLlFBQUGBzRVREREfTp08foMGpUZmYmnp6e1KlTcwPGPXr0YOjQoTV2PRERuYvsToP2LaDs/5c6hsDc9XDwJNzf1JjYRKRGaJribawkEbsT5ebm8uqrrxIcHIzZbMbNzY2OHTvy1VdfOdQrLi7m5Zdf5r777sPLywtnZ2d8fHzo3bs3R48etddbs2YNzZs3B2D+/Pn2KX++vr728yaTifHjx5eLJTIyEpPJxMWLF+1loaGh+Pr6sn//fh599FE8PDxo0KABOTk5APz444/07dsXX19fnJ2d8fb2JiIigiNHrn/R9Pnz52/q9E0REamlTmVCgHf58pKyk+dvbTwiUuM0MlbL5ebmVruui4tLtUeFsrKySE9Pdyjz8vLCw8ODgoICwsLCSElJoXv37gwbNozs7GyWLVtGZGQkK1assK+bys/PZ968eXTp0oXHHnuMevXqkZyczJo1a9i9ezcpKSnUr1+fBx54gLFjxzJlyhTCwsJ48sknAXBzc6v28ysrNzeXLl260K5dO1577TUyMjIwm80cPnyYjh07UlBQQJ8+fQgJCeHIkSN88skndO7cmaSkJIKCgqrVx8iRI8nNzcVkMhEcHMwLL7zAmDFjMJlMvzhuERG5AxUUwoXL5cvyCiAj27G8gYdtNOxKPliu8aea689LAq7cXksYROT6KRmr5RYsWFDtupGRkTRu3LhadYcPH16uLCYmhn/84x/ExsayZ88e5s6dy/PPP28/HxsbS0hICGPGjKFXr16YTCYsFgunTp0qNwr49ttvM3r0aObMmUNMTAxBQUE8//zzTJkyhdatWxMdHV3t51WRnJwc+vTpw6JFi8o9t8LCQhISErj//vvt5f/v//0/fvOb3/Dmm28SFxdX6bXd3d3p1q0b3bp1o3Hjxhw5coT58+czduxYDh48yMcff3zD8YuIyB1k2/e2benLSkiFpd84lqXNgWYNoa4L5BWWb5P7cxJW9+at0xaRW0PTFGu5nj17Vvvw8fGp9nVjYmJYvHixwzFs2DAAli1bRkBAAI8//jjp6en249KlS3Tq1InU1FR++uknAEwmkz0RKyws5MyZM6SnpxMZGQnAjh07avYFKSM2NtbhcVZWFtu2bSM8PBxvb2+H+Fu2bEnjxo3Ztm1bldcdMWIEGzdu5K9//SvPPvssU6ZM4ciRI
[... base64-encoded PNG data truncated: SHAP bar plot rendered by shap.plots.bar(explanation[0]) ...]", +       "text/plain": [ +        "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot the SHAP values\n", + "shap.plots.bar(explanation[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 2: Shapley value analysis to explain importance of particular atom/node and bond/edge" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO2deVzU1f7/X7OwiAgaKIJSgAqIKRrmhlcTx6VE0wzKEk3L0ZZr1vfaqLcyc2m0foZtNqa5hKlganrJBUwJJRdAUGRRcEEEVARZZBtmzu+PQx8nBATmM/OZmc7z0R90ZuZzXjPCa87nnPciIoSAwWAwGG1FLLQABoPBMG+YjTIYDIZeMBtlMBgMvWA2ymAwGHrBbJTBYDD0gtkog8Fg6AWzUQaDwdALZqMMBoOhF8xGGQyGeVBUVHTixIm5c+cOHjx4+fLllZWVQiuqR8SymBgMhqlRV1eXm5t75cqVixcvpqen0x8KCgp0n+Pv75+SkiKUQl2YjTIYDIEpKSnJysrKyMjIysrKysrKzMzMyclRq9UNnubo6Ojt7S2VSmtqapKTkwGsX79+3rx5Qkj+G8xGGQyGUcnPz+cWmPSHK1euPPw0V1fXPn36eHl5+fn50R88PT1FIhF99KOPPlqxYoWtrW1cXNygQYOM+w4awmyUwWAYitLS0uzsbGqU1DQzMjIe3tO0sbHp0aOHrmn6+vq2b9+++Yu/88473377raura2Jiopubm8HexKNhNspgMPjh4WXm1atXH3aYTp06cQtM+oOHh4dY3OrjbrVaPXbs2OPHjw8dOvTYsWM2NjY8vY9Ww2yUwWC0HbVaPXfu3J07d9bW1mo0mgaP2tnZeXt7+/j4+Pr6+vr6+vj4eHt7P3KZ2XLu3r379NNPX716dd68eevXr+frsq2F2SiDwWg777///pdffkl/5muZ2QwpKSnvvfdeVFSUs7MzHTl37tzw4cMrKytVKpVcLudxrpbDbJTBYLSdbt265efnBwYGRkVFubq6Gnq64cOHnzx5cty4cdHR0RKJhA5GRESEhYVZWVkdPXr0X//6l6E1PAwLv2cwGG0kLy/v1q1bVlZWW7ZsMYKHAtixY0eXLl0OHz783//+lxucPn36ggUL1Gp1aGhoXl6eEWQ0gNloiyCECPLPw2CYMps2bdJoNC+++GLPnj2NM6O7u/uePXusra3XrFmzc+dObvyLL74YN25cYWFhSEhITU2NccRwMBt9NLGxsR06dHB3d7ezs1uwYMHdu3eFVsRgCI9Wq928eTOA119/3ZjzBgYGfvHFF4SQ2bNnJyUl0UGJRPLzzz97eXmdOnVKgB1SwmiW6Ohoa2tr3U/Mzs4uLCwsLi5Oq9UKrY7BEIzDhw8D8PT01Gg0dKSkpGTatGmHDx82wuxvvPEGgCeeeOL27dvcYEpKCg0D+Pbbb42ggYPZaJNotVqlUkm3sXv06JGZmbls2TKZTMblUfTq1Wvp0qXXr18XWimDIQChoaEAli9fzo188803AMaMGWOE2aurqwcPHgxg9OjRarWaG9+9e7dIJLKysjp+/LgRZFCYjTZOeXn5iy++CEAkEikUCu77lhBy48YNpVLp4eFBzVQsFstkssjIyNraWgEFMxjGpKioyMbGRiwW6y4jBgwYAGDnzp3G0ZCfn0+Tl95//33d8YULFwJwcXHJzc01jhJmo42QnZ3dt29fAB06dNi7d2+jz9FoNDExMSEhIdwtf9euXefPn3/hwgUjq2UwjA+NFX3uuee4kdTUVACPPfZYVVWV0WQkJCTQ5KUff/yRG9RoNM8++yyAAQMG3L9/3wgyeLbR4rrinOqcck05NzLq0qivbn/F7ywG5fjx4507dwbg7e2dnp7+yOcXFxerVKp+/fpxm6cBAQEqlaqiosIIahkMQfD39wfwyy+/cCNvv/02gHfffdfISmjykq2t7ZkzZ7jB4uJiGjwwffp0I2jgzUb3luztl94PSUASJMmS8ZfHZ1dnE0JczrssvrmYr1kMjUqlkkqlACZMmFBSUqL70LVr15p/bWJiolwut7e3p2bq4OAgl8vj4+MNqZfBEIBTp04BcHZ2rq6upiNVVVWdOnUCkJKSYnw99Gj+8ccfv3XrFjeYnp7u4OAAIDw83NAC+LHRbXe3iZJEskuy2LLYnOqc6HvRARkBk7InEfOx0aqqqpkzZza6GUoIUalU1tbWu3bteuR1ysrKtm7dKpPJuMWpn5+fUqm8c+eOwbQzGEZlzpw5AP7zn/9wIxEREQCefvppQfTU1taOGDECQGBgYE1NDTe+Z88ekUgklUp///13gwrgwUYrNZVOqU6DMwfXah+csZTVlVVqKomZ2OiNGzcGDhwIwN7eXvc+hRBSXV3NhcV9/PHHLb9mRkaGQqHo0qULfa2NjU1ISMj+/fvr6ur4ls9gGI+Kigq6yrt48SI3OGrUKADff/+9UKoKCwu7d+8O4N///rfu+JIlS+iObU5OjuFm58FGD5YeRBK2393e6KOmb6NxcXEuLi4AevbsmZaWpvvQzZs3hwwZQndetm7d2oaL19TU7N+/PyQkhO4VAOjevbtCoaAFxBgMs+PHH3+k6z5u5MqVKyKRqF27dg32wYzMn3/+SY+bNm7cyA1qNJoJEyYA8Pf3N9xxBQ82uvbWWiQh+X5yo49yNlqnNcVVmEqlsrKyAvDss88WFxfrPnTy5EmaJuzu7n727Fk9J8rLy1MqlV5eXg3CpHTvQRgM0ycwMLDByThd8c2cOVM4UfVs3bqVLnpOnz7NDZaWlvbu3RvA1KlTDZQyw4ONLi9YjiTcqL3R6KPURu+o73RJ7SK/Lk+6n6T/jLxQXV09a9as5jdDAYwYMUJ331pPaJhUWFhYu3btqJ926tRJLpenpqbyNQWDYTiysrJEIpG9vX1ZWRkdqaurc3d3B/DHH38Iq41CAwZcXV1v3rzJDWZmZjo6OgL4/PPPDTEpDzYafiscSTh7v/H1GrXRzUWb6SE+kjAwY+D6O+vv1d3Tf+o2k5eXR/u32NvbR0VF6T6kVqsVCgX1OLlcbqCg+pKSEpVK1b9//wZhUuXl5Y9+MYMhEDSyfc6cOdzIgQMHaHSgieRG19bWjhw5EsDQoUO5QAJCyL59+8RisVgs/u2333iflAcbPVp2FEnYUrSl0Ue5m/qLVRcVeQrnVGdqprbnbEOuhMSUxWiJsT
/9+Pj4rl270hTP8+fP6z50+/btZ555hp4I6d62GI5Tp07NmTOH7tkDaN++va+vb9v2YRkMg6JWq+kfzqlTp7jByZMnA1i9erWAwhpQVFTk6ekJYN68ebrjS5cupfd/ly9f5ndGHmy0Vlvret7VP92/StNI9kKDI6ZqbXVkcaTskkyUJKJ+2iut19L8pddrjJSZzt2tP/PMM7pFDQghSUlJjz/+OIBu3brp7q0YgaqqqsjISC5MSiQSbd/e+JEdgyEUe/bsAdCnTx9upLCw0MrKSiqV5ufnCyjsYZKTk+3s7ACoVCpuUKvVTp06FUDv3r1LS0t5nI6fuNG9JXslyZLBmYP3luxNq0qLLYt9/8b7Pxb9SJo+qb9Re0NZqPS44EHNVJwkll2SRRZH6kZN8Ut1dTWtCkPv1nXLGRBCIiIi6H7l8OHDCwoKDKThkRw8eNDJyYne4wulgcFoFHrkvXbtWm5k9erVACZPniygqqb46aefAFhZWelu2paVlfXp0wfAlClTeNyF4C2L6WjZ0eFZwyXJEiTBJtlmUOagfff2EUI8Lnh8nN9kuKWGaGLKYkKuhFgnW1M/7Xq+6/wb8y9U8pyZ3kzoUoPNUMGPzj/66CMatC+sDAZDl7y8PIlEYm1trXsPR0/ADxw4IKCwZliwYAGArl273rjx4AA8KyurY8eOAFatWsXXRDzn1Ndoa+6q77Zhu7OwtvDzws99L/pSMxUliUZmjdx5ZGdlZaX+qpoJXbpz505QUBDdDNUNNxOQixcv0iBWoYUwGA9YsWIFgJCQEG7kjz/+oCbV4MbOdKirqxs3bhyAIUOG6B43HTp0SCKRiMXi//3vf7xMZHIVnhLvJ8qvy+3P2buluEmsJPpnpjcTunTu3Dla787Nze3PP//UW3vbqa2t5X4Xa2pqpFKpRCIxZqUcBqMZtFotrfRx6NAhbpAmTy9ZskRAYY/k7t27NFh7xowZuuPLly+ngTq676jNmJyNUkrrSiMSI2hZVkr//v2//vrrBhHyzdN86NLPP/9MN6GHDRsm7Ab5jBkzrKysDh48yI306tULQIOUKgZDKI4ePUoT8LhU5vLycnt7e5FIxPupN+80WhJfq9XSE11ra+u8vDw9pzBRG+VoNDM9JibmkdvDzYQu1dXVmdRm6Jtvvom/16GZOHEigN27dwuoisHgeOWVVwAsXbqUG/n+++8BjBo1SjhRraDRkviFhYU0Rfu7777T8/qmbqOU1mamNxO6VFRURL+FpFKpUqk0hvpHER4eDuDNN9/kRv7zn/8AWLFihYCqGAxKSUmJnZ2dWCzWrRX59NNPA4iIiBBQWKtYuHBhgzjxqqoqsVgMYP369Xpe3DxslINmptPYWjTRwKOZ0KXU1FT62s6dOx87dszY6pvg4MGDAIKCgriRH374AUBYWJiAqhgMytdffw1g7Nix3MiFCxcAODo6Gqe2PC/U1dU1qJyyd+9eAB07dtQ/8snMbJTSVGZ6cnJyM3frO3fupFskTz31lEn1obt69So95uJG4uPjAQwaNEhAVQwGhXZY0i22++677wJ4++23BVSlP5MmTQJPWfZmaaMcd+7cWbt2LY2n5bCxsdm0aZPu0+hmKO3o+eqrr/ISRMUjGo2GHnZxmRV37twB4ODgIKwwRmtZvHhxQkKC0Cr4JDExEYCTkxMXMFRTU+Ps7AwgObnxom5mQUFBAc2/4iXXxrxtlINr4CGVSmNiYnQfKi0tpSc2prMZ+jC0s41uMxmay2RqOXaMZqCbMxKJ5L333jOju93moeefCxYs4EZ27twJoF+/fgKq0p/PPvsMwAsvvMDL1SzERilPPfUUAN0I0NzcXBrv1qVLF2P2rW4ttOX3tm3buBFa1dHQzQ8YPFJbW6tUKmmQsqenZ2xsrNCK9KWysvLhDktjxowB8M033wgoTH98fX0BREdH83I1MSwI+tFkZmZyI66uru7u7v379z99+jQtn2WaUOVZWVnciI+PT4MRholjZWWlUCjOnj07cODAq1evjhkzZu7cuWVlZULraju7d+8uKSkZPHgwvVsCcO3ataNHj9ra2tIQKDMlLi4uMzOzW7duNMdJfyzKRh+2HqlUGhUVlZCQQLOVTJaHlTMbNVP69ev3559/0mXphg0bfH19f/31V6FFtZFNmzYB4HqRAdi8eTOtk0RXqWYKfV+zZs2SSCT8XJGXNa2JQHdtpkyZIrSQVpOUlATgySef5Ebo39748eMFVMXQh7S0NC4NLyQkxOxaw+bk5IhEovbt23MnnxqNhoZjm/Ve07179+zs7EQiUXZ2Nl/XtKjV6MM39eaCj48PzavTaDTcCMzzvTAoffr0SUhIUKlUtMPCk08+uXv3bqFFtQJaqSc0NJSrKX7kyJHc3FxPT0+aH2im/Pzzz5WVlUFBQT169ODtonz5sSlQWVkpFoutrKwM1PnDoND2sNw3pFqttra2FovFFnPm+48lJyeHVhEDEBwcrNsjyGS5du1a586dAZw4cYIbzM/PX758+YYNGwQUpj8BAQHUTHm8pkXZKCGE7oFmZWUJLaTV0BRV3aNDurhm3e4sAK1Wq1KpOnToAKBjx466JdlNiurqai7r2tXV1d7e/tKlS0KL4pPz58/TfwJ+g8ct6qYe5nwy8/COhPnuUTAaIBKJ5HJ5ZmbmpEmT7t279913F4ODceOG0LJ0SE5Onj9/vpub26RJk6KioiQSiUQiqaioeOGFFyoqKoRWxxtcmjWXAMkLlmaj5ms9D38BmO97YTSKm5vbr7/+unnzZq12dXQ0+vbFxo0gREhJJSUlGzZsCAgICAgIoIUo/fz8lErljRs3MjIy+vTpk5aWRms7CKmSJ2pqanbs2AFg9uzZPF+ax5WtKfDdd98BeP3114UW0mqOHDkCYOTIkdzI5s2bAbzyyivCiWIYhMJCMnUqAQhA/vUvYvz7ZlqVIiQkhCYL4K+qFElJSbpPu3TpEu23YRnFxn7++WcYpsuZpdno77//DiAwMFBoIa0mNzcXgIuLCzeSkJBgoH91hikQGUk6dyYAsbMjSiX5qyCyYcnMzFy6dCmNW4JOjbSmqu4ePnyY9tsw2YZLLWf06NHgo7row1iajd68eROAk5OT0EJajVartbe3B8BV+C8pKQHQvn17HlsYMkyK4mIil9cvS4cNIxkZhpro3j2iUpExY0K529DevXuvWbOmJUUbVq5cCaBDhw4XL140lD7Dc+XKFbFY3K5du1Z10GghlmajhBAa5lZUVCS0kFbzcE0AWvZft68hw/KIjibu7gQgtrZk6VLCY7SeRkPi44lcTtq3JwAZMWKng4NDWFhYS/pHcGi1WlrzwcfH5969e7yJMy4ffvghDFbD1wJtlNbl1o13MxemTZsGYMuWLdzIiBEjADSoWcWwPO7dI3I5EYkIQPz9yd+3KNvC1atk6VLi4VG/1BWLyejRZNeu0rYF+pSXl/ft2xfApEmTNBqNvuKMDpd/ZaD6RJZ2Ug9zPuBmmfX/WBwdoVLh+HH06oXUVAwejEWLUFMDADk5SEqq/5kjIwNXrjRyn
epqREVh4kT07Illy3DtGrp3h0KBy5cRG4vQUIe2BfrY29vv37/f2dl5//79n376aRuuICyHDh3Kzc318vKi6xLesUAbNV/refgLwHzfC6MNjBiB5GS88w60WqxejaefRmoqPvgAAwdi5cq/PTMsDEuW/G0kKQnvvovu3REaiv/9D1ZWCAnB/v24dg1KJby89NXm4eGxY8cOqVT66aefmldWK/6qRTJnzhxau513mI2aEA/n0ZvvyvphwsPDHRwcrKysRo4ceaXRpRQDsLfH118jPh6+vsjIAC2xYGODNWvQ6G9BYSHWrUP//hg4EF99hbt3ERCA8HDcvInISEycCL5qGAGQyWSrVq0ihMyaNevixYu8XdfAFBUVRUdHS6XSGTNmGGoOQ+wUCAvtt+Xt7S20kFZDawJYW1ur1Wo6kp2dDcDd3V1YYXqi0Wg+/vhj3YWAWCweM2bMzp07udYUjAZUVpJDhwgh5IUXyIgRJCCAjBpFuGOhgADy0kskPp5IpfW7n66u5IMPDHjWz0F38Hv16tWgQ5zJ8vnnnwOYNGmS4aawQButrq6WSCRSqVTwBvRt4IknngDAJTLX1dXZ2tqKRKLy8nJhhbWZsrKyKVOmAJBIJMuXL//++++nTp1KewsC6NixI+1FKLRM0+WFF8gzz5CTJ4lIRLj2CNRGa2pIt24kOJhERvJ5vt88lZWVtLrH2LFj64wT7Koffn5+AH799VfDTWGBNkoIoSWw0tPThRbSamg57v3793MjtGGfmRrNpUuX6C/xY489duTIEW783r17KpWKNkqhBAQEhIeH3717V0C1pgm1UULItGmkc2dCPyFqo4QQQVbzXP2n//73vwJM3xpOnDgBwMXFxaBV3yxwbxRse9Q0+O233wYNGpSent6vX7+zZ8/SHj4UR0dHuVx+4sSJtLQ0hULh7OyclJS0YMGCbt26hYaG0i5GAio3Tb74AjU1WLTob4M2NgIoeeKJJ/bs2WNlZbVq1arIyEgBFLQYrtC9lZWV4WaxTBs1X+tpqkCJeX0lEEJWr149ceLEe/fuhYaGJiQkeDVxVNynTx+lUpmXlxcZGSmTyWpqaqKiosaMGePr6/vJJ5/cMKkiSELj5oZly7BpExIThZYCDB8+fM2aNYSQ119/nZ5GmCAVFRU0qGDmzJmGnclwC10BUalUAF577TWhhbSao0ePAhg+fDg3sm3bNgAv0Vs4c6C8vHzq1KkARCKRQqFoVSZrbm6uUqmkG8QAJBIJzfg2xzrcfMHd1BNC1Gri709GjnxwUy8stFSSh4eHabZIoWXxRowYYeiJLNNGjx8/DmDo0KFCC2k1tCaAs7MzN3L69GkA/fv3F1BVy7l8+fKTTz4JwMHBoc2b+lz9Ie5GzNXVVaFQXL58mV+1ZoGujRJSf9ZkY2MSNlpVVUWTBmUymQkeNw0ZMgTA1q1bDT2RZdpoYWEhgI4dOwotpC30798/ODiYiwQqLS0F0K5dO9NPwjt48CBtGOnj45PBR+hNQUFBeHg4TUOkBAQEqFSqiooK/S9uLjSwUULI7NkEMAkbJYTcvHnT1dUVwAcffCCIgOrq6vPnz0dFRZWVlemOZ2RkAHB0dDRCGx7LtFFCCP17vnXrltBCeID+ml67dk1oIU2i1WqVSiVtVxscHMx7AYvExES5XE4rYOGvE6r4+Hh+ZzFNPvmE/N///W3kzh0yfjwxnRKgJ0+epHVL+W1w1CjFxcWJiYlbt25VKBQhISF+fn5ck+QGvw/vvfcegHnz5hlaErFgG6Xr+bi4OKGF8AC9TZbL5aa5A1VRQcLC1F5eE8Ri8aeffmq4sn6lpaVbt26lTasotFS7aX4s/yi++eYbes+UmJjI1zVramouXrz4yy+/rFq1aubMmYMHD6Y1pBtgZWXl7e39/PPPnz59Wve1NCTr7NmzfOlpBou1UXo2Z+5dDNVqtUKhAODk5ATA2to6ODg4MjKSS3MSnJwc0q8fAUjfvlVGq+ybnp6uUCjo3wkAGxubkJCQVhV/M0eyskinTsTXV2gdTfDGG28AeOKJJ27fvt2GlzdYZgYEBNg0FszVsWPHgICAkJCQpUuXRkZGJiYmNlqzioZh9e3bV++31SIs1kZXrVoF4P8a3A6ZFXfu3KGNeW1sbN5+++0JEyZw9y+PP/740qVLBb/NP36cdOlCANKrFzF+Sd/q6urIyMjg4GDuY3F3d1coFIJ/LAYiN5cApFs3oXU0QW1t7fDhwwEEBQU1/zWvVqtzcnJiYmLCw8PlcrlMJqPbVg/j6uoqk8nkcnl4eHhMTExOTk4LxQwbNgzAV199xcc7ezQWa6N79uwBMGHCBKGFtJHk5GQa9+Pm5nbq1Ck6mJ+fr1Qqe/bsSX/JxGJxYGCgSqUSpJe9SkWsrAhAnnuOCJtdnZeXp1QqPT09uY/lySefXLhwoSAfi+EoKSEAcXQUWkfTFBQUdOvWDcB777338KNRUVGTJ0/28fFpNBLe0dFx0KBBM2bMWLly5e7du9PS0tqczH3o0CEAIpGosLBQvzfUUizWRtPT0wH06NFDaCFtYfv27XZ2dgACAwMbbfNAj1yEykyvqiKvvUYAIhIRhYKYSAQBDZPS7Z1rLlFiLUStJgCRSIgpb10kJCTQm/FNmzY1eEipVDazzGzzhszNmzdjYmJUKtX8+fNlMhk9W6bbC/q+mRZjsTZaU1MjlUolEklVVZXQWlpBXV0d3QwFIJfLm/9CppnptE4ExQiZ6TdukKefJgCxtye7dxtunrZz48aNSZMmicViANnZ2ULL4RNbWwIQE19kb9myBYCtre2ZM2d0xzMyMiIjI1NSUtr8J1lWVpaYmLh9+/YPP/wwJCTE39/f1ta20d0AT0/PtLQ0Pt5Ni7BYGyWE9OrVC0CDTzMiIuLAgQMmGCpMCCkqKqLH0NbW1q06HEtKSnrrrbe4c8wuXVxmz1YbIhzojz+IiwsBSM+e5MIF/q/PI3R37PfffxdaCJ/QTqKmH8U3d+5culWtT8RhcXFxfHy8SqVSKBTBwcFeXl70q7EBnTp1CgwMlMvlSqVy//79WVlZxv/rtmQbnThxIoDdOksmtVrt5uYGk8yKSUlJobt7nTt3blvHGHrkIpPJRoyQ0xqU3t5k6VKSm8uPQm4zdPx4YoDuijxD8xTXr18vtBA+8fQkADH9FXZtbS1t1zFs2LCWbHHW1NTk5OTs379fqVTK5fLAwMAOHTo87JjW1tZeXl7BwcEKhUKlUsXHx5eWlhrh7TwSS7bRhQsXAlihE6ZcWVm5evVqWv6DbkKPGjUqIiKibX2+eGTHjh10M/Spp566fv26nle7fLlu8WLi5lZf0NfKikyZQg4caHsn9Orq+swZuhlqkkv5hqxevRrAggULhBbCJzS2LCVFaB0toLCwsHv37gDeeeedBg+1fJkZEBAQFhamVCojIyPT0tJM8yaSWLaN0qrXw4YNe/gh08mKoZuhtDL89OnTeTR0jYbExJCQkPolJC2QrlCQRpfg6ekkJoY0+GpPTiZJSSQvjwweXN/+lysbbPr8+uuvAMaP
Hy+0ED4ZNowAxFyyt5KSkuhx39y5c1esWPHqq68OHDiwqWWmn5/fCy+8sHjx4i1btpw+fdpcSutTLNlGv/vuOwA2NjZNbdAInhVz9+7dsWPHApBKpUql0kCzFBSQ8HDSt2+9mQIkIICoVEQ3MX3OHAIQufxvLxw7lowZQw4dIhIJ8fQkqakGEmgQaJlET09PoYXwybhxBCAHDwqto8V8++23YrG4QTvSBsvMxMREc+8lY8k2mpOTI5VK6c379OnTm+n2LkhWzPnz52kVTmdnZ+OchCQmErmc2NvXm6mjI5HLCU3emzOH2NoSsZicPPng+dRGCSE7d5KiIiMI5BO1Wm1tbS0Wiy0penTqVAKQyEihdbSYefPmAfDy8vrggw82bdp08uRJi2xwYMk2SghZv369g4MD9zXYp0+fL7/8sqnFpjGzYn799VcqbMCAAUbOurl3j6xfXx+0RP/bv5/MmUP69SPjx5O+fR909eFs1EyhFa9TzWsV3Sw0XPfHH4XW0TKOHTsmEolsbGyMGXskCBZuoxS62HRxceE2YprPTH84K4bH4sG0GBLdDH3llVcEXCulpxOFgvTsSe7fr7fR9HRiZUXWrKl/grnb6OTJkwHs2rVLaCG88c47BCDr1gmtowXcv3+fptutMJ1SVAbjH2GjlLq6OloMmN7pA+jWrZtCoWgqUffhrJiuXbvOnz///PnzbdZQWlr6/PPPG3oztFXQfQtqo4SQ994j7dsTuj42dxtdtGgRgGXLlgkthDcWLyYAWblSaB0tYMGCBSeh5tAAABDNSURBVAD8/f3/CZ0L/kE2ykEz02n3UG6xuXXr1qZOyYuLi1Uqlb+/P7c5QIsHt7bpcVZWVu/evQE4OTnRrm2mA2ejpaXEzY08/zwh5m+jmzdvpkt+oYXwxsqVBCCLFwut41GcOnWKNjnnsW6eKfNPtFEOGvZEAzbxV2b6uXPnmn8+F7Hh4OAQFhbWzMmVLgcOHHB0dKTfz1euXOHvTfADZ6OEkB076o+Dzd1GExIS6Hee0EJ4Y906ApCHAjFNi+rqatoV3PTbL/PFP9pGKTQz/amnntJdbDaTmV5eXr5x48ahQ4dyz+/bt294eHhxE5k9dDOUBhi//PLLpnlwrGujhBCZjPTpQ4KCzNtGS0pKALRv395i6pBu2bK9QweHmTNnCi2kOZYsWQLA19fXvMpZ6AOz0QfQnum0QDIAW1vb5sOeMjMzFQpFly5d6PMbjd4vKyubMmUKAIlEYiKboY3SwEYzM4m1NbGyMm8bJYTQf50bN24ILYQfaDXiqVOnCi2kSVJSUqysrMRi8YkTJ4TWYjyYjTaEy0ynh+kAvL29lUplU7ULa2pqdu/e/cYbbzzstpcuXfLz8wPw2GOPHTlyxPDa204DGyWELFlCALO3UZrZ3cKNF9Pn4MGDAMaNGye0kMZRq9W03tj7778vtBajwmy0SZrqmd7CBh7R0dG05FK/fv1aXrVbKNaubZjCdP8+ef55IlC3R95YsGDJgAGjf/yxLaVeTJD4+Hg0kd9sCnz66ac0c6y1p6/mDrPRR9CGnum6m6GhoaHm2A1YoyF8N/cUhv/3/8zgTKblpKSk0C9moYU0QkZGhq2trUgkspi1f8thNtpSWtgzvby8fOrUqTQDVaFQmOPhxuHDpF07MnGi0Dr44H//s4StCY7s7GyYZKEAjUYTGBgI4M033xRaiwAwG201jVaH+vPPPwkhsbGxNAHRwcFh3759QittIxcu1NcqtQCyswlA3N2F1sETt27dAtC5c2ehhTRk7dq6kSOVPXv2MpECoEZGRAgBo/WUlpbu2LHjxx9/PHv2LB2xtbWlFWr9/Pz27dtHa++bIzU1aN8eIhHu34e1tdBq9EOjgb09ampQVoa/vvjMmMrKyvbt29va2lZVVQmt5QHZ2fD3R2UlDh5Ujx/fSLs6i6eRaqmMluDo6Dhv3rwzZ87QhH2JREKLfXXt2jU2NtZ8PRSAjQ08PFBXh5wcoaXojUSCHj1ACC5fFloKH9jZ2Uml0urq6rq6OqG11EMI5s1DZSVmzsQ/00PBbFR/evfurVQqi4uLP/74423btt28ebOppttmBO0PkJUltA4+8PUFgMxMoXXwBG0HW1FRIbSQetavx9Gj6NoVa9cKLUU4mI3yg4ODw7Jly8LCwhpth2B2WJL10PdiGV8JAOimfHl5ue7g4cOHCwoKjC8mNxeLFgHAt9/isceMP7+pYAl/8wzesaTVKH0vlvGVAICWdNBdjVZXV0+ZMsXNza1Hjx5z586NiooqKyszjpi5c1FejtBQvPCCcSY0UZiNMhrBkqzHkr4S0NhqtKioaPTo0fb29leuXNmwYUNoaGiXLl1kMtnq1auTk5O1Wq2BlGzZgkOH4OSEr74y0AxmAzupZzTCrVvo2hUdO6KkRGgpelNWBkdHtGuHigpYwI5LUFDQsWPHjh49GhQUpDuu0WhSUlJiY2NjY2Pj4uLUajUdd3Z2HjVqlEwmGz9+/OOPP86XjMJC9OmD4mJERODVV/m6qrnCbJTROI89hpIS3LqFv0qvmDFubigowLVr+Cuz14yZNGnSgQMH9u3bR+t/N0pxcfHRo0djYmKOHDly/fp1btzPz2/y5NOBgfYjR6J9e71kTJ2KPXvw3HOIjtbrOpYBs1FG4wwdilOnEBeHESOElqI3o0bh+HEcPoyxY4WWoh8ajSYgICA1NfWpp56aPXv2mDFjvL29m3/JlStX6BI1JibG2vrxO3dSCYFUCn9/yGSQyfDMM/irHURL2bULL78MBwekpcHdve1vx3IQMvafYcLQ7mkqldA6+GDePLNpYdQMxcXF48aNA/CYzqF4165dQ0JCtm7dWvSo3q1qtTohIeejj8iQIUQiedDQ0NmZvPQS2biRXL/eIhlFRcTFhQDkhx94eFOWAbNRRuN89hkBiGUUPPvySwKQt94SWoceZGZm0jxjZ2fn3bt3R0ZGyuXybt26cX4qFosDAgIUCkVMTMwj276Xl5OYGKJQED+/B34KEC8vIpeTyMjmCtNMm0YAMmoUMcNyEYaC2SijcfbsIQCZMEFoHXxw8CABSFCQ0Drayv79+2kHmv79+1+9elX3oZycnPDwcJlMZmtry1mqnZ2dTCZTKpWJiYmPLI5z+TL57jsyZQpxdHzgp9bWpKSkkSdHRxOAtG9PTK8PjpCwvVFG42RkwM8PPXogO1toKXqTn49lyzBoEF5/XWgprYQQsmbNmiVLlmi12mnTpm3cuJFrHdaAysrK+Pj4I0eOxMTEXLhwgRvv3r379Onr/f2DR49G587NzaXRICUFsbGIjUVZGU6fbuQ55eVQKNC7N/79b73el4XBbJTROLW16NEjr2vXrPj4f9nammuFkooKZGXBxQXduz8YLC7G1avo29fUC6+Ul5fPmDFj3759Eolk5cqVCoWihS+8fft2XFxcbGzsb7/9lpeXN2zY1YQEDwBeXggOxsSJGD4cOovXRsjPR3o6vLzg5fVg8Pp1XL6M0aPxV18Ixl8IvBpmmDD0FPjChQtCC2k7J07UF8r
TLce+fTsBiIn3Z9Jtx93mQsharTY1NXXdusqxY0m7dg/u2e3syLPPkrVrSVP/tlFRBCA9exLdrnRffEEAotG0TYslY/7hyAyD4ePjAyDL/BOACgvxySdCi2gN0dHRgwcPzsjI6Nev39mzZ2UyWduuIxKJ+vXrN39+u8OHUVaG+HgoFAgIQHU1Dh7E+++jb1+4uCA0FBs24ObNhi+/eROrVun7Xv4JMBtlNAk9Gs40/5xQuRzr1iE1VWgdLYAQsnr16kmTJt27d++ll15KSEjw9PTk5cpSKYYPh1KJxETk5yMiAjNnwtUVt28jKgpz58LdHQMG4IMPkJRU/5J338WaNZaTR2s4mI0ymsRiVqMzZ2LAAMjlMFh+OT9UVFS8+OKLixYtEolESqVyx44d7fVMNmoCFxe8+iq2bEF+PnJyoFIhJAQdOiAlBZ9/jmPH6p/25pvo1Qtz54IdoDQPs1FGk1jMalQsxtq1OHsWP/wgtJSmyc7OHjJkyJ49exwcHPbu3atQKERGOcrx8oJcjshI3L6No0ehUGDixPqHpFKsXYu4OEREGEGIGcNslNEk1EazsrKI+a9Ghg/HzJlYvBi3bwstpTEOHTo0aNCgixcv+vj4nD59eiLnZEbExgZBQVAq62tiUcaMQUgI/vMfSyhSYziYjTKaxMnJycnJqaysrLCwUGgtPPDFFxCL8eGHD0YyMhqPjjQmdDM0ODi4pKRk4sSJZ86cod9epsO6daipwaefCq3DhGE2ymgOi7mvB+DkhOXLsWnTg7OmVaswZAh698bq1bhzRwBJFRUVoaGhixYt0mq1CoVi3759Dg4OAuhoFldXfPQRvv0Wly4JLcVUYTbKaA56ymSONtroadLcuQgIwNdf1/+vhwe6dEFmJhYtgrs7XnoJR44Y7xgqJydn2LBhu3fv7tChw549e5RKpcl2oHn3Xfj6YvNmoXWYKib6z8YwEcz0sP7OHchk2LWr4bhYjPXrUVtb/7/LlyM/HzExCAmBRoPISIwbh8cfx6JFuHrVsArj4uKGDh164cIFb2/v06dPT5482bDz6YdUiu++g8l0IzU5mI0ymoM7ZRJaSCs4cwYDBuDYMSxbBjs7BARANw09IACLFiEgoD4TVCKBTIbISFy/DqUSPXrg5k2sXo2ePTFmDLZtgyEawm/YsEEmk925c2fChAlnzpyh2UomhYsLZDLY2DwYGT4cS5ZAJmOZoI0hcBYVw7ShBurh4SG0kJYSEVGf9Th8OCkoaPXLNRoSH0/kcmJnV5832bEjkcvJuXP8yKuqqpoxYwYAkUikUCg0LLPSImA2ymgOtVptbW0tFovv378vtJZHoFYThaLe++RyUlOj19Xu3iXr1hF//wd56MOGaTdt2lZWVtbma+bm5g4cOBCAvb39L7/8opc+hinBbJTxCOh9fWpqqtBCmuPOHRIURABiY8NzVfa0NKJQECcnMnDgbQC2trYhISExMTGPrOPZgLi4uC5dugDo2bNnWloanxIZQsNslPEIJkyYAOCTTz4RWkiTnDtHPDwIQNzcyJ9/GmSK+/fJrl2nRowYwWUW9e7d+4svvrh161ZLXq5SqaysrAA8++yzxcXFBpHIEA5mo4xHEBwcTI3D29s7MjKytrZWaEV/4+ef6/cxhw0j+fkGn+7SpUtLly7lOhVLJBKZTBYZGalWqxt9fnV19axZs9hmqGXDbJTxCMrKynx00gNdXFwWLlyYmZkptC5SV8fnZmgrp66LiYkJCQmha0wAbm5uCoUiOztb92l5eXmDBg2im6FRUVHG08cwLsxGGS0iOztbpVL5+/tzfhoQEKBSqcp16yEbkaIiIpMRgEilRKkURAIhhBQUFCiVyp49e9LPRCwWBwYGqlSq+/fvf/PNN87OzgB69Ohx/vx5wSQyDA+zUUbrSExMlMvlHTp0oMbh4OAQFhbW5vLsbSM1VePpSQDStSs5ccKYMzeOVqs9duxYWFhYu3bt6MfC/RAUFHT37l2hBTIMC+vFxGgL1dXVBw4c2LBhw9GjR+mvUO/evWfOnDl79uzOzTdO05tdu3Z9+OEPJSVHnnhCvHcv/tqlNAnKysp27ty5bdu2hIQEQoi/v//p06dtdKPYGZYIs1GGXmRlZW3evHnz5s23b98GYG1t/fzzz4eFhT333HMSiYTfuTQazZIlSz7//HNCyMKF25YvDzNZgzpy5Iirq2vfvn2FFsIwBsxGGTyg0WiOHTu2YcOGvXv31tXVAejWrdv06dPnzp3LVw+MsrKy6dOnHzhwQCqVrlixouVtMhkMQ8NslMEn+fn5P/300w8//JCTkwNALBYHBQWFhYWFhIRw24VtICsra/LkyZmZmc7Ozrt27QoKCuJPMoOhL8xGGfyj1WoTEhJ++umniIiIyspKAJ06dQoJCXnrrbd0z/pbyIEDB6ZPn15WVta/f/+9e/d6eHjwr5jB0ANmowwDcu/evcjIyO+///7cuXN0JCAgQC6XT5s2jTvrbwZCyJo1a5YsWaLVaqdNm7Zx40Y73WJNDIZpwGyUYQySkpK2bdsWERFRXFwMwNbWduLEiXK5fPTo0U01bisvL58xY8a+ffskEsnKlSvZZijDZGE2yjAeD4dJ+fj4zJo1a9asWbRsB8elS5cmT56ckZHh5OS0c+dOmUwmkGQG49EwG2UIwOXLl7dv37558+bc3FwAEolk1KhRcrl8ypQpUqk0Ojr61VdfLS0t9ff337t3L19n/QyGgWA2yhAMtVr922+/bdy48eDBgxqNBoCVlZWrq+uNGzcIIS+//PKmTZvYZijD9GE2yhCegoKCbdu2rVu3rqCgAIBIJPrss88++OCDprZNGQyTgtkow1TQarVfffXV+fPnR4wY8dprrwkth8FoKcxGGQwGQy9YZ1AGg8HQC2ajDAaDoRfMRhkMBkMvmI0yGAyGXjAbZTAYDL1gNspgMBh6wWyUwWAw9ILZKIPBYOjF/wc5HGaMHRX65gAAAdN6VFh0cmRraXRQS0wgcmRraXQgMjAyNC4wOS40AAB4nHu/b+09BiAQAGImBgiQAGJpIG5gZGNIANKMzOwOGkCamZkNQrNAxJmY2BkUQHwYFyEMVY4mDtfukAGWZ0RiQGQEwQYyYirAcAEWI3AZys3AyMDIlMDEDGQzsLAysLIxsLEzsHMwcHAysHMpcHFnMHHzJPDwZjDx8jHw8ifwC2QwMQlmMAkKJQgJZzAJiySIiGYwiYoliIlnMIkzJnCyMAhwJYgLJTixAM1nZQQqFGdjZWPn4GRh4+bh5RfgYhMWERUTFxLXYgR6hgEWrFfaDti/2dpmD+KETjNwaKzLsQOx/V1MHQ6aLtoPYqc+vmLv68B7AMRe2ddu58R7FSyurypoO73Cah+IPbFs3X6hiTvAep/yKh1Q9xUBq1maIHwg8RMzWK98c/p+3jVqYPbh3uoDjelie0HsJtnWA5+5doLdUHB34wGvun9g9n636weKjSFuC2dmOnhq2WkwW+7D+QOftO6B7TUwWHxA408l2K6PnzscLk68AmZX2C52eLn+PFgNy9pLDm
e6TcDsW5xPHBzz5oDNUfl+1GEvp6QDiD2jfZpDbQwXmH1Iq9WhvsgZrEYMAGdzeABCE6vJAAACVnpUWHRNT0wgcmRraXQgMjAyNC4wOS40AAB4nH1VW6obMQz9zyq8gTGSLMn2501yKaXcBNq0eyj0s/unRx6S8QXTmVh47GM9j5xTiuf79dvvv+n1yPV0Son+8+u9p1+FiE4fKSbp/P7l6y1dHm/n58rl/vP2+JFEk1ScwfsZ+/a4fzxXOF0SZyNlNkzIRONQpvEcJyXdkuRqnatjWwp14wWuQJ/kVkS6po2zeBW3BVCHYffCndImWaiS0QJosAyDrtUpNLqr+spFh0asFtPiaaPMTZhWPlYAoai49F6wX5y0yQLYYBquWRHqLVRa7YVWyB4qJWOzVo0ZObv3BZIpoJQ7adEBhXVZ5pKjOFvJsGkoyhbBIZ2rJLEEVDNbbyZIrKohYytkVGizbNakMZC9NRRrhYwSbZ6r1NI8yKENaldIS5c/aWuZxdrgEXhCSx6x70q1N6QoYtJSuizDr7unoLwXGTmrDqauoFEpzVJ7rW0nCei0TFRUyjKIIabDPJmXVfgShfJsDTyhwacKsq7Cl6hTRfAARu5FhNdAGSq5ucJj0KWz8NJ2FElzJXgnALJz60uNOoDY1xrhwkV1XwDfb9dPfb/fBOf77XrcBPHK0fC4PlI5+pox9OhexrCjRxnDj05kjHr0G2O0o6sYox+twzHmBol95qkNNATLxHYNwWVitYZgndjLQ9jEUh0rPpExPsG0iXMasbaJWRqC+8QgDSEzUzSE8MQIDSEylV5DSJlqrCFEp2LquLVfC5FX5OMVggyH65QcqfuZw7+o9FzX+H7+O2B++gcuuTJBB7elowAAAS96VFh0U01JTEVTIHJka2l0IDIwMjQuMDkuNAAAeJwlUDmSwzAM+8qWyYzC4U1xXLrPJ9zu5AV5/IJeN7JACARwfuR6nG89z/fjsuvC5fd5XfY8T31+Lr3mU/n5PoSCXWQJcajbOpQqWvZiUuMOGWSbaq+XkGZprUMo0wSIknJxrAPs9MrhZLrXIGzhFmAxyVZhSGFsqd1Qt2TfCkgpTGVYUW08kBF+wgZjy5LeAJ0kegecukdi5ysoYusG0nsrvL+S4M960vj24WwSBQkIUvE/x3tzjribtcqtxN15L+xKNHL7gpmqCckpiUgzZTf3wWBfscBJq2v/B2eQgpAUk5HnSJg5kmIj9l1EoTg8K9jKcaWqaB8U2em6sLRFdXSL8R6AJJrGHSd6ZYKCZ67n9w9ramN6GPzYewAAAABJRU5ErkJggg==", + "text/plain": [ + "" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mol = Chem.MolFromSmiles(test_mol)\n", + "mol" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of atoms: 24, Number of bonds: 27\n" + ] + } + ], + "source": [ + "n_atoms = mol.GetNumAtoms()\n", + "n_bonds = mol.GetNumBonds()\n", + "print(f\"Number of atoms: {n_atoms}, Number of bonds: {n_bonds}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# initialize the featurizer\n", + "atom_featurizer = CustomMultiHotAtomFeaturizer.v2() # chemprop v2 default atom featurizer settings\n", + "bond_featurizer = CustomMultiHotBondFeaturizer()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "# A helper function to get predictions from a molecule with ability to keep or remove specific atom/node or bond/edge\n", + "def get_predictions(keep_atoms: Optional[List[bool]], keep_bonds: Optional[List[bool]], mol: str) -> float:\n", + " featurizer = CustomSimpleMoleculeMolGraphFeaturizer(\n", + " atom_featurizer=atom_featurizer,\n", + " bond_featurizer=bond_featurizer,\n", + " keep_atoms=keep_atoms,\n", + " keep_bonds=keep_bonds\n", + " )\n", + " test_data = [data.MoleculeDatapoint.from_smi(mol)]\n", + " test_dset = data.MoleculeDataset(test_data, featurizer=featurizer)\n", + " test_loader = data.build_dataloader(test_dset, shuffle=False, batch_size=1)\n", + "\n", + " with torch.inference_mode():\n", + " trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_progress_bar=False,\n", + " accelerator=\"cpu\",\n", + " devices=1\n", + " )\n", + " test_preds = trainer.predict(mpnn, test_loader)\n", + " return test_preds[0][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop_delete/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 
'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction with all atoms and bonds kept: tensor([2.2535])\n", + "Prediction with all atoms and bonds removed: tensor([2.1880])\n" + ] + } + ], + "source": [ + "# example prediction with different keep_atoms and keep_bonds\n", + "\n", + "# keep all atoms and bonds\n", + "keep_atoms_0 = [True] * n_atoms\n", + "keep_bonds_0 = [True] * n_bonds\n", + "\n", + "# remove all atoms and bonds\n", + "keep_atoms_1 = [False] * n_atoms\n", + "keep_bonds_1 = [False] * n_bonds\n", + "\n", + "pred_0 = get_predictions(keep_atoms_0, keep_bonds_0, test_mol)\n", + "pred_1 = get_predictions(keep_atoms_1, keep_bonds_1, test_mol)\n", + "\n", + "print(f\"Prediction with all atoms and bonds kept: {pred_0}\") # expected 2.2535\n", + "print(f\"Prediction with all atoms and bonds removed: {pred_1}\") # expected 2.1880" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "# An example wrapper class for use as the model input in SHAP explainer\n", + "class MoleculeModelWrapper:\n", + " def __init__(self, mol: str, n_atoms: int, n_bonds: int):\n", + " self.mol = mol\n", + " self.n_atoms = n_atoms\n", + " self.n_bonds = n_bonds\n", + " def __call__(self, X):\n", + " preds = []\n", + " for keep_features in X:\n", + " try:\n", + " # unpacking X, indices corresponds to atom.GetIdx() and bond.GetIdx() from rdkit mol, adapt as needed\n", + " keep_atoms = keep_features[:self.n_atoms]\n", + " keep_bonds = keep_features[self.n_atoms:self.n_atoms + self.n_bonds]\n", + " except Exception as e:\n", + " print(f\"Invalid input: {keep_features}\")\n", + " raise e\n", + " pred = get_predictions(keep_atoms, keep_bonds, self.mol)\n", + " preds.append([pred.item()])\n", + " return np.array(preds)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "# An example masker function for use with SHAP explainer\n", + "# The masker function takes in a binary mask and the input data X, and returns the masked input data. 
This simulates the effect of masking out certain features.\n", + "def binary_masker(binary_mask, x):\n", + " masked_x = deepcopy(x)\n", + " masked_x[binary_mask == 0] = 0\n", + " return np.array([masked_x])" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the model wrapper with the test molecule, number of atoms and bonds\n", + "model_wrapper = MoleculeModelWrapper(test_mol, n_atoms, n_bonds)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[2.25354147]])" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test the model wrapper with a random node/edge choice\n", + "keep_features = [1] * (n_atoms + n_bonds)\n", + "feature_choice = np.array([keep_features])\n", + "model_wrapper(feature_choice)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the SHAP explainer with the model wrapper and masker\n", + "explainer = shap.PermutationExplainer(model_wrapper, masker=binary_masker)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "PermutationExplainer explainer: 2it [00:25, 25.79s/it] \n" + ] + } + ], + "source": [ + "# Compute SHAP values, using 200 evaluations of different node/edge choices (notice that nodes and edges are masked out randomly by the binary masker, so the results may vary between runs)\n", + "explanation = explainer(feature_choice, max_evals=200)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ".values =\n", + "array([[-6.33835793e-04, 2.00152397e-04, 8.77737999e-04,\n", + " -1.20162964e-04, -1.70385838e-03, 1.32679939e-04,\n", + " 5.65052032e-04, -1.49548054e-03, 1.45280361e-03,\n", + " 7.84397125e-05, 7.94768333e-04, 1.32918358e-03,\n", + " 1.59931183e-03, 7.92026520e-04, -1.05524063e-03,\n", + " 1.27375126e-03, 1.26934052e-03, -5.46216965e-04,\n", + " 2.07734108e-03, 9.27805901e-04, 1.94215775e-03,\n", + " 1.70767307e-03, 9.18865204e-04, 2.30920315e-03,\n", + " 1.02865696e-03, 2.66933441e-03, 3.65734100e-04,\n", + " 1.01172924e-03, 1.39999390e-03, 1.10065937e-03,\n", + " 1.54471397e-03, 1.68943405e-03, 1.58667564e-03,\n", + " 8.28027725e-04, 2.80642509e-03, 2.18117237e-03,\n", + " 2.17568874e-03, 9.46164131e-04, 1.96087360e-03,\n", + " 2.82001495e-03, 2.58827209e-03, 2.84719467e-03,\n", + " 2.18105316e-03, 2.61569023e-03, 1.97517872e-03,\n", + " 2.07221508e-03, 2.10452080e-03, 1.83522701e-03,\n", + " 1.78325176e-03, 2.54166126e-03, 2.21943855e-03]])\n", + "\n", + ".base_values =\n", + "array([[2.18796897]])\n", + "\n", + ".data =\n", + "array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1]])" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Print the SHAP values\n", + "explanation" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA2sAAAL5CAYAAAAubLxsAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAzLhJREFUeJzs3XtU1VX+//HnB+J+9SQ38UapmRhDmok0zKjfQsFxugnZTyvNIh3RRNFGTS11IqcxHcMp1L4w4jiaWGbG2MUrgY5ljpdwmjTL4auWJBcTOaKc3x/kqeMBLwhypNdjrbP07L0/e7/PWbNcveazP/sYFovFgoiIiIiIiDgUp6YuQEREREREROwprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNpJmxWCyUl5ej37sXERERub4prIk0MydPnsTPz4+TJ082dSkiIiIichUU1kRERERERByQwpqIiIiIiIgDUlgTERERERFxQAprIiIiIiIiDkhhTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQeksCYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA1JYExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIiIiIiIgDUlgTERERERFxQAprIiIiIiIiDkhhTURERERExAEprImIiIiIiDigG5q6ABFpJEdPwPdnm7qKH3m5gZ9XU1chIiIict1QWBNprpIXw3/Lm7qKGmGB8PpohTURERGRK6CwJtJcfVUMB443dRUiIiIiUk96Zk1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIXP9KT0HSqxAwDLwehj7T4dODTV2ViIiIyFVRWJMrsnnzZgzDqPO1ffv2Rl1//vz5ZGVlNeoajeHo0aO0aNECwzD405/+ZNf/wQcfMHLkSHr06IG7uzuGYbB58+ZrX+j1qLoaBsyG5XmQHAd/fBS+LYPe0+GLI01dnYiIiEi96XfW5IoUFxcD0KNHD7p27WrX7+vr26jrv/TSS4SGhjJs2LBGXaeh/e53v6OyshKAI0fsA8SiRYtYs2YNbdq0ITAwkP/+97/W7/pnr/c0aB8IWWNq78/ZBgWfw6pUGBRd05YYDZ2SYcZKWJ5y7WoVERERaUAKa1Ivt99+O6mpqXbtbdq0adR1z5w5w+nTpxtl7qqqKs6dO4e7u3uDzrt27VrWrl3Lgw8+yKpVq2odk5iYSN++fQkMDOTtt98mOzu7QWto1nK2QZA/PBD1Y1uAX01gW7YVzFXg5tJk5YmIiIjUl7ZBSr14eXnRsWNHu9dPg87KlSv55S9/iY+PD56envTs2ZOcnBy7uVauXMlvf/tb2rZti5ubGy1btuS+++5jz549NuMMw6C4uJh9+/bZbL386quvrP213XHLysqy21b43HPPYRgGn332GePHj6d169a4u7uTn58PgNls5oUXXiA8PBx3d3f8/f0ZOHAgu3btuqLv6eTJk4wcOZJf/epXxMbG1jkuPj6epKQkHnzwQVq2bHlFa/zs7ToE3W4Cpwv+ObuzI1SY4T/aCikiIiLXJ91Zk3qprKy026bn5uaGj48PAM8++yx/+MMf6NatG7/5zW8A2LdvHwkJCaSnpzN69GjrdX/6059wcXGhZ8+eeHp68t1337Fx40Y+/PBDdu3aRceOHQEYPnw4q1atwtvbm7i4OOv1Pw2Ix44ds6v16NGjADb1nt+KOHjwYCwWC71796aqqgoXFxeqqqro168fBQUFxMTEcPvtt2M2m/noo4+466672Lp1K3fcccdlfU+TJk3i9OnTPPPMM3z//fd1jvPy8rqs+aQWR0vgV13s20Na1Px55ATc1u7a1iQiIiLSABTWpF5effVVXn31VZu2hIQE3njjDT799FP+8Ic/0L9/f8aOHUvr1q0xDINDhw4xbdo0nnnmGR599FFrsJs4cSJ+fn4EBATg6elJeXk5GzZsYPr06cyZM4clS5YAMHnyZN555x1MJhOTJ0+2rnvjjTfW+3NUV1czc+ZMOnbsiGEYtGnThldeeYUtW7bw9NNPc//99xMQEMCZM2fYuXMnkyZN4umnn7begbuY7du3s2jRIkaNGkXfvn1Zu3Ztvev82ag6C2UV9m3mKigut203edfcTTt9Btxq+afM3bXmz9NnGqdWERERkUambZBSLzExMYwbN87mdc899wCwZMkSDMPg8ccfp0ePHoSEhBAcHEyvXr3o27cvp06dYuvWrda57r//fvr168ftt99Oq1atuOmmm0hISCAoKIi8vDzruPN32JycnGy2Xrq41P95pPvuu4/777+fiIgIbrvtNvz9/VmyZAnBwcEMHz6c8PBwAgMDad26NXFxcdx2221s3779ks/NVVVVMXz4cG699VZSUlJwdXWtd43NyYXbSAsKCmzeFy5+s+b4/Z++Cj6HFR/Ztx8upry8nHNuN4D5rP2clTUhrfDQAZs1tm/fzrlz535cs7CQkpIS6/uioiIOHz5sfV9eXs6+ffsuWveF77WG1tAaWkNraA2toTUuZ41LMSwWi+WKrpCftZycHBISEhg1ahRTpkyx6fP09MRkMhEdHc22bdsuOs/8+fN5+umngZr/gJ82bRqbN2/m1KlTNuMCAwP55ptvrO8DAgIIDg5m7969dnMahkG/fv1Yv369TXtaWhpTpkxh1apVDBo0CICkpCQWL17M+++/bw2Z57m5uXHmzMXvxnz55ZeEhYXV2T9r1ixmzZrFq6++yogRI4Afv7uUlBRefvnlOq8dP3488+bNs6n3SpSXl+Pn50dZhyR8Dxy/4usbxS2hsPF5aGW6+LiS72HnBb+PNiELglvAxHtt2395a83ds46joWMI5D5r2//6h/DEX2DPPG2DFBERkeuStkFKvbi7u9O6deta+86cOYNhGIwZMwanCw99+EH37t0BOHz4MDExMbi7u9O/f38CAwNxdXXFMAzeeOONS4amy/HT/0fkQoGBgXZtFouF0NBQEhIS6rzu/BbO2hw9epQ//OEPREdH061bNw4cqLmzc/55urKyMg4cOEBISIieVbtQC2+4+xf2bSEt7NvPi2wPeftrfm/tp/97++cX4OkGnVo1WrkiIiIijUlhTRpcaGgoO3fu5N5776VTp061jjGZau6wrFixglOnTjF27FgSExPx9/fnhhtq/meZlZVl/ft5hmHUua6XlxcnT560a//vf/9b5zXOzs52bYGBgZw6dYqUlJQ6w6a/v3+dc3711VeYzWa2bNlCt27d7Pr/93//l//93//l73//O4MHD65zHrlMg3rVHN//5vYff2
etuBxWFcDAO3Rsv4iIiFy3FNakwQ0cOJC1a9fy8ssv8/bbb9sFom+++QZPT08ASktLAYiMjCQyMtI6ZvHixZSWltodY+/u7l5rIAMIDg5m3759VFRUWOcvKSlh9erVV1R/3759yc7OZuXKlUycONGu/5tvvrELkT8VFBREUlKSXfuRI0dYt24dUVFRRERE0LZt2yuqS+owqBdEdYLh6VBYBC194C/r4Vw1PK8wLCIiItcvhTVpcL/97W95++23WbduHZGRkSQkJNCqVSuOHj3Kzp07yc3NtW5vjImJYe7cuYwdO5aDBw/SokUL8vPzeeeddwgICODCRyo7duzIpk2bmDZtGrfeeitOTk4MHDgQLy8vBg4cyPz58+nTpw+PPvoopaWlLFq0CH9/f7777rvLrn/s2LHWkx83bdpE37598fX15fDhw2zYsAFXV1e2bNlS5/WtW7fmmWeesWv/4IMPWLduHb/4xS+YNGkSQUFB1r49e/ZYT4vcsWMHAKtXr+bf//43AGPGjMHPz++yP8PPirNzzfNqE/8KC96tOf2xRwfIGlPzrJyIiIjIdUphTRpcYGAgzz33HO3btycvL4+5c+dy+vRpTCYTbdq0ITEx0Tr2rrvuYty4caxZs4bZs2fj7OxMly5dmDhxIn/72984ftz2gIyxY8dSUlLCggULOHnyJBaLhf/85z907NiR3/3udxw+fJi8vDxSUlIIDQ2lX79+uLu788orr1x2/d26dePFF1/kb3/7G59++imbNm3CMAxatmxJWFgY//M//3PR611dXbnpppvs2s//xICnp6dd/9atW5k2bZpN24oVK6x/T0xM/PmGtc2zLj2mhTcsGV3zEhEREWkmFNbkikRHR5ORkUHPnj0vOq579+4EBAQQFxfH8ePHqaqqwsPDA5PJRLt2P57M5+vry7hx44iOjubEiRMYhkFQUBA9e/YkIiLCbstjbGwsHh4eHD16FLPZDNRsf4Sau27Tp0/ns88+o6KiAl9fXyIiIjAMg65duxIdHW2dJykpiTvuuMOmlvOcnJwYMGAAYWFhfPHFF9YjWj09PQkMDKzzObyr+e7uueceMjIy6rw2JCSkXmuKiIiIyPVLR/eLNDPX9dH9IiIiImKlH8UWERERERFxQAprIiIiIiIiDkhhTURERERExAEprImIiIiIiDgghTUREREREREHpKP7RZqr9i3B2bWpq6gRFtjUFYiIiIhcdxTWRJqr9CfBx7epq/iRl1tTVyAiIiJyXVFYE2muQkzg60BhTURERESuiJ5ZExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIiIiIiIgDUlgTERERERFxQAprIiIiIiIiDkhhTURERERExAHd0NQFiEgjOXoCvj/b1FX8yMsN/LyaugoRERGR64bCmkhzlbwY/lve1FXUCAuE10crrImIiIhcAYU1kebqq2I4cLypqxARERGRetIzayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiLXt9JTkPQqBAwDr4ehz3T49GBTVyUiIiJy1RTWROT6VV0NA2bD8jxIjoM/PgrflkHv6fDFkaauTkREROSqKKzJFcnJycEwjDpfubm5jbr+pEmTSEtLa9Q1GkJubi4xMTEEBwfj7u6Ou7s7bdu25amnnuL4cfvj9Kurq5kxYwZhYWG4urri6elJ9+7dG/37dHi9p8GwV+ruz9kGBZ9DVjLMeAhGx8HmmeDsBDNWXrs6RURERBqBfmdN6qVPnz7Ex8fbtUdERDTqupmZmQQHBzN58uRGXedq7du3j8rKSgYOHEhoaCjV1dV8/PHHvP766/zjH//gs88+w8fHxzr+gQce4O233+a2225jwoQJVFRUsGLFCu69917+/ve/M2jQoCb8NA4sZxsE+cMDUT+2BfhBYjQs2wrmKnBzabLyRERERK6GwprUS2RkJKmpqU1dRoMym81UVVXh7e191XNNmjSJSZMm2bWnpKQwf/58MjMzGTt2LACbNm3i7bffpnv37uzYsQMnp5ob3lOmTOGWW25hzJgx3H///Tg7O191Xc3OrkPQ7SZwumCTwJ0dYdEH8J8jcFu7pqlNRERE5CppG6Q0mgULFtClSxfc3d1xc3OjU6dOLFy4sNZxUVFRtGzZEhcXF3x8fIiOjmbr1q024wzDoLi4mH379tlsvdy3b5+1v3///nbzp6WlYRgGOTk51rakpCQMw6CgoIDBgwdjMpnw8PBg/fr1AFRUVJCcnEybNm1wcXHB09OTnj17snHjxqv6TsLCwgA4ceKEte38VsehQ4dagxpAUFAQv/rVrzh27BjvvPPOVa3bbB0tgZAW9u3n246csO8TERERuU7ozprUS0VFBUVFRTZtHh4e3HjjjQA8/vjjZGZm0r17d8aOHYuzszPvvvsuycnJHD16lNmzZ1uvy8jIwM/Pj8TEREJCQjhw4ACrV68mNjaWgoICunXrBsBLL73EzJkz8fHxISUlxXp969at6/05hgwZgqurKyNGjMAwDNq1a4fZbCYqKor9+/cTGxvL8OHDKS0tZeXKlcTFxbFu3Truueeey5q/vLyc8vJyTp48yZYtW5g9ezY33HAD9913n3WM2WwGwMvLy+56Dw8PALZs2WJzTbNUdRbKKuzbzFVQXG7bbvKuuZt2+gy41fLPmLtrzZ+nzzROrSIiIiLXgMKa1EtGRgYZGRk2bb1792bTpk1s2LCBzMxMHnvsMbKysqz9aWlpREdHM3fuXFJSUqzBLj8/H39/f5u5Ro0aRUxMDLNmzeKtt94CIDU1lTlz5mAymRpsC6a3tzc7d+7E1dXV2jZx4kT27t1LdnY2Q4cOtbZPnz6dzp07M378ePbu3XtZ848cOZK///3v1vdt2rQhKyuLyMhIa9svfvELAD744AOefPJJa/v559wAu2DcLOX/u+bY/QsVfA4rPrJtO/QatA8ED1cwn7W/pvKHkObhat8nIiIicp3QNkipl4EDB7J8+XKb1/m7ZUuWLMEwDEaPHk1RUZHNa8CAAVRWVlq3GwLWoFZdXU1xcTFFRUW0bt2aVq1asWfPnkb9HGPGjLEJagBvvvkmrVq1onfv3ja1V1ZW0rNnTwoLCzl58uRlzZ+amsry5ctJT09n8ODBuLi48M0339iMeeSRR2jdujVvvvkm48aNY9euXWzdupX4+HgOHz4M1NzJbA527dpl876goODHN79oT+GfEzm3fhp8MAM+mEFlp2Cq+oRb3x9fPppv/jYKgv0BOBfkx8n/HLaf82hJzZtWJts1gO3bt3Pu3Dnr+8LCQkpKSqzvi4qKrN871NwdPb/Vtta6a3mvNbSG1tAaWkNraA2tcTlrXIphsVgsV3SF/Kzl5OSQkJBASkoKL7/8cq1jevTowSeffHLReebMmWM9gGPjxo1MnjyZ3bt3W7cEnhcQEMC3335r8z44OLjWO1uGYdCvXz+bIAg1d/SmTJnCqlWrrKcqJ
iUlsXjxYgoKCujVq5fNeDc3N86cufj2uf3799O5c+eLjqnNsmXLeOSRR3j55ZdttnLu37+fhx56yOZztWvXjkGDBjF37lweeughVqxYcVlrlJeX4+fnR1mHJHwP2P9MQJO4JRQ2Pg+tTFd2Xe9pNXfQssbU3p/wEuTthyNLbA8ZSXoV/rYVTizVaZAiIiJy3dI2SGlwFosFwzBYunRpnScY3nnnnUBNSImPj8fT05OnnnqK8PBwvL29MQyDZ555htOnT191PWfP1rJN7gc/PT7/p/W3bduWF198sc7r6vuc3NChQ0lOTmbJkiU2Ye3WW29lz5497N+/n3//+98EBQURHR3NtGnTrP1Si0G9ao7vf3M7DIquaSsuh1UFMPAOBTURERG5rimsSYMLCwtj586ddOjQgaioqIuOzcrKwmw2k52dTUJCgk3fqFGjcHGx/Y9twzDqnMvLy4vS0lK79oMHD15+8UBISAhlZWUkJiY2ynH5Z86coby8vNa+W2+91SaYvf/++xiGod9Zq8ugXhDVCYanQ2ERtPSBv6yHc9Xw/OCmrk5ERETkquiZNWlwTzzxBAATJkygqqrKrv/QoUPWv58PQxfuxp05cyZlZWV217q7u9cZdEJDQyksLLTpP3bsGGvWrLmi+h988EHKysqYOHFirf0/rb8uX375Za3tc+bM4fTp05f14+GLFy9mx44dxMbGEh4efsnxP0vOzpD7LDx0Fyx4FyYuhZa+NVsubwlt6upERERErorurEmD69evH0lJSSxatIibb76Z+Ph4QkNDOXLkCLt372bHjh3WrYmJiYnMnTuXkSNHkpeXh8lUcyDEtm3bCAoKsnmoEyAiIoLc3FxGjBhBeHg4Tk5ODBs2DH9/f5KSkkhNTaVHjx4kJCRQUlLCG2+8QVBQUK3Bry5paWls2bKFefPmkZeXR0xMDH5+fnz99dfk5+fj5uZ2yYNP7r77bvz8/OjevTvt2rWjtLSU7du3s23bNkwmE3PnzrUZP2DAAKqrq4mMjMTDw4P8/Hw+/PBDOnToQHZ29mXX3uxsnnXpMS28YcnompeIiIhIM6KwJo0iIyODqKgo0tPTWbZsGWazGV9fX8LCwpg6dap1XGRkJCtWrGDq1KksXrwYJycnunbtynvvvcfIkSM5duyYzbwLFy5kyJAhrFy5koqKCiwWC3fffTf+/v5MmDCBoqIisrOzmTNnDoGBgYwbNw4nJyemTJly2bW7ubmxbds2ZsyYwapVq6w/5G0ymQgPD2f48OGXnGPo0KGsXbuW1atXc/LkSZydnQkODuaRRx7hhRdeIDTU9q5Pjx49yM7OZsuWLZw9e5bg4GBGjRpFWlparc/ViYiIiEjzp9MgRZqZZnUapIiIiMjPmJ5ZExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oD0O2sizVX7luDs2tRV1AgLbOoKRERERK47CmsizVX6k+Dj29RV/MjLrakrEBEREbmuKKyJNFchJvB1oLAmIiIiIldEz6yJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIiIiIiIgDUlgTERERERFxQAprIiIiIiIiDkhhTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigG5o6gJEpJEcPQHfn23qKn7k5QZ+Xk1dhYiIiMh1Q2FNpLlKXgz/LW/qKmqEBcLroxXWRERERK6AwppIc/VVMRw43tRViIiIiEg96Zk1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIiIiIiIgDUlgTketb6SlIehUChoHXw9BnOnx6sKmrEhEREblqCmsicv2qroYBs2F5HiTHwR8fhW/LoPd0+OJIU1cnIiIiclUU1uSK5OTkYBhGna/c3NxGXX/SpEmkpaU16hoNITc3l5iYGIKDg3F3d8fd3Z22bdvy1FNPcfz4pY/Tf+SRRzAMA3d392tQrQPrPQ2GvVJ3f842KPgcspJhxkMwOg42zwRnJ5ix8trVKSIiItII9DtrUi99+vQhPj7erj0iIqJR183MzCQ4OJjJkyc36jpXa9++fVRWVjJw4EBCQ0Oprq7m448/5vXXX+cf//gHn332GT4+PrVeu2nTJpYvX46bm9s1rvo6lLMNgvzhgagf2wL8IDEalm0FcxW4uTRZeSIiIiJXQ2FN6iUyMpLU1NSmLqNBmc1mqqqq8Pb2vuq5Jk2axKRJk+zaU1JSmD9/PpmZmYwdO9auv6qqiieffJIePXpw6tQpvvjii6uupVnbdQi63QROF2wSuLMjLPoA/nMEbmvXNLWJiIiIXCVtg5RGs2DBArp06YK7uztubm506tSJhQsX1jouKiqKli1b4uLigo+PD9HR0WzdutVmnGEYFBcXs2/fPputl/v27bP29+/f327+tLQ0DMMgJyfH2paUlIRhGBQUFDB48GBMJhMeHh6sX78egIqKCpKTk2nTpg0uLi54enrSs2dPNm7ceFXfSVhYGAAnTpyotX/KlCn897//5fXXX7+qdX42jpZASAv79vNtR2r/nkVERESuB7qzJvVSUVFBUVGRTZuHhwc33ngjAI8//jiZmZl0796dsWPH4uzszLvvvktycjJHjx5l9uzZ1usyMjLw8/MjMTGRkJAQDhw4wOrVq4mNjaWgoIBu3boB8NJLLzFz5kx8fHxISUmxXt+6det6f44hQ4bg6urKiBEjMAyDdu3aYTabiYqKYv/+/cTGxjJ8+HBKS0tZuXIlcXFxrFu3jnvuueey5i8vL6e8vJyTJ0+yZcsWZs+ezQ033MB9991nN/azzz7jlVdeYdSoUYSHh9f7M123qs5CWYV9m7kKistt203eNXfTTp8Bt1r+GXN3rfnz9JnGqVVERETkGlBYk3rJyMggIyPDpq13795s2rSJDRs2kJmZyWOPPUZWVpa1Py0tjejoaObOnUtKSoo12OXn5+Pv728z16hRo4iJiWHWrFm89dZbAKSmpjJnzhxMJlODbcH09vZm586duLq6WtsmTpzI3r17yc7OZujQodb26dOn07lzZ8aPH8/evXsva/6RI0fy97//3fq+TZs2ZGVlERkZaTd22LBhBAUF8cc//rH+H+h6lv/vmmP3L1TwOaz4yLbt0GvQPhA8XMF81v6ayh9CmoerfZ+IiIjIdULbIKVeBg4cyPLly21e5++WLVmyBMMwGD16NEVFRTavAQMGUFlZad1uCFiDWnV1NcXFxRQVFdG6dWtatWrFnj17GvVzjBkzxiaoAbz55pu0atWK3r1729ReWVlJz549KSws5OTJk5c1f2pqKsuXLyc9PZ3Bgwfj4uLCN998Yzdu3rx57Ny5k/T0dLt6mpNdu3bZvC8oKPjxzS/aU/jnRM6tnwYfzIAPZlDZKZiqPuHW98eXj+abv42CYH8AzgX5cfI/h+3nPFpS86aVyXYNYPv27Zw7d876vrCwkJKSEuv7oqIiDh/+cc7y8nLrVtta667lvdbQGlpD
a2gNraE1tMblrHEphsVisVzRFfKzlpOTQ0JCAikpKbz88su1junRoweffPLJReeZM2eO9QCOjRs3MnnyZHbv3o3ZbLYZFxAQwLfffmvzPjg4uNY7W4Zh0K9fP5sgCDV39KZMmcKqVasYNGgQUPPM2uLFiykoKKBXr142493c3Dhz5uLb5/bv30/nzp0vOqY2y5Yt45FHHuHll1+2buU8cuQIt956KzExMaxbt8469rbbbuOLL76gsrLyitYoLy/Hz8+Psg5J+B649M8EXBO3hMLG56GV6cqu6z2t5g5a1pja+xNegrz9cGSJ7SEjSa/C37bCiaU6DVJERESuW9oGKQ3OYrFgGAZLly7F2dm51jF33nknUBN64uPj8fT05KmnniI8PBxvb28Mw+CZZ57h9OnTV13P2bO1bJP7QW3H51ssFtq2bcuLL75Y53X1fU5u6NChJCcns2TJEmtYGzduHGazmeTkZJs7T2azGYvFwq5du/Dw8KhXOGz2BvWqOb7/ze0wKLqmrbgcVhXAwDsU1EREROS6prAmDS4sLIydO3fSoUMHoqKiLjo2KysLs9lMdnY2CQkJNn2jRo3CxcX2P7YNw6hzLi8vL0pLS+3aDx48ePnFAyEhIZSVlZGYmFhn2LwaZ86cobz8xwMzioqKMJvNxMXF1Tq+W7dutGnTxuY2u/xgUC+I6gTD06GwCFr6wF/Ww7lqeH5wU1cnIiIiclUU1qTBPfHEE+Tk5DBhwgQ2b95sF7gOHTpkPcL+fBi6cDfuzJkzKSsro2XLljbt7u7uNkHnp0JDQyksLKS8vBxfX18Ajh07xpo1a66o/gcffJB58+YxceLEWrd6/rT+unz55ZfcdNNNdu1z5szh9OnTNj8ePnXqVL766iu7sS+++CLffPMN8+bNo0WLWo6nF3B2htxnYeJfYcG7Nac/9uhQs23yltCmrk5ERETkqiisSYPr168fSUlJLFq0iJtvvpn4+HhCQ0M5cuQIu3fvZseOHdatiYmJicydO5eRI0eSl5eHyVRzIMS2bdsICgqyeagTICIigtzcXEaMGEF4eDhOTk4MGzYMf39/kpKSSE1NpUePHiQkJFBSUsIbb7xBUFAQZWVll11/WloaW7ZsYd68eeTl5RETE4Ofnx9ff/01+fn5uLm5XfLgk7vvvhs/Pz+6d+9Ou3btKC0tZfv27Wzbtg2TycTcuXOtYwcMGFDrHK+99hrHjx9n9OjRl117s7N51qXHtPCGJaNrXiIiIiLNiMKaNIqMjAyioqJIT09n2bJlmM1mfH19CQsLY+rUqdZxkZGRrFixgqlTp7J48WKcnJzo2rUr7733HiNHjuTYsWM28y5cuJAhQ4awcuVKKioqsFgs3H333fj7+zNhwgSKiorIzs5mzpw5BAYGMm7cOJycnJgyZcpl1+7m5sa2bduYMWMGq1atsv6Qt8lkIjw8nOHDh19yjqFDh7J27VpWr17NyZMncXZ2Jjg4mEceeYQXXniB0FDd9RERERGRi9NpkCLNTLM6DVJERETkZ0y/syYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA1JYExERERERcUAKayIiIiIiIg5IYU1ERERERMQB6XfWRJqr9i3B2bWpq6gRFtjUFYiIiIhcdxTWRJqr9CfBx7epq/iRl1tTVyAiIiJyXVFYE2muQkzg60BhTURERESuiJ5ZExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIiIiIiIgDUlgTERERERFxQAprIiIiIiIiDkhhTURERERExAHd0NQFiEgjOXoCvj/b1FWAlxv4eTV1FSIiIiLXHYU1keYqeTH8t7xpawgLhNdHK6yJiIiI1IPCmkhz9VUxHDje1FWIiIiISD3pmTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBOR61PpKUh6FQKGgdfD0Gc6fHqwqasSERERaTAKayJy/amuhgGzYXkeJMfBHx+Fb8ug93T44khTVyciIiLSIBTW5Irk5ORgGEadr9zc3EZdf9KkSaSlpTXqGg1hw4YN/L//9/+46aab8PLywsvLiw4dOjBt2jTMZrPd+P79+9f5nS5cuLAJPkET6z0Nhr1Sd3/ONij4HLKSYcZDMDoONs8EZyeYsfLa1SkiIiLSiPQ7a1Ivffr0IT4+3q49IiKiUdfNzMwkODiYyZMnN+o6V2v27Nl88skn/OpXv2Lo0KGcPXuW9957j9mzZ5Obm8vHH3+Mk5P9/1fy0ksv2bX17dv3WpR8fcnZBkH+8EDUj20BfpAYDcu2grkK3FyarDwRERGRhqCwJvUSGRlJampqU5fRoMxmM1VVVXh7e1/1XCkpKfTt29dmrhdeeIG7776bDRs2kJWVxeOPP253XXP7ThvNrkPQ7Sa4MPDe2REWfQD/OQK3tWua2kREREQaiLZBSqNZsGABXbp0wd3dHTc3Nzp16lTrlr4FCxYQFRVFy5YtcXFxwcfHh+joaLZu3WozzjAMiouL2bdvn802wX379ln7+/fvbzd/WloahmGQk5NjbUtKSsIwDAoKChg8eDAmkwkPDw/Wr18PQEVFBcnJybRp0wYXFxc8PT3p2bMnGzduvKzP/tvf/rbW0Pfwww8D8K9//avW66qrqykuLubcuXOXtc7P1tESCGlh336+7ciJa1uPiIiISCPQnTWpl4qKCoqKimzaPDw8uPHGGwF4/PHHyczMpHv37owdOxZnZ2feffddkpOTOXr0KLNnz7Zel5GRgZ+fH4mJiYSEhHDgwAFWr15NbGwsBQUFdOvWDajZIjhz5kx8fHxISUmxXt+6det6f44hQ4bg6urKiBEjMAyDdu3aYTabiYqKYv/+/cTGxjJ8+HBKS0tZuXIlcXFxrFu3jnvuuade63399dcABAcH19rv5eVFZWUlN9xwA127duWFF14gLi6u3p/vulB1Fsoq7NvMVVBcbttu8q65m3b6DLjV8s+Xu2vNn6fPNE6tIiIiIteQwprUS0ZGBhkZGTZtvXv3ZtOmTWzYsIHMzEwee+wxsrKyrP1paWlER0czd+5cUlJSrMEuPz8ff39/m7lGjRpFTEwMs2bN4q233gJqtgjOmTMHk8nUYNsFvb292blzJ66urta2iRMnsnfvXrKzsxk6dKi1ffr06XTu3Jnx48ezd+/eK16rpKSE1157DQ8PD4YNG2bTFxgYyEMPPUSPHj3w8fFh586dLFu2jIEDB5KdnW29I9cs5f+75tj9CxV8Dis+sm079Bq0DwQPVzCftb+m8oeQ5uFq3yciIiJyndE2SKmXgQMHsnz5cpvX+btlS5YswTAMRo8eTVFRkc1rwIABVFZWWrcbAtagdn4LYFFREa1bt6ZVq1bs2bOnUT/HmDFjbIIawJtvvkmrVq3o3bu3Te2VlZX07NmTwsJCTp48eUXrVFVVMWDAAI4fP84f/vAHWrVqZdO/dOlSVqxYwYQJE0hKSiIjI4O8vDycnZ0
ZP378VX9OR1FQUGD//hft4YMZ8MEMCv+cyLn10yCiHcRG8vWSYZx8c4K1v+jsKQ4fPlyz3fFoCeXl5dZtsEDN9kiAVqY619y+fbvNNtPCwkJKSkqs74uKimrW+IHdGnV9Dq2hNbSG1tAaWkNraI0rXONSDIvFYrmiK+RnLScnh4SEBFJSUnj55ZdrHdOjRw8++eSTi84zZ84cJk2aBMDGjRuZPHkyu3fvtjvWPiAggG+//dbmfXBwcK13tgzDoF+/fjZBEGru6E2ZMoVVq1YxaNAgoOaZtcWLF1NQUECvXr1sxru5uXHmzMW30e3fv5/OnTtfdMx5586dIz4+nvfff5/Ro0eTnp5+WdcBxMXFsX79ej7++GPuuOOOy7qmvLwcPz8/yjok4Xvg+GWv1ShuCYWNz9uEp8vSe1rNHbSsMbX3J7wEefvhyBLbQ0aSXoW/bYUTS3UapIiIiFz3tA1SGpzFYsEwDJYuXYqzs3OtY+68806gJvTEx8fj6enJU089RXh4ON7e3hiGwTPPPMPp06evup6zZ2vZLvcDHx+fWutv27YtL774Yp3XXe5zcufOneM3v/kN77//Pk8++eQVBTWAtm3bAnD06NEruq7ZG9Sr5vj+N7fDoOiatuJyWFUAA+9QUBMREZFmQWFNGlxYWBg7d+6kQ4cOREVFXXRsVlYWZrOZ7OxsEhISbPpGjRqFi4vtf3QbhlHnXF5eXpSWltq1Hzx48PKLB0JCQigrKyMxMbHOsHk5zge19evX8/jjj7No0aIrnuN87W3atKl3Hc3SoF4Q1QmGp0NhEbT0gb+sh3PV8Pzgpq5OREREpEHomTVpcE888QQAEyZMoKqqyq7/0KFD1r+fD0MX7sadOXMmZWVldte6u7tTXl5u1w4QGhpKYWGhTf+xY8dYs2bNFdX/4IMPUlZWxsSJE2vt/2n9damurua3v/0t69evZ9iwYbz++ut1ji0tLeX777+3a9+4cSNbtmwhNDSUyMjIy67/Z8HZGXKfhYfuggXvwsSl0NK3ZsvlLaFNXZ2IiIhIg9CdNWlw/fr1IykpiUWLFnHzzTcTHx9PaGgoR44cYffu3ezYscO6NTExMZG5c+cycuRI8vLyMJlMFBQUsG3bNoKCgux+bywiIoLc3FxGjBhBeHg4Tk5ODBs2DH9/f5KSkkhNTaVHjx4kJCRQUlLCG2+8QVBQUK3Bry5paWls2bKFefPmkZeXR0xMDH5+fnz99dfk5+fj5uZ2yYNPhg4dSm5uLu3btyc8PJw//elPNv233norAwYMAGDXrl3cf//99O7dmw4dOuDt7c3u3btZt24dTk5Otf42XbO3edalx7TwhiWja14iIiIizZDCmjSKjIwMoqKiSE9PZ9myZZjNZnx9fQkLC2Pq1KnWcZGRkaxYsYKpU6eyePFinJyc6Nq1K++99x4jR47k2LFjNvMuXLiQIUOGsHLlSioqKrBYLNx99934+/szYcIEioqKyM7OZs6cOQQGBjJu3DicnJyYMmXKZdfu5ubGtm3bmDFjBqtWrbKGJZPJRHh4OMOHD7/kHOcPQPnqq69qvUPXr18/a1hr164dPXr04OOPP+a9997jzJkztGjRgr59+zJr1izr830iIiIi8vOi0yBFmplmcRqkiIiIiOiZNREREREREUeksCYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA1JYExERERERcUAKayIiIiIiIg5Iv7Mm0ly1bwnOrk1bQ1hg064vIiIich1TWBNprtKfBB/fpq4CvNyaugIRERGR65LCmkhzFWICXwcIayIiIiJSL3pmTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQd0Q1MXICKN5OgJ+P5s09bg5QZ+Xk1bg4iIiMh1SmFNpLlKXgz/LW+69cMC4fXRCmsiIiIi9aSwJtJcfVUMB443dRUiIiIiUk96Zk1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kTk+lR6CpJehYBh4PUw9JkOnx5s6qpEREREGozCmohcf6qrYcBsWJ4HyXHwx0fh2zLoPR2+ONLU1YmIiIg0CIU1uSI5OTkYhlHnKzc3t1HXnzRpEmlpaY26RmM4ePAgXl5eGIbB+PHj7fofffRROnfujK+vLy4uLtx444307NmTnJycJqjWAfSeBsNeqbs/ZxsUfA5ZyTDjIRgdB5tngrMTzFh57eoUERERaUT6nTWplz59+hAfH2/XHhER0ajrZmZmEhwczOTJkxt1nYY2bNgwzp07V2f/rl27uOWWWxgwYAAmk4mjR4+yZs0aEhISmDNnDpMmTbqG1V4HcrZBkD88EPVjW4AfJEbDsq1grgI3lyYrT0RERKQhKKxJvURGRpKamtrUZTQos9lMVVUV3t7eDTrv4sWLyc/P5+mnn2b+/Pm1jtm7d69d26xZs2jfvj0LFixQWLvQrkPQ7SZwumBzwJ0dYdEH8J8jcFu7pqlNREREpIFoG6Q0mgULFtClSxfc3d1xc3OjU6dOLFy4sNZxUVFRtGzZEhcXF3x8fIiOjmbr1q024wzDoLi4mH379tlsvdy3b5+1v3///nbzp6WlYRiGzZbCpKQkDMOgoKCAwYMHYzKZ8PDwYP369QBUVFSQnJxMmzZtcHFxwdPTk549e7Jx48Yr+g6+++47fv/733P//fdz1113XdG1LVq0wMfHh++///6KrvtZOFoCIS3s28+3HTlxbesRERERaQS6syb1UlFRQVFRkU2bh4cHN954IwCPP/44mZmZdO/enbFjx+Ls7My7775LcnIyR48eZfbs2dbrMjIy8PPzIzExkZCQEA4cOMDq1auJjY2loKCAbt26AfDSSy8xc+ZMfHx8SElJsV7funXren+OIUOG4OrqyogRIzAMg3bt2mE2m4mKimL//v3ExsYyfPhwSktLWblyJXFxcaxbt4577rnnsuZPSkqiurqa1157jS1btlxyfFFREefOneOrr75iwYIF/N///V+t202blaqzUFZh32auguJy23aTd83dtNNnwK2Wf77cXWv+PH2mcWoVERERuYYU1qReMjIyyMjIsGnr3bs3mzZtYsOGDWRmZvLYY4+RlZVl7U9LSyM6Opq5c+eSkpJiDXb5+fn4+/vbzDVq1ChiYmKYNWsWb731FgCpqanMmTMHk8nUYFswvb292blzJ66urta2iRMnsnfvXrKzsxk6dKi1ffr06XTu3Jnx48fXum3xQrm5ubz11lvMnz+fgICAS44vKSmhTZs21veurq4MHDiQpUuXXuGnus7k/7vm2P0LFXwOKz6ybTv0GrQPBA9XMJ+1v6byh5Dm4WrfJyIiInKd0TZIqZeBAweyfPlym9f5u2VLlizBMAxGjx5NUVGRzWvAgAFUVlZatxsC1qBWXV1NcXExRUVFtG7dmlatWrFnz55G/RxjxoyxCW
oAb775Jq1ataJ37942tVdWVtKzZ08KCws5efLkRec1m82MHDmS22+/nbFjx15WLd7e3ixfvpylS5cyY8YMOnToQEVFBeXl5Ze+2EFVVVXZvC8oKLB//4v28MEM+GAGhX9O5Nz6aRDRDmIj+XrJME6+OcHaX3T2FIcPH67Z7ni0hPLycus2WKBmeyRAK1Oda27fvt3msJfCwkJKSkqs74uKimrW+IHdGnV9Dq2hNbSG1tAaWkNraI0rXONSDIvFYrmiK+RnLScnh4SEBFJSUnj55ZdrHdOjRw8++eSTi87z0xMON27cyOTJk9m9ezdms9lmXEBAAN9++63N++Dg4FrvbBmGQb9+/WyCINTc0ZsyZQqrVq1i0KBBQM32xMWLF1NQUECvXr1sxru5uXHmzMW30e3fv5/OnTvX2f+73/2OJUuWsGPHDiIjI4HL++5+qqqqisjISE6ePMkXX3yBm5vbJa+Bmn8o/Pz8KOuQhO+B45d1TaO4JRQ2Pm8TnC5b72k1d9CyxtTen/AS5O2HI0tsDxlJehX+thVOLNVpkCIiInLd0zZIaXAWiwXDMFi6dCnOzs61jrnzzjuBmtATHx+Pp6cnTz31FOHh4Xh7e2MYBs888wynT5++6nrOnq1lu9wPfHx8aq2/bdu2vPjii3Ved7Hn5A4ePMiSJUuIi4vDYrGwa9cuAL7++mug5tCRXbt2ERYWZrf986dcXFwYNGgQM2fOZM2aNTz00EN1jv3ZGdSr5vj+N7fDoOiatuJyWFUAA+9QUBMREZFmQWFNGlxYWBg7d+6kQ4cOREVFXXRsVlYWZrOZ7OxsEhISbPpGjRqFi4vtf3QbhlHnXF5eXpSWltq1Hzx48PKLB0JCQigrKyMxMbHOsHkxhw8fpqqqirVr17J27Vq7/qVLl7J06VLS09MZPXr0ReeqqKg5eOP48Sa8Q+aIBvWCqE4wPB0Ki6ClD/xlPZyrhucHN3V1IiIiIg1Cz6xJg3viiScAmDBhgt0zSwCHDh2y/v18GLpwN+7MmTMpKyuzu9bd3b3OZ7hCQ0MpLCy06T927Bhr1qy5ovoffPBBysrKmDhxYq39P62/Nl27diU9Pd3ulZSUBEBsbCzp6enExsZaa6ztDuL5EygNw6BPnz5X9BmaPWdnyH0WHroLFrwLE5dCS9+abZe3hDZ1dSIiIiINQnfWpMH169ePpKQkFi1axM0330x8fDyhoaEcOXKE3bt3s2PHDuvWxMTERObOncvIkSPJy8vDZDJRUFDAtm3bCAoKsnmoEyAiIoLc3FxGjBhBeHg4Tk5ODBs2DH9/f5KSkkhNTaVHjx4kJCRQUlLCG2+8QVBQUK3Bry5paWls2bKFefPmkZeXR0xMDH5+fnz99dfk5+fj5uZ20YNPAgICar1jlpOTw6JFiwgPD7fpX7t2LRMnTqRv377cfPPN+Pr68uWXX/LOO+9w4sQJnnzyScLDwy+7/mZh86xLj2nhDUtG17xEREREmiGFNWkUGRkZREVFkZ6ezrJlyzCbzfj6+hIWFsbUqVOt4yIjI1mxYgVTp05l8eLFODk50bVrV9577z1GjhzJsWPHbOZduHAhQ4YMYeXKlVRUVGCxWLj77rvx9/dnwoQJFBUVkZ2dzZw5cwgMDGTcuHE4OTkxZcqUy67dzc2Nbdu2MWPGDFatWmX9IW+TyUR4eDjDhw9vmC/pB3feeSe//OUv+fjjj3nvvfcwm834+Phwyy23MHLkyAZfT0RERESuDzoNUqSZaRanQYqIiIiInlkTERERERFxRAprIiIiIiIiDkhhTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigPQ7ayLNVfuW4OzadOuHBTbd2iIiIiLNgMKaSHOV/iT4+DZtDV5uTbu+iIiIyHVMYU2kuQoxgW8ThzURERERqTc9syYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA1JYExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIiIiIiIgDuqGpCxCRRnL0BHx/tmlr8HIDP6+mrUFERETkOqWwJtJcJS+G/5Y33fphgfD6aIU1ERERkXpSWBNprr4qhgPHm7oKEREREaknPbMmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsicv0pPQVJr0LAMPB6GPpMh08PNnVVIiIiIg1KYU1Eri/V1TBgNizPg+Q4+OOj8G0Z9J4OXxxp6upEREREGozCmlyRnJwcDMOo85Wbm9uo60+aNIm0tLRGXaMxHDx4EC8vLwzDYPz48bWO+cc//kFkZCTu7u54eHjQvXt3Nm3adI0rdQC9p8GwV+ruz9kGBZ9DVjLMeAhGx8HmmeDsBDNWXrs6RURERBqZfmdN6qVPnz7Ex8fbtUdERDTqupmZmQQHBzN58uRGXaehDRs2jHPnztXZn5uby7333ovJZGLMmDEALF26lLi4OD788EN++ctfXqtSHV/ONgjyhweifmwL8IPEaFi2FcxV4ObSZOWJiIiINBSFNamXyMhIUlNTm7qMBmU2m6mqqsLb27tB5128eDH5+fk8/fTTzJ8/v9YxTz/9NDfccAMfffQRHTt2BCApKYmIiAjGjh3Lp59+2qA1Xdd2HYJuN4HTBRsD7uwIiz6A/xyB29o1TW0iIiIiDUjbIKXRLFiwgC5duuDu7o6bmxudOnVi4cKFtY6LioqiZcuWuLi44OPjQ3R0NFu3brUZZxgGxcXF7Nu3z2br5b59+6z9/fv3t5s/LS0NwzDIycmxtiUlJWEYBgUFBQwePBiTyYSHhwfr168HoKKiguTkZNq0aYOLiwuenp707NmTjRs3XtF38N133/H73/+e+++/n7vuuqvWMbt27eLAgQP07dvXGtQAOnbsSN++ffnXv/7Fl19+eUXrNmtHSyCkhX37+bYjJ65tPSIiIiKNRHfWpF4qKiooKiqyafPw8ODGG28E4PHHHyczM5Pu3bszduxYnJ2deffdd0lOTubo0aPMnj3bel1GRgZ+fn4kJiYSEhLCgQMHWL16NbGxsRQUFNCtWzcAXnrpJWbOnImPjw8pKSnW61u3bl3vzzFkyBBcXV0ZMWIEhmHQrl07zGYzUVFR7N+/n9jYWIYPH05paSkrV64kLi6OdevWcc8991zW/ElJSVRXV/Paa6+xZcuWWsds3rwZgKioKLu+nj17kpuby5YtW7jpppvq/TkdVtVZKKuwbzNXQXG5bbvJu+Zu2ukz4FbLP13urjV/nj7TOLWKiIiIXGMKa1IvGRkZZGRk2LT17t2bTZs2sWHDBjIzM3nsscfIysqy9qelpREdHc3cuXNJSUmxBrv8/Hz8/f1t5ho1ahQxMTHMmjWLt956C4DU1FTmzJmDyWRqsC2Y3t7e7Ny5E1dXV2vbxIkT2bt3L9nZ2QwdOtTaPn36dDp37sz48ePZu3fvJefOzc3lrbfeYv78+QQEBNQ57nzobdu2rV3f+bbDh
w9f9me6ruT/u+bY/QsVfA4rPrJtO/QatA8ED1cwn7W/pvKHkObhat8nIiIich3SNkipl4EDB7J8+XKb1/m7ZUuWLMEwDEaPHk1RUZHNa8CAAVRWVlq3GwLWoFZdXU1xcTFFRUW0bt2aVq1asWfPnkb9HGPGjLEJagBvvvkmrVq1onfv3ja1V1ZW0rNnTwoLCzl58uRF5zWbzYwcOZLbb7+dsWPHXnRsRUXNnSUPDw+7Pk9PTwBOnTp1JR/LYVRVVdm8LygosHn/z8pv4YMZ1lfhnxOxRLSD2Ej4YAZfLxnGyTcn1PQH+1NUVERVgHfNVkigvLzcug32fNu/vrUNtheuuX37dpvDXgoLCykpKbG+LyoqsgnHNmvUMafW0BpaQ2toDa2hNbRGfda4JIvIFVi1apUFsKSkpNQ55o477rAAF33NmTPHOn7Dhg2WO++80+Lm5mY3LiAgwGbuli1bWrp27VrruoClX79+du0vvPCCBbCsWrXK2vbkk09aAEtBQYHdeFdX10vWv3///ot+T6NGjbK4uLhYdu3aZW2r67sbP368BbBkZWXZzZOZmWkBLM8999xF1/upsrIyC2Ap65BksXB/071uSbZY/u+7y67b6tfPWiyPLai7f9AfLZag4RbLuXO27U/+xWLxHGyxVJ658jVFREREHJC2QUqDs1gsGIbB0qVLcXZ2rnXMnXfeCcD+/fuJj4/H09OTp556ivDwcLy9vTEMg2eeeYbTp09fdT1nz9ayZe4HPj4+tdbftm1bXnzxxTqvu9hzcgcPHmTJkiXExcVhsVjYtWsXAF9//TVQc+jIrl27CAsLw9/f3zpXbVsdz7fVtkXyZ2tQr5rj+9/cDoOia9qKy2FVAQy8Q8f2i4iISLOhsCYNLiwsjJ07d9KhQ4daD834qaysLMxmM9nZ2SQkJNj0jRo1ChcX2//wNgyjzrm8vLwoLS21az948ODlFw+EhIRQVlZGYmJinWHzYg4fPkxVVRVr165l7dq1dv1Lly5l6dKlpKenM3r0aHr37g3U3Gq/0D//+U8Mw+DXv/71FdfRbA3qBVGdYHg6FBZBSx/4y3o4Vw3PD27q6kREREQajJ5Zkwb3xBNPADBhwgS7Z5YADh06ZP37+TBksVhsxsycOZOysjK7a93d3SkvL7drBwgNDaWwsNCm/9ixY6xZs+aK6n/wwQcpKytj4sSJtfb/tP7adO3alfT0dLtXUlISALGxsaSnpxMbGwvA7bffzs0338zGjRs5cOCAdZ4DBw6wceNGIiIimudJkPXl7Ay5z8JDd8GCd2HiUmjpCxufh1tCm7o6ERERkQajO2vS4Pr160dSUhKLFi3i5ptvJj4+ntDQUI4cOcLu3bvZsWOHdWtiYmIic+fOZeTIkeTl5WEymSgoKGDbtm0EBQXZPNQJEBERQW5uLiNGjCA8PBwnJyeGDRuGv78/SUlJpKam0qNHDxISEigpKeGNN94gKCio1uBXl7S0NLZs2cK8efPIy8sjJiYGPz8/vv76a/Lz83Fzc7vowScBAQGMHj3arj0nJ4dFixYRHh5u1z9v3jweeOAB7rrrLh577DEA/vrXv2KxWPjzn/982bU3C5tnXXpMC29YMrrmJSIiItJMKaxJo8jIyCAqKor09HSWLVuG2WzG19eXsLAwpk6dah0XGRnJihUrmDp1KosXL8bJyYmuXbvy3nvvMXLkSI4dO2Yz78KFCxkyZAgrV66koqICi8XC3Xffjb+/PxMmTKCoqIjs7GzmzJlDYGAg48aNw8nJiSlTplx27W5ubmzbto0ZM2awatUq6w95m0wmwsPDGT58eMN8ST8xcOBA1qxZw5QpU1iwYAEA4eHhLF++XFsgRURERH6mDMuF+89E5LpWXl6On58fZR2S8D1wvOkKuSW0ZmtiK1PT1SAiIiJyHdMzayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQfmdNpLlq3xKcXZtu/bDApltbREREpBlQWBNprtKfBB/fpq3By61p1xcRERG5jimsiTRXISbwbeKwJiIiIiL1pmfWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQeksCYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA7qhqQsQkUZy9AR8f7bp1vdyAz+vpltfRERE5DqnsCbSXCUvhv+WN83aYYHw+miFNREREZGroLAm0lx9VQwHjjd1FSIiIiJST3pmTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmohcf0pPQdKrEDAMvB6GPtPh04NNXZWIiIhIg1JYE5HrS3U1DJgNy/MgOQ7++Ch8Wwa9p8MXR5q6OhEREZEGo7AmVyQnJwfDMOp85ebmNur6kyZNIi0trVHXaAwHDx7Ey8sLwzAYP368XX91dTUzZswgLCwMV1dXPD096d69e6N/nw6p9zQY9krd/TnboOBzyEqGGQ/B6DjYPBOcnWDGymtXp4iIiEgj049iS7306dOH+Ph4u/aIiIhGXTczM5Pg4GAmT57cqOs0tGHDhnHu3Lk6+x944AHefvttbrvtNiZMmEBFRQUrVqzg3nvv5e9//zuDBg26htU6uJxtEOQPD0T92BbgB4nRsGwrmKvAzaXJyhMRERFpKAprUi+RkZGkpqY2dRkNymw2U1VVhbe3d4POu3jxYvLz83n66aeZP3++Xf+mTZt4++236d69Ozt27MDJqeaG95QpU7jlllsYM2YM999/P87Ozg1a13Vr1yHodhM4XbAx4M6OsOgD+M8RuK1d09QmIiIi0oC0DVIazYIFC+jSpQvu7u64ubnRqVMnFi5cWOu4qKgoWrZsiYuLCz4+PkRHR7N161abcYZhUFxczL59+2y2Xu7bt8/a379/f7v509LSMAyDnJwca1tSUhKGYVBQUMDgwYMxmUx4eHiwfv16ACoqKkhOTqZNmza4uLjg6elJz5492bhx4xV9B9999x2///3vuf/++7nrrrtqHXN+q+PQoUOtQQ0gKCiIX/3qVxw7dox33nnnitZt1o6WQEgL+/bzbUdOXNt6RERERBqJ7qxJvVRUVFBUVGTT5uHhwY033gjA448/TmZmJt27d2fs2LE4Ozvz7rvvkpyczNGjR5k9e7b1uoyMDPz8/EhMTCQkJIQDBw6wevVqYmNjKSgooFu3bgC89NJLzJw5Ex8fH1JSUqzXt27dut6fY8iQIbi6ujJixAgMw6Bdu3aYzWaioqLYv38/sbGxDB8+nNLSUlauXElcXBzr1q3jnnvuuaz5k5KSqK6u5rXXXmPLli21jjGbzQB4eXnZ9Xl4eACwZcsW7rvvvvp9SEdWdRbKKuzbzFVQXG7bbvKuuZt2+gy41fJPl7trzZ+nzzROrSIiIiLXmO6sSb1kZGTQpk0bm9f556o2bNhAZmYmjz32GJ988gl//OMfSUtLY8+e
PfTq1Yu5c+fy3XffWefKz8+noKCAv/zlL0ybNo2//vWvfPjhh5w7d45Zs2ZZx6WmpuLm5obJZCI1NdX68vf3r/fn8Pb2Zu/evbz00kv88Y9/pEePHjz77LPs3buXzMxM3n33XWbOnMmCBQv47LPP8PHxqfWAkNrk5uby1ltv8fzzzxMQEFDnuF/84hcAfPDBBzbt1dXVfPzxxwB2wfh6UF1dzX/+8x+btoKCApv3+zJyao7f/+mr4HNY8ZF9++FiioqKqHa/AcxnASgvL7feWaWyJqQVHjpw0TW3b99u8/xgYWEhJSUl1vdFRUUcPnzY+t5mjTrm1BpaQ2toDa2hNbSG1qjPGpdiWCwWyxVdIT9rOTk5JCQkMHDgQB5++GGbvrZt23LXXXfx8MMPs3LlSv75z38SEhJiM+avf/0rzz77LMuWLWPIkCE2fdXV1Zw4cYLKykoA7rrrLm644QYOHvzx97MCAgIIDg5m7969drUZhkG/fv2sWxnPS0tLY8qUKaxatcoaKJOSkli8eDEZGRkkJSXZjL/55puprKzkn//8p90aTz31FOvXr6e0tBQfH586vyez2UzHjh0JCAhg586dNt9dSkoKL7/8snXsmTNnuPnmmzl69CjJyck89thjnDx5khdeeMEaWuPj43n33XfrXO+nysvL8fPzo6xDEr4Hjl/WNQ3ullDY+Dy0Ml18XMn3sPOC30ebkAXBLWDivbbtv7y15u5Zx9HQMQRyn7Xtf/1DeOIvsGeenlkTERGRZkHbIKVeOnToYBfWzjtw4AAWi4U777yzzuv/7//+z/r3jRs3MnnyZHbv3m3dEnjexe5INYTbbrvNrq2oqIgzZ87Qpk2bOq/7v//7Pzp37lxnf0pKCseOHWPt2rWXrMHV1ZX333+fhx56iD//+c/8+c9/BqBdu3aMGzeOuXPnXjQYXtdaeMPdv7BvC2lh335eZHvI21/ze2s/PWTkn1+Apxt0atVo5YqIiIhcSwpr0uAsFguGYbB06dI6TzA8H+T2799PfHw8np6ePPXUU4SHh+Pt7Y1hGDzzzDOcPn36qus5e/ZsnX21hSCLxULbtm158cUX67zuYs/JHTx4kCVLlhAXF4fFYmHXrl0AfP3110DNoSO7du0iLCzMuoXz1ltvZc+ePezfv59///vfBAUFER0dzbRp06z98oNBvWqO739zOwyKrmkrLodVBTDwDh3bLyIiIs2Gwpo0uLCwMHbu3EmHDh2Iioq66NisrCzMZjPZ2dkkJCTY9I0aNQoXF9v/8DYMo865vLy8KC0ttWv/6TbKyxESEkJZWRmJiYn1Oi7/8OHDVFVVsXbt2lrvrC1dupSlS5eSnp7O6NGjbfpuvfVWm2D2/vvvYxiGfmftpwb1gqhOMDwdCougpQ/8ZT2cq4bnBzd1dSIiIiINRgeMSIN74oknAJgwYQJVVVV2/YcOHbL+/XwYuvDRyZkzZ1JWVmZ3rbu7O+Xl5XbtAKGhoRQWFtr0Hzt2jDVr1lxR/Q8++CBlZWVMnDix1v6f1l+brl27kp6ebvc6/2xcbGws6enpxMbGXnSexYsXs2PHDmJjYwkPD7+iz9CsOTvXPK/20F2w4F2YuBRa+tY8I3dLaFNXJyIiItJgdGdNGly/fv1ISkpi0aJF3HzzzcTHxxMaGsqRI0fYvXs3O3bssG5NTExMZO7cuYwcOZK8vDxMJhMFBQVs27aNoKAgmxN4ACIiIsjNzWXEiBGEh4fj5OTEsGHD8Pf3JykpidTUVHr06EFCQgIlJSW88cYbBAUF1Rr86pKWlsaWLVuYN28eeXl5xMTE4Ofnx9dff01+fj5ubm7s2bOnzusDAgLs7phBzQEjixYtIjw83K5/wIABVFdXExkZiYeHB/n5+Xz44Yd06NCB7Ozsy669Wdg869JjWnjDktE1LxEREZFmSmFNGkVGRgZRUVGkp6ezbNkyzGYzvr6+hIWFMXXqVOu4yMhIVqxYwdSpU1m8eDFOTk507dqV9957j5EjR3Ls2DGbeRcuXMiQIUNYuXIlFRUVWCwW7r77bvz9/ZkwYQJFRUVkZ2czZ84cAgMDGTduHE5OTkyZMuWya3dzc2Pbtm3MmDGDVatWWX/I22QyER4ezvDhwxvmS/qJHj16kJ2dzZYtWzh79izBwcGMGjWKtLS05nu4iIiIiIhclI7uF2lmrquj+0VERESkTnpmTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJCO7hdprtq3BGfXplk7LLBp1hURERFpRhTWRJqr9CfBx7fp1vdya7q1RURERJoBhTWR5irEBL5NGNZERERE5KromTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQeksCYiIiIiIuKAFNZEREREREQc0A1NXYCINJKjJ+D7s9d2TS838PO6tmuKiIiINFMKayLNVfJi+G/5tVsvLBBeH62wJiIiItJAFNZEmquviuHA8aauQkRERETqSc+siYiIiIiIOCCFNREREREREQeksCYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA1JYE5HrQ+kpSHoVAoaB18PQZzp8erCpqxIRERFpNAprckU2b96MYRh1vrZv396o68+fP5+srKxGXaMhfPrpp6SmptKtWzdatGhBixYt6NGjB3/5y1+oqqqyG//73/+e6OhoAgMDcXNzo02bNvzmN79h8+bN1754R1RdDQNmw/I8SI6DPz4K35ZB7+nwxZGmrk5ERESkUeh31uSKFBcXA9CjRw+6du1q1+/r69uo67/00kuEhoYybNiwRl3nas2aNYtNmzZxxx130LVrV6qrq/nss88YPXo0a9as4b333sMwDOv4999/n8DAQP7nf/4HNzc3Kioq2L59O3369GHp0qU88sgjTfhproHe06B9IGSNqb0/ZxsUfA6rUmFQdE1bYjR0SoYZK2F5yrWrVUREROQaUViTern99ttJTU21a2/Tpk2jrnvmzBlOnz7dKHNXVVVx7tw53N3dr3quX//618THx9OpUycCAgKorq7m8OHDPP/883zwwQesW7eOgQMHWsc///zz+Pv74+fnh6urK6dPn2bv3r0kJyfz/PPPN/+wdik52yDIHx6I+rEtwK8msC3bCuYqcHNpsvJEREREGoO2QUq9eHl50bFjR7vXT4POypUr+eUvf4mPjw+enp707NmTnJwcu7lWrlzJb3/7W9q2bYubmxstW7bkvvvuY8+ePTbjDMOguLiYffv22Wy9/Oqrr6z9td1xy8rKwjAMmy2Fzz33HIZh8NlnnzF+/Hhat26Nu7s7+fn5AJjNZl544QXCw8Nxd3fH39+fgQMHsmvXrsv6fgYPHszw4cP59a9/TZcuXejatSvx8fHWgPbRRx/ZjB84cCAxMTFERETQuXNnbr/9dh5++GF8fHz47rvvLmvNZm3XIeh2Ezhd8E/WnR2hwgz
/0VZIERERaX50Z03qpbKy0rol8jw3Nzd8fHwAePbZZ/nDH/5At27d+M1vfgPAvn37SEhIID09ndGjR1uv+9Of/oSLiws9e/bE09OT7777jo0bN/Lhhx+ya9cuOnbsCMDw4cNZtWoV3t7exMXFWa//aUA8duyYXa1Hjx4FsKn3yJGa/7gfPHgwFouF3r17U1VVhYuLC1VVVfTr14+CggJiYmK4/fbbMZvNfPTRR9x1111s3bqVO+6446LfT3Bw8EX7PT09a20vLi6murqao0ePsnjxYo4cOcKvfvWri871s3C0BH7Vxb49pEXNn0dOwG3trm1NIiIiIo1MYU3q5dVXX+XVV1+1aUtISOCNN97g008/5Q9/+AP9+/dn7NixtG7dGsMwOHToENOmTeOZZ57h0UcftQa7iRMn4ufnR0BAAJ6enpSXl7NhwwamT5/OnDlzWLJkCQCTJ0/mnXfewWQyMXnyZOu6N954Y70/R3V1NTNnzqRjx44YhkGbNm145ZVX2LJlC08//TT3338/AQEBnDlzhp07dzJp0iSefvpp6x24K/H999+TkZGBh4cH/fv3r7U/ICDA+t7NzY2YmBibz9osVJ2Fsgr7NnMVFJfbtpu8a+6mnT4DbrX8c+XuWvPn6TONU6uIiIhIE9I2SKmXmJgYxo0bZ/O65557AFiyZAmGYfD444/To0cPQkJCCA4OplevXvTt25dTp06xdetW61z3338//fr14/bbb6dVq1bcdNNNJCQkEBQURF5ennXc+TtsTk5ONlsvXVzq/6zSfffdx/33309ERAS33XYb/v7+LFmyhODgYIYPH054eDiBgYG0bt2auLg4brvtNrZv337Fz82dO3eO//f//h9FRUU8+uij/OIXv7Abc8MNNzBu3DjGjBnDkCFDrNtCIyIi6v35rrWz585Z/15eXs6+ffts+gsKCiD/3zXH7//0VfA5rPjIrv2LDdsoKSkBD1cwn6WoqIjDhw9b56s4UVrzFw9X2zUuXPMntm/fzrmf1FlYWFizxg8uXKPOz6E1tIbW0BpaQ2toDa1xlWtcimGxWCxXdIX8rOXk5JCQkMCoUaOYMmWKTZ+npycmk4no6Gi2bdt20Xnmz5/P008/DcCuXbuYNm0amzdv5tSpUzbjAgMD+eabb6zvAwICCA4OZu/evXZzGoZBv379WL9+vU17WloaU6ZMYdWqVQwaNAiApKQkFi9ezPvvv28Nmee5ublx5szF79R8+eWXhIWFXXTMedXV1QwbNozs7Gzuvfde/vSnP9GhQ4daxx05cgSLxcLJkyf5/PPPmT59OhaLhV27dl12KC0vL8fPz4+yDkn4Hjh+Wdc0iFtCYePz0Mp08XEl38POC34fbUIWBLeAiffatv/y1pq7Zx1HQ8cQyH3Wtv/1D+GJv8CeedoGKSIiIs2OtkFKvbi7u9O6deta+86cOYNhGIwZMwanCw+E+EH37t0BOHz4MDExMbi7u9O/f38CAwNxdXXFMAzeeOONS4amy/HT/0fkQoGBgXZtFouF0NBQEhIS6rzu/BbOS6murmbEiBFkZ2fzm9/8hhkzZtQa1KDmjuFPv9POnTvz/vvv89prr7F582a7UHndauENd//Cvi2khX37eZHtIW9/ze+t/fR/U//8AjzdoFOrRitXREREpKkorEmDCw0NZefOndx777106tSp1jEmU83dlxUrVnDq1CnGjh1LYmIi/v7+3HBDzf8ss7KyrH8/76e/TXYhLy8vTp48adf+3//+t85rnJ2d7doCAwM5deoUKSkpdYZNf3//Ouc873xQy8rKIj4+nhkzZnD77bdf8rrznJycrL9b9+WXX172dc3SoF41x/e/uf3H31krLodVBTDwDh3bLyIiIs2SnlmTBnf+ePqXX36ZkJAQWrdubfNycXGxnoZYWloKQGRkJJGRkbRv357WrVvzj3/8w9r3U+7u7rUGMqg5gXHfvn1UVPx4eEVJSQmrV6++ovr79u1LaWkpK1eutKv9fP0XhsgLWSwWnnjiCbKysoiLi+O5556r8wTJkpKSWu8gnjp1itWrV2MYBl261HIS4s/JoF4Q1QmGp8PMN+Av/6j5Ie1z1fD84KauTkRERKRR6M6aNLjf/va3vP3226xbt47IyEgSEhJo1aoVR48eZefOneTm5lrDSUxMDHPnzmXs2LEcPHiQFi1akJ+fzzvvvENAQAAXPlLZsWNHNm3axLRp07j11ltxcnJi4MCBeHl5MXDgQObPn0+fPn149NFHKS0tZdGiRfj7+1/Rb5WNHTvWevLjpk2b6Nu3L76+vhw+fJgNGzbg6urKli1bLjpHamoqmZmZtG7dmp49e/L555/z+eefW/tvvvlmevXqBcCHH35IcnIyDz74IB06dMDHx4dDhw6RnZ1NUVERv/nNb4iMjLzs+pslZ+ea59Um/hUWvFtz+mOPDpA1puZZOREREZFmSGFNGlxgYCDPPfcc7du3Jy8vj7lz53L69GlMJhNt2rQhMTHROvauu+5i3LhxrFmzhtmzZ+Ps7EyXLl2YOHEif/vb3zh+3PaAjLFjx1JSUsKCBQs4efIkFouF//znP3Ts2JHf/e53HD58mLy8PFJSUggNDaVfv364u7vzyiuvXHb93bp148UXX+Rvf/sbn376KZs2bcIwDFq2bElYWBj/8z//c8k5Nm3aBNScEvTcc8/Z9ScmJlrDWmBgILfeeiu5ubl89913VFZW4uvrS7t27bjvvvt47LHHLvsZuevW5lmXHtPCG5aMrnmJiIiI/AworMkViY6OJiMjg549e150XPfu3QkICCAuLo7jx49TVVWFh4cHJpOJdu1+PLXP19eXcePGER0dzYkTJzAMg6CgIHr27ElERITdlsfY2Fg8PDw4evQoZrMZ+PEHqDt27Mj06dP57LPPqKiowNfXl4iICAzDoGvXrkRHR1vnSUpK4o477rCp5TwnJycGDBhAWFgYX3zxhfWIVk9PTwIDA+t8Du+nnn/+eeuPcdfmp3PcfvvtPP/883zzzTecOnWKs2fP4u7uTkBAAF26dKFt27aXXE9EREREmh8d3S/SzDj80f0iIiIicll0wIiIiIiIiIgDUlgTERERERFxQAprIiIiIiIiDkhhTURERERExAEprImIiIiIiDggHd0v0ly1bwnOrtduvbDAa7eWiIiIyM+AwppIc5X+JPj4Xts1vdyu7XoiIiIizZjCmkhzFWIC32sc1kRERESkweiZNREREREREQeksCYiIiIiIuKAFNZEREREREQckMKaiIiIiIiIA1JYExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERBzQDU1dgIg0kqMn4Puz1249Lzfw87p264mIiIg0cwprIs1V8mL4b/m1WSssEF4frbAmIiIi0oAU1kSaq6+K4cDxpq5CREREROpJz6yJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oAU1kRERERERByQwpqIOLbSU5D0KgQMA6+Hoc90+P
RgU1clIiIi0ugU1kTEcVVXw4DZsDwPkuPgj4/Ct2XQezp8caSpqxMRERFpVAprckVycnIwDKPOV25ubqOuP2nSJNLS0hp1jYawb9++Or+jtm3b1nrNP/7xDyIjI3F3d8fDw4Pu3buzadOma1z5NdZ7Ggx7pe7+nG1Q8DlkJcOMh2B0HGyeCc5OMGPltatTREREpAnod9akXvr06UN8fLxde0RERKOum5mZSXBwMJMnT27UdRrKXXfdxX333WfTZjKZ7Mbl5uZy7733YjKZGDNmDABLly4lLi6ODz/8kF/+8pfXolzHk7MNgvzhgagf2wL8IDEalm0FcxW4uTRZeSIiIiKNSWFN6iUyMpLU1NSmLqNBmc1mqqqq8Pb2brA5u3Tpclnf09NPP80NN9zARx99RMeOHQFISkoiIiKCsWPH8umnnzZYTdeVXYeg203gdMEmgDs7wqIP4D9H4LZ2TVObiIiISCPTNkhpNAsWLKBLly64u7vj5uZGp06dWLhwYa3joqKiaNmyJS4uLvj4+BAdHc3WrVttxhmGQXFxsd0Ww3379ln7+/fvbzd/WloahmGQk5NjbUtKSsIwDAoKChg8eDAmkwkPDw/Wr18PQEVFBcnJybRp0wYXFxc8PT3p2bMnGzduvOLv4fvvv6e8vLzO/l27dnHgwAH69u1rDWoAHTt2pG/fvvzrX//iyy+/vOJ1m4WjJRDSwr79fNuRE9e2HhEREZFrSHfWpF4qKiooKiqyafPw8ODGG28E4PHHHyczM5Pu3bszduxYnJ2deffdd0lOTubo0aPMnj3bel1GRgZ+fn4kJiYSEhLCgQMHWL16NbGxsRQUFNCtWzcAXnrpJWbOnImPjw8pKSnW61u3bl3vzzFkyBBcXV0ZMWIEhmHQrl07zGYzUVFR7N+/n9jYWIYPH05paSkrV64kLi6OdevWcc8991zW/EuXLmXJkiVYLBZMJhP3338/CxYswNPT0zpm8+bNAERFRdld37NnT3Jzc9myZQs33XRTvT+nQ6g6C2UV9m3mKii+IMyavGvupp0+A261/DPl7lrz5+kzjVOriIiIiANQWJN6ycjIICMjw6atd+/ebNq0iQ0bNpCZmcljjz1GVlaWtT8tLY3o6Gjmzp1LSkqKNdjl5+fj7+9vM9eoUaOIiYlh1qxZvPXWWwCkpqYyZ84cTCZTg23B9Pb2ZufOnbi6ulrbJk6cyN69e8nOzmbo0KHW9unTp9O5c2fGjx/P3r17Lzqvs7MzERERxMfHc9NNN/HNN9/w1ltv8frrr7Nz50527NiBi0vNs1bnQ29tB4+cbzt8+PBVf9Yml//vmmP3L1TwOaz4yLbt0GvQPhA8XMF81v6ayh9CmoerfZ+IiIhIM6FtkFIvAwcOZPny5Tav83fLlixZgmEYjB49mqKiIpvXgAEDqKystG43BKxBrbq6muLiYoqKimjdujWtWrViz549jfo5xowZYxPUAN58801atWpF7969bWqvrKykZ8+eFBYWcvLkyYvOe+utt7J7927S0tJ48sknefbZZ9m5cye/+c1v+Ne//sWCBQusYysqau42eXh42M1z/g7cqVOnrvajNrpz585x5MiPx+mXl5dbt6gC8Iv2fDY/AT6YYX2d6hAAsZHW94V/TuTc+mkQ7A+A+UYvqg5/a52iqKioJrgeLQHge19X2zWAgoKCi77fvn07586ds74vLCykpKTEfo26PofW0BpaQ2toDa2hNbRGA61xKYbFYrFc0RXys5aTk0NCQgIpKSm8/PLLtY7p0aMHn3zyyUXnmTNnDpMmTQJg48aNTJ48md27d2M2m23GBQQE8O2339q8Dw4OrvXOlmEY9OvXzyYIQs0dvSlTprBq1SoGDRoE1DyztnjxYgoKCujVq5fNeDc3N86cufj2uv3799O5c+eLjqnNZ599RteuXenTp4/1+bcJEybw8ssvk5WVxWOPPWYzPisri+HDh/Pcc88xY8aMy1qjvLwcPz8/yjok4Xvg+BXXWC+3hMLG56GV/UmXF9V7Ws0dtKwxtfcnvAR5++HIEttDRpJehb9thRNLdRqkiIiINFvaBikNzmKxYBgGS5cuxdnZudYxd955J1ATeuLj4/H09OSpp54iPDwcb29vDMPgmWee4fTp01ddz9mztWyj+4GPj0+t9bdt25YXX3yxzuvq+5xc586dcXJyorS01G6u2rY6nm+r67fZmr1BvWqO739zOwyKrmkrLodVBTDwDgU1ERERadYU1qTBhYWFsXPnTjp06FDroRk/lZWVhdlsJjs7m4SEBJu+UaNGWZ/rOs8wjDrn8vLysglB5x08ePDyiwdCQkIoKysjMTGxzrBZX3v27KG6utr6vB7UPOsHNbfaL/TPf/4TwzD49a9/3aB1XDcG9YKoTjA8HQqLoKUP/GU9nKuG5wc3dXUiIiIijUrPrEmDe+KJJ4Ca7X1VVVV2/YcOHbL+/XwYunA37syZMykrK7O71t3dvc5j8ENDQyksLLTpP3bsGGvWrLmi+h988EHKysqYOHFirf0/rb8u//d//2fXdu7cOcaPHw/UPPN33u23387NN9/Mxo0bOXDggLX9wIEDbNy4kYiIiOv/JMj6cnaG3GfhobtgwbswcSm09K3ZcnlLaFNXJyIiItKodGdNGly/fv1ISkpi0aJF3HzzzcTHxxMaGsqRI0fYvXs3O3bssG5NTExMZO7cuYwcOZK8vDxMJhMFBQVs27aNoKAgm4c6ASIiIsjNzWXEiBGEh4fj5OTEsGHD8Pf3JykpidTUVHr06EFCQgIlJSW88cYbBAUF1Rr86pKWlsaWLVuYN28eeXl5xMTE4Ofnx9dff01+fj5ubm6XPPjkoYce4vvvv+eOO+6gbdu2HD9+nH/84x8cPHiQXr16MXr0aJvx8+bN44EHHuCuu+6yPrf217/+FYvFwp///OfLrv26s3nWpce08IYlo2teIiIiIj8jCmvSKDIyMoiKiiI9PZ1ly5ZhNpvx9fUlLCyMqVOnWsdFRkayYsUKpk6dyuLFi3FycqJr16689957jBw5kmPHjtnMu3DhQoYMGcLKlSupqKjAYrFw99134+/vz4QJEygqKiI7O5s5c+YQGBjIuHHjcHJyYsqUKZddu5ubG9u2bWPGjBmsWrXK+kPeJpOJ8PBwhg8ffsk5+vfvz8qVK8nJyeH777/nhhtuoF27djz77LM899xzdtsrBw4cyJo1a5gyZYr1pMjw8HCWL1/+890CKSIiIvIzp9MgRZqZ6+o0SBERERGpk55ZExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oD0O2sizVX7luDsem3WCgu8NuuIiIiI/IworIk0V+lPgo/vtVvPy+3arSUiIiLyM6CwJtJchZjA9xqGNRERERFpUHpmTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNRERERETEA
SmsiYiIiIiIOCCFNREREREREQd0Q1MXICKN5OgJ+P7stVvPyw38vK7deiIiIiLNnMKaSHOVvBj+W35t1goLhNdHK6yJiIiINCCFNZHm6qtiOHC8qasQERERkXrSM2siIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQeksCYijq30FCS9CgHDwOth6DMdPj3Y1FWJiIiINDqFNRFxXNXVMGA2LM+D5Dj446PwbRn0ng5fHGnq6kREREQalcKaXJGcnBwMw6jzlZub26jrT5o0ibS0tEZdoyHk5uYSExNDcHAw7u7uuLu707ZtW5566imOH7c/Tr9///51fqcLFy5sgk9wjfSeBsNeqbs/ZxsUfA5ZyTDjIRgdB5tngrMTzFh57eoUERERaQL6nTWplz59+hAfH2/XHhER0ajrZmZmEhwczOTJkxt1nau1b98+KisrGThwIKGhoVRXV/Pxxx/z+uuv849//IPPPvsMHx8fu+teeuklu7a+fftei5IdU842CPKHB6J+bAvwg8RoWLYVzFXg5tJk5YmIiIg0JoU1qZfIyEhSU1ObuowGZTabqaqqwtvb+6rnmjRpEpMmTbJrT0lJYf78+WRmZjJ27Fi7/ub2nV61XYeg203gdMEmgDs7wqIP4D9H4LZ2TVObiIiISCPTNkhpNAsWLKBLly64u7vj5uZGp06dat3St2DBAqKiomjZsiUuLi74+PgQHR3N1q1bbcYZhkFxcTH79u2z2Sa4b98+a3///v3t5k9LS8MwDHJycqxtSUlJGIZBQUEBgwcPxmQy4eHhwfr16wGoqKggOTmZNm3a4OLigqenJz179mTjxo1X9Z2EhYUBcOLEiVr7q6urKS4u5ty5c1e1TrNxtARCWti3n287Uvv3KCIiItIc6M6a1EtFRQVFRUU2bR4eHtx4440APP7442RmZtK9e3fGjh2Ls7Mz7777LsnJyRw9epTZs2dbr8vIyMDPz4/ExERCQkI4cOAAq1evJjY2loKCArp16wbUbBGcOXMmPj4+pKSkWK9v3bp1vT/HkCFDcHV1ZcSIERiGQbt27TCbzURFRbF//35iY2MZPnw4paWlrFy5kri4ONatW8c999xzWfOXl5dTXl7OyZMn2bJlC7Nnz+aGG27gvvvuq3W8l5cXlZWV3HDDDXTt2pUXXniBuLi4en8+h1J1Fsoq7NvMVVBcbttu8q65m3b6DLjV8s+Uu2vNn6fPNE6tIiIiIg5AYU3qJSMjg4yMDJu23r17s2nTJjZs2EBmZiaPPfYYWVlZ1v60tDSio6OZO3cuKSkp1mCXn5+Pv7+/zVyjRo0iJiaGWbNm8dZbbwE1WwTnzJmDyWRqsO2C3t7e7Ny5E1dXV2vbxIkT2bt3L9nZ2QwdOtTaPn36dDp37sz48ePZu3fvZc0/cuRI/v73v1vft2nThqysLCIjI23GBQYG8tBDD9GjRw98fHzYuXMny5YtY+DAgWRnZ/Pwww9f3Qd1BPn/rjl2/0IFn8OKj2zbDr0G7QPBwxXMZ+2vqfwhpHm42veJiIiINBPaBin1MnDgQJYvX27zOn+3bMmSJRiGwejRoykqKrJ5DRgwgMrKSut2Q8Aa1M5vASwqKqJ169a0atWKPXv2NOrnGDNmjE1QA3jzzTdp1aoVvXv3tqm9srKSnj17UlhYyMmTJy9r/tTUVJYvX056ejqDBw/GxcWFb775xm7c0qVLWbFiBRMmTCApKYmMjAzy8vJwdnZm/PjxDfJZG9u5c+c4cuTH4/TLy8utW1QB+EV7PpufAB/MsL5OdQiA2Ejr+8I/J3Ju/TQI9gfAfKMXVYe/tU5RVFTE4cOHa7ZHAt/7utquARQUFFz0/fbt2222mRYWFlJSUmK/Rl2fQ2toDa2hNbSG1tAaWqOB1rgUw2KxWK7oCvlZy8nJISEhgZSUFF5++eVax/To0YNPPvnkovPMmTPHegDHxo0bmTx5Mrt378ZsNtuMCwgI4Ntvv7V5HxwcXOudLcMw6Nevn00QhJo7elOmTGHVqlUMGjQIqHlmbfHixRQUFNCrVy+b8W5ubpw5c/Htdfv376dz584XHVObZcuW8cgjj/Dyyy/bbOWsS1xcHOvXr+fjjz/mjjvuuKw1ysvL8fPzo6xDEr4H7H8moFHcEgobn4dWpiu7rve0mjtoWWNq7094CfL2w5EltoeMJL0Kf9sKJ5bqNEgRERFptrQNUhqcxWLBMAyWLl2Ks7NzrWPuvPNOoCb0xMfH4+npyVNPPUV4eDje3t4YhsEzzzzD6dOnr7qes2dr2Ub3g9qOz7dYLLRt25YXX3yxzuvq+5zc0KFDSU5OZsmSJZcV1tq2bQvA0aNH67XedW9Qr5rj+9/cDoOia9qKy2FVAQy8Q0FNREREmjWFNWlwYWFh7Ny5kw4dOhAVFXXRsVlZWZjNZrKzs0lISLDpGzVqFC4utv8xbhhGnXN5eXlRWlpq137w4MHLLx4ICQmhrKyMxMTEOsPm1Thz5gzl5eWXHsiPtbdp06bB67guDOoFUZ1geDoUFkFLH/jLejhXDc8PburqRERERBqVnlmTBvfEE08AMGHCBKqqquz6Dx06ZP37+TB04W7cmTNnUlZWZnetu7t7nUEnNDSUwsJCm/5jx46xZs2aK6r/wQcfpKysjIkTJ9ba/9P66/Lll1/W2j5nzhxOnz5t8+PhpaWlfP/993ZjN27cyJYtWwgNDbU7kORnw9kZcp+Fh+6CBe/CxKXQ0rdmy+UtoU1dnYiIiEij0p01aXD9+vUjKSmJRYsWcfPNNxMfH09oaChHjhxh9+7d7Nixw7o1MTExkblz5zJy5Ejy8vIwmUwUFBSwbds2goKC7H5vLCIigtzcXEaMGEF4eDhOTk4MGzYMf39/kpKSSE1NpUePHiQkJFBSUsIbb7xBUFBQrcGvLmlpaWzZsoV58+aRl5dHTEwMfn5+fP311+Tn5+Pm5nbJg0/uvvtu/Pz86N69O+3ataO0tJTt27ezbds2TCYTc+fOtY7dtWsX999/P71796ZDhw54e3uze/du1q1bh5OTU62/TddsbJ516TEtvGHJ6JqXiIiIyM+Iwpo0ioyMDKKiokhPT2fZsmWYzWZ8fX0JCwtj6tSp1nGRkZGsWLGCqVOnsnjxYpycnOjatSvvvfceI0eO5NixYzbzLly4kCFDhrBy5UoqKiqwWCzcfffd+Pv7M2HCBIqKisjOzmbOnDkEBgYybtw4nJycmDJlymXX7ubmxrZt25gxYwarVq2yhiWTyUR4eDjDhw+/5BxDhw5l7dq1rF69mpMnT+Ls7ExwcDCPPPIIL7zwAqGhP94VateuHT169ODjjz/mvffe48yZM7Ro0YK+ffsya9Ys6/N9IiIiIvLzotMgRZqZ6+o0SBERERGpk55ZExERERERcUAKayIiIiIiIg5IYU1ERERERMQBKayJiIiIiIg4IIU1ERERERERB6SwJiIiIiIi4oD0O2sizVX7luDsem3WCgu8NuuIiIiI/IworIk0V+lPgo/vtVvPy+3arSUiIiLyM6CwJtJc
hZjA9xqGNRERERFpUHpmTURERERExAEprImIiIiIiDgghTUREREREREHpLAmIiIiIiLigBTWREREREREHJDCmoiIiIiIiANSWBMREREREXFACmsiIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQd0Q1MXICKN5OgJ+P7stVvPyw38vK7deiIiIiLNnMKaSHOVvBj+W35t1goLhNdHK6yJiIiINCCFNZHm6qtiOHC8qasQERERkXrSM2siIiIiIiIOSGFNRERERETEASmsiYiIiIiIOCCFNREREREREQeksCYijq30FCS9CgHDwOth6DMdPj3Y1FWJiIiINDqFNRFxXNXVMGA2LM+D5Dj446PwbRn0ng5fHGnq6kREREQalcKaXJGcnBwMw6jzlZub26jrT5o0ibS0tEZdozEcPHgQLy8vDMNg/PjxNn3ff/89M2fOpFevXgQEBODq6kpgYCB9+/Zl+/btTVTxNdJ7Ggx7pe7+nG1Q8DlkJcOMh2B0HGyeCc5OMGPltatTREREpAnod9akXvr06UN8fLxde0RERKOum5mZSXBwMJMnT27UdRrasGHDOHfuXK19hYWFzJgxgy5dujBo0CBCQ0M5ePAgq1atIiYmhuXLl5OQkHCNK3YQOdsgyB8eiPqxLcAPEqNh2VYwV4GbS5OVJyIiItKYFNakXiIjI0lNTW3qMhqU2WymqqoKb2/vBp138eLF5Ofn8/TTTzN//ny7/tDQUDZu3EifPn1s2pOSkvjVr37F5MmTf75hbdch6HYTOF2wCeDOjrDoA/jPEbitXdPUJiIiItLItA1SGs2CBQvo0qUL7u7uuLm50alTJxYuXFjruKioKFq2bImLiws+Pj5ER0ezdetWm3GGYVBcXMy+fftstl7u27fP2t+/f3+7+dPS0jAMg5ycHGtbUlIShmFQUFDA4MGDMZlMeHh4sH79egAqKipITk6mTZs2uLi44OnpSc+ePdm4ceMVfQffffcdv//977n//vu56667ah0TGhpqF9QAevXqRdu2bfn666+vaM1m5WgJhLSwbz/fduTEta1HRERE5BrSnTWpl4qKCoqKimzaPDw8uPHGGwF4/PHHyczMpHv37owdOxZnZ2feffddkpOTOXr0KLNnz7Zel5GRgZ+fH4mJiYSEhHDgwAFWr15NbGwsBQUFdOvWDYCXXnqJmTNn4uPjQ0pKivX61q1b1/tzDBkyBFdXV0aMGIFhGLRr1w6z2UxUVBT79+8nNjaW4cOHU1paysqVK4mLi2PdunXcc889lzV/UlIS1dXVvPbaa2zZsuWKajt37hwnTpzAz8+vPh/N8VSdhbIK+zZzFRSX27abvGvupp0+A261/DPl7lrz5+kzjVOriIiIiANQWJN6ycjIICMjw6atd+/ebNq0iQ0bNpCZmcljjz1GVlaWtT8tLY3o6Gjmzp1LSkqKNdjl5+fj7+9vM9eoUaOIiYlh1qxZvPXWWwCkpqYyZ84cTCZTg23B9Pb2ZufOnbi6ulrbJk6cyN69e8nOzmbo0KHW9unTp9O5c2fGjx/P3r17Lzl3bm4ub731FvPnzycgIOCKa3vuuecoLS1lxIgRV3ytQ8r/d82x+xcq+BxWfGTbdug1aB8IHq5gPmt/TeUPIc3D1b5PREREpJnQNkipl4EDB7J8+XKb1/m7ZUuWLMEwDEaPHk1RUZHNa8CAAVRWVlq3GwLWoFZdXU1xcTFFRUW0bt2aVq1asWfPnkb9HGPGjLEJagBvvvkmrVq1onfv3ja1V1ZW0rNnTwoLCzl58uRF5zWbzYwcOZLbb7+dsWPHXnFd77zzDi+++CLt27ev9Tk3R3Tu3DmOHPnxOP3y8nLrFlUAftGez+YnwAczrK9THQIgNtL6vvDPiZxbPw2C/QEw3+hF1eFvrVMUFRVx+PDhmu2RwPe+rrZrAAUFBRd9v337dpvDXgoLCykpKbFfo67PoTW0htbQGlpDa2gNrdFAa1yKYbFYLFd0hfys5eTkkJCQQEpKCi+//HKtY3r06MEnn3xy0XnmzJnDpEmTANi4cSOTJ09m9+7dmM1mm3EBAQF8++23Nu+Dg4NrvbNlGAb9+vWzCYJQc0dvypQprFq1ikGDBgE12xMXL15MQUEBvXr1shnv5ubGmTMX3163f/9+OnfuXGf/7373O5YsWcKOHTuIjIwELu+7A/jwww+599578fLyIj8/n44dO160lguVl5fj5+dHWYckfA8cv6Jr6+2WUNj4PLQyXdl1vafV3EHLGlN7f8JLkLcfjiyxPWQk6VX421Y4sVSnQYqIiEizpW2Q0uAsFguGYbB06VKcnZ1rHXPnnXcCNaEnPj4eT09PnnrqKcLDw/H29sYwDJ555hlOnz591fWcPVvLNrof+Pj41Fp/27ZtefHFF+u87mLPyR08eJAlS5YQFxeHxWJh165dANaDQr777jt27dpFWFiY3fbPDRs2cN999+Hp6cmmTZuuOKg1O4N61Rzf/+Z2GBRd01ZcDqsKYOAdCmoiIiLSrCmsSYMLCwtj586ddOjQgaioqIuOzcrKwmw2k52dbXc8/ahRo3Bxsf2PccMw6pzLy8uL0tJSu/aDBw9efvFASEgIZWVlJCYm1hk2L+bw4cNUVVWxdu1a1q5da9e/dOlSli5dSnp6OqNHj7a2b9iwgXvvvRd3d3c2bNhAeHj4Fa/d7AzqBVGdYHg6FBZBSx/4y3o4Vw3PD27q6kREREQalZ5Zkwb3xBNPADBhwgSqqqrs+g8dOmT9+/kwdOFu3JkzZ1JWVmZ3rbu7O+Xl5XbtUHMEfmFhoU3/sWPHWLNmzRXV/+CDD1JWVsbEiRNr7f9p/bXp2rUr6enpdq+kpCQAYmNjSU9PJzY21nrNxo0bbYJaY/+4+HXD2Rlyn4WH7oIF78LEpdDSt2bL5S2hTV2diIiISKPSnTVpcP369SMpKYlFixZx8803Ex8fT2hoKEeOHGH37t3s2LHDujUxMTGRuXPnMnLkSPLy8jCZTBQUFLBt2zaCgoJsHuoEiIiIIDc3lxEjRhAeHo6TkxPDhg3D39+fpKQkUlNT6dGjBwkJCZSUlPDGG28QFBRUa/CrS1paGlu2bGHevHnk5eURExODn58fX3/9Nfn5+bi5uV304JOAgACbO2bn5eTksGjRIsLDw236P/vsM+69914qKioYMmQIH3zwAR988IHNtU888YTdlslmYfOsS49p4Q1LRte8RERERH5GFNakUWRkZBAVFUV6+v9v787Da7oWN45/TyLzHBnNMWtC1RhTUUQpvTVEFa2pRRptKdqiP1QN1XmgimpSVGlUdaCosQhVbmkVNQ9BSkQSRCJk//5Ic64jgyCRQ9/P85znOmuvvdfa5+zLebvWXnsq8+bNIz09HXd3d4KCghg9erS5Xu3atVmwYAGjR49m1qxZ2NjYEBISwooVKxg0aBDx8fEWx502bRo9e/Zk4cKFpKamYhgGrVu3xtPTk2HDhhEXF8fcuXOZMmUKfn5+DBkyBBsbG0aNGlXgvjs4OLB582bGjh1LTEyM+UH
e3t7eBAcH07dv38L5kP6xZ88eLly4AMDMmTNzrfPwww/fm2FNRERERPKk1SBF7jF31WqQIiIiIpIn3bMmIiIiIiJihRTWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWSGFNRERERETECuk5ayL3qgo+YGt/Z9oK8rsz7YiIiIj8iyisidyrpj4Dbu53rj0XhzvXloiIiMi/gMKayL0q0Bvc72BYExEREZFCpXvWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWSGFNRERERETECimsiYiIiIiIWCGFNRERERERESuksCYiIiIiImKFFNZERERERESskMKaiIiIiIiIFVJYExERERERsUIlirsDIlJETiXChSt3pi0XB/BwuTNtiYiIiPxLKKyJ3KsGz4LjKUXfTpAfzI5UWBMREREpZAprIveqIwlw4Exx90JEREREbpHuWRMREREREbFCCmsiIiIiIiJWSGFNRERERETECimsiYiIiIiIWCGFNRGxbkkXYcB08O0DLk9AyzHw34PF3SsRERGRIqewJiLWKzMTHpkA8zfA4Hbw5lNwOhlajIH9J4u7dyIiIiJFSmFNbsqiRYswmUx5vpYtW1ak7b/00ktMnjy5SNsoDMuWLaNZs2YEBATg6OiIo6Mj5cqVY+DAgZw5k/dy+m+++SY1atTAycnJvE9ERMQd7Pkd1uL/oM9HeW9ftBli/4LowTD2cYhsB+vGg60NjF145/opIiIiUgz0nDW5JS1btqR9+/Y5ymvVqlWk7UZFRREQEMDIkSOLtJ3btWvXLtLS0ujYsSOlS5cmMzOTX3/9ldmzZ/Pjjz/y559/4ubmZrFPu3btWLFiBc2aNSM8PBwbGxsOHTrEsWPHiuksrMCizeDvCZ1D/1fm6wHdGsO8nyE9Axzsiq17IiIiIkVJYU1uSe3atRk+fHhxd6NQpaenk5GRgaur620f66WXXuKll17KUT506FDef/99oqKieP75583lEyZMYPny5UyZMiXX/f61fjsMdSqCzXWTABpUgZk/wb6TULN88fRNREREpIhpGqQUmQ8//JD77rsPR0dHHBwcqFq1KtOmTcu1XmhoKD4+PtjZ2eHm5kbjxo35+eefLeqZTCYSEhLYtWuXxdTLXbt2mbc//PDDOY4/efJkTCYTixYtMpcNGDAAk8lEbGws3bt3x9vbGycnJ5YvXw5AamoqgwcPpmzZstjZ2eHs7EzDhg1Zs2bNbX0mQUFBACQmJprLMjMzmTp1KhUrVjQHtbNnz5KZmXlbbd0TTp2DQK+c5dllJxNzbhMRERG5R2hkTW5JamoqcXFxFmVOTk6ULFkSgH79+hEVFUXdunV5/vnnsbW1ZenSpQwePJhTp04xYcIE834zZszAw8ODbt26ERgYyIEDB/j6668JCwsjNjaWOnXqAPDWW28xfvx43NzcGDp0qHn/MmXK3PJ59OzZE3t7e/r374/JZKJ8+fKkp6cTGhrKnj17CAsLo2/fviQlJbFw4ULatWvHDz/8QJs2bQp0/JSUFFJSUjh//jzr169nwoQJlChRgscee8xcZ9u2bfz999906dKFiIgI5s6dy8WLF3FycqJNmzZER0fj5ZVLYLnbZFyB5NScZekZkJBiWe7tmjWadukyOOTy15Sjfdb/XrpcNH0VERERsQIKa3JLZsyYwYwZMyzKWrRowdq1a1m9ejVRUVH07t2b6Oho8/bJkyfTuHFj3nnnHYYOHWoOdps2bcLT09PiWBERETRr1ozXX3+db775BoDhw4czZcoUvL29C20KpqurK9u3b8fe3t5cNmLECP744w/mzp1Lr169zOVjxoyhevXqvPjii/zxxx8FOv6gQYP48ssvze/Lli1LdHQ0tWvXNpft2LEDgJUrV3LlyhUGDRpE5cqV+f777/nuu+9o3rw5O3bswOb6qYB3m017s5bdv17sX7Bgo2XZ4U+ggh842UP6lZz7pP0T0pzsc24TERERuUfc5b/+pLh07NiR+fPnW7yyR8s+/fRTTCYTkZGRxMXFWbweeeQR0tLSzNMNAXNQy8zMJCEhgbi4OMqUKUOpUqX4/fffi/Q8nnvuOYugBrB48WJKlSpFixYtLPqelpZGw4YN2b17N+fPny/Q8YcPH878+fOZOnUq3bt3x87Ojr///tuiTkpK1qjS+fPnmT17Nu+++y7PPvssP/74I23btjUHR2sXHx9vsRhKSkqKeYoqAPdX4M/3w+GnsebXxcq+EFbb/H73B924uvz/IMATgPSSLmQcO20+RFxcXFYbp84BcMHd3rINIDY2Nt/3W7Zs4erVq+b3u3fv5ty5cznbyOs81IbaUBtqQ22oDbWhNgqpjRsxGYZh3NQe8q+2aNEiwsPDGTp0KO+++26uderXr8+2bdvyPc61C2msWbOGkSNHsnPnTtLT0y3q+fr6cvr0aYv3AQEBuY5smUwm2rZtaxEEIWtEb9SoUcTExNC1a1cg6561WbNmERsbS6NGjSzqOzg4cPly/tPr9uzZQ/Xq1fOtk5t58+bx5JNP8u6775qncn788cdERkbi5eVlcS8bQExMDN26daNXr14FDmwpKSl4eHiQXHkA7gfyfkxAoalWGta8BqW8b37fFv+XNYIW/Vzu28Pfgg174OSnlouMDJgOX/wMiXO0GqSIiIjcszQNUgqdYRiYTCbmzJmDra1trnUaNGgAZIWe9u3b4+zszMCBAwkODsbV1RWTycTLL7/MpUuXbrs/V67kMo3uH9cvn5/d/3LlyvHGG2/kud+t3ifXq1cvBg8ezKeffmoOa9mLjnh75ww75ctnrXSYlJR0S+3d9bo2ylq+f/EW6No4qywhBWJioWM9BTURERG5pymsSaELCgpi+/btVK5cmdDQ0HzrRkdHk56ezty5cwkPD7fYFhERgZ2d5Y9xk8mU57FcXFxyDTUHDx4seOeBwMBAkpOT6datW55h83ZcvnzZPPURoEmTJtjZ2ZGQkJCjbnbffX19C70fd4WujSC0KvSdCrvjwMcNPl4OVzPhte7F3TsRERGRIqV71qTQPf300wAMGzaMjIyMHNsPHz5s/nN2GLp+Nu748eNJTk7Osa+jo6NF0LlW6dKl2b17t8X2+Ph4lixZclP979KlC8nJyYwYMSLX7df2Py+HDh3KtXzKlClcunTJ4uHh7u7uPPjggyQnJ/PJJ59Y1M9+1EGnTp0K2v17i60tLHsVHm8CHy6FEXPAxz1r2mW10sXdOxEREZEipZE1KXRt27ZlwIABzJw5k0qVKtG+fXtKly7NyZMn2blzJ1u3bjVPTezWrRvvvPMOgwYNYsOGDXh7exMbG8vmzZvx9/e3uKkToFatWixbtoz+/fsTHByMjY0Nffr0wdPTkwEDBjB8+HDq169PeHg4586d46uvvsLf3z/X4JeXyZMns379et577z02bNhAs2bN8PDw4OjRo2zatAkHB4cbLnzSunVrPDw8qFu3LuXLlycpKYktW7awefNmvL29eeeddyzqf/TRRzRq1Ijnn3+ejR
s3EhQUxMqVK9m6dSthYWF07NixwP2/q6x7/cZ1vFzh08isl4iIiMi/iMKaFIkZM2YQGhrK1KlTmTdvHunp6bi7uxMUFMTo0aPN9WrXrs2CBQsYPXo0s2bNwsbGhpCQEFasWMGgQYOIj4+3OO60adPo2bMnCxcuJDU1FcMwaN26NZ6engwbNoy4uDjmzp3LlClT8PPzY8iQIdjY2DBq1KgC993BwYHNmzczduxYYmJizKNb3t7eBAcH07dv3xseo1evXnz33Xd8/fXXnD9/HltbWwICAnjyySeZNGkSpUtbjgrVqFGDjRs3MmTIEL777jtSU1Px9/fnxRdf5M033yxw30VERETk3qHVIEXuMXfVapAiIiIikifdsyYiIiIiImKFFNZERERERESskMKaiIiIiIiIFVJYExERERERsUIKayIiIiIiIlZIYU1ERERERMQK6TlrIveqCj5ga1/07QT5FX0bIiIiIv9CCmsi96qpz4Cb+51py8XhzrQjIiIi8i+isCZyrwr0Bvc7FNZEREREpNDpnjURERERERErpLAmIiIiIiJihRTWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWSGFNRERERETECimsiYiIiIiIWCGFNRERERERESuksCYiIiIiImKFFNZERERERESsUIni7oCIFJFTiXDhyp1py8UBPFzuTFsiIiIi/xIKayL3qsGz4HhK0bcT5AezIxXWRERERAqZwprIvepIAhw4U9y9EBEREZFbpHvWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWSGFNRKxX0kUYMB18+4DLE9ByDPz3YHH3SkREROSOUFgTEeuUmQmPTID5G2BwO3jzKTidDC3GwP6Txd07ERERkSKnsCY3ZdGiRZhMpjxfy5YtK9L2X3rpJSZPnlykbRSFgwcP4uLigslk4sUXX8yz3ptvvkmNGjVwcnLC0dGRcuXKERERcQd7ege1+D/o81He2xdthti/IHowjH0cItvBuvFgawNjF965foqIiIgUEz1nTW5Jy5Ytad++fY7yWrVqFWm7UVFRBAQEMHLkyCJtp7D16dOHq1ev5lunXbt2rFixgmbNmhEeHo6NjQ2HDh3i2LFjd6iXVmbRZvD3hM6h/yvz9YBujWHez5CeAQ52xdY9ERERkaKmsCa3pHbt2gwfPry4u1Go0tPTycjIwNXVtVCPO2vWLDZt2sQLL7zA+++/n2udCRMmsHz5cqZMmcJLL71UqO3ftX47DHUqgs11EwAaVIGZP8G+k1CzfPH0TUREROQO0DRIKTIffvgh9913H46Ojjg4OFC1alWmTZuWa73Q0FB8fHyws7PDzc2Nxo0b8/PPP1vUM5lMJCQksGvXLoupl7t27TJvf/jhh3Mcf/LkyZhMJhYtWmQuGzBgACaTidjYWLp37463tzdOTk4sX74cgNTUVAYPHkzZsmWxs7PD2dmZhg0bsmbNmpv6DM6ePcsrr7xCp06daNKkSa51MjMzmTp1KhUrVjQHtbNnz5KZmXlTbd1zTp2DQK+c5dllJxPvbH9ERERE7jCNrMktSU1NJS4uzqLMycmJkiVLAtCvXz+ioqKoW7cuzz//PLa2tixdupTBgwdz6tQpJkyYYN5vxowZeHh40K1bNwIDAzlw4ABff/01YWFhxMbGUqdOHQDeeustxo8fj5ubG0OHDjXvX6ZMmVs+j549e2Jvb0///v0xmUyUL1+e9PR0QkND2bNnD2FhYfTt25ekpCQWLlxIu3bt+OGHH2jTpk2Bjj9gwAAyMzP55JNPWL9+fa51tm3bxt9//02XLl2IiIhg7ty5XLx4EScnJ9q0aUN0dDReXrmElrtJxhVITs1Zlp4BCSmW5d6uWaNply6DQy5/RTnaZ/3vpctF01cRERERK6GwJrdkxowZzJgxw6KsRYsWrF27ltWrVxMVFUXv3r2Jjo42b588eTKNGzfmnXfeYejQoeZgt2nTJjw9PS2OFRERQbNmzXj99df55ptvABg+fDhTpkzB29u70KZgurq6sn37duzt7c1lI0aM4I8//mDu3Ln06tXLXD5mzBiqV6/Oiy++yB9//HHDYy9btoxvvvmG999/H19f3zzr7dixA4CVK1dy5coVBg0aROXKlfn+++/57rvvaN68OTt27MDm+umAd5NNe7OW3b9e7F+wYKNl2eFPoIIfONlD+pWc+6T9E9Kc7HNuExEREbmH3MW//qQ4dezYkfnz51u8skfLPv30U0wmE5GRkcTFxVm8HnnkEdLS0szTDQFzUMvMzCQhIYG4uDjKlClDqVKl+P3334v0PJ577jmLoAawePFiSpUqRYsWLSz6npaWRsOGDdm9ezfnz5/P97jp6ekMGjSIBx54gOeffz7fuikpWSNL58+fZ/bs2bz77rs8++yz/Pjjj7Rt29YcHK1dfHy8xWIoKSkp5imq3F8BfhrLn++Hw09js161ynOuQYX/vf9pLLs/6MZVX7esfQK9OL//OOfOnTMfMy4ujrO7/nnOWilvyzb+ERsbm+/7LVu2WCz2snv37hxt5HkeakNtqA21oTbUhtpQG4XYxg0ZIjchJibGAIyhQ4fmWadevXoGkO9rypQp5vqrV682GjRoYDg4OOSo5+vra3FsHx8fIyQkJNd2AaNt27Y5yidNmmQARkxMjLnsmWeeMQAjNjY2R317e/sb9n/Pnj35fk4RERGGnZ2d8dtvv5nL8vrspk2bZgCGl5dXjuN89dVXBmD06tUr3/aulZycbABGcuUBhkGnon9VG2wYJ84WuH9mzV81jN4f5r2965uG4d/XMK5etSx/5mPDcO5uGGmXb75NERERkbuIpkFKoTMMA5PJxJw5c7C1tc21ToMGDQDYs2cP7du3x9nZmYEDBxIcHIyrqysmk4mXX36ZS5cu3XZ/rlzJZSrdP9zc3HLtf7ly5XjjjTfy3C+/++QOHjzIp59+Srt27TAMg99++w2Ao0ePAlmLh/z2228EBQXh6elJUFAQAN7e3jmOVb581mqHSUlJebZ3z+raKGv5/sVboGvjrLKEFIiJhY71tGy/iIiI3PMU1qTQBQUFsX37dipXrkxoaGi+daOjo0lPT2fu3LmEh4dbbIuIiMDOzvIHuclkyvNYLi4uuYaagwcPFrzzQGBgIMnJyXTr1i3PsJmfY8eOkZGRwXfffcd3332XY/ucOXOYM2cOU6dOJTIykiZNmmBnZ0dCQkKefc/vnrd7VtdGEFoV+k6F3XHg4wYfL4ermfBa9+LunYiIiEiR0z1rUuiefvppAIYNG0ZGRkaO7YcPHzb/OTsMGYZhUWf8+PEkJyfn2NfR0dF8j9f1Spcuze7duy22x8fHs2TJkpvqf5cuXUhOTmbEiBG5br+2/7kJCQlh6tSpOV4DBgwAICwsjKlTpxIWFgaAu7s7Dz74IMnJyXzyyScWx8p+1EGnTp1u6hzuCba2sOxVeLwJfLgURswBH3dY8xpUK13cvRMREREpchpZk0LXtm1bBgwYwMyZM6lUqRLt27end
OnSnDx5kp07d7J161bz1MRu3brxzjvvMGjQIDZs2IC3tzexsbFs3rwZf39/i5s6AWrVqsWyZcvo378/wcHB2NjY0KdPHzw9PRkwYADDhw+nfv36hIeHc+7cOb766iv8/f1zDX55mTx5MuvXr+e9995jw4YNNGvWDA8PD44ePcqmTZtwcHDId+ETX19fIiMjc5QvWrSImTNnEhwcnGP7Rx99RKNGjXj++efZuHEjQUFBrFy5kq1btxIWFkbHjh0L3P+7xrrXb1zHyxU+jcx6iYiIiPzLKKxJkZgxYwahoaFMnTqVefPmkZ6ejru7O0FBQYwePdpcr3bt2ixYsIDRo0cza9YsbGxsCAkJYcWKFQwaNIj4+HiL406bNo2ePXuycOFCUlNTMQyD1q1b4+npybBhw4iLi2Pu3LlMmTIFPz8/hgwZgo2NDaNGjSpw3x0cHNi8eTNjx44lJibGPLrl7e1NcHAwffv2LZwP6Ro1atRg48aNDBkyhO+++47U1FT8/f158cUXefPNNwu9PRERERGxfibj+vlnInJXS0lJwcPDg+TKA3A/cKboG6xWOmtqYqmcC6SIiIiIyK3TPWsiIiIiIiJWSGFNRERERETECimsiYiIiIiIWCGFNRERERERESuksCYiIiIiImKFFNZERERERESskJ6zJnKvquADtvZF306QX9G3ISIiIvIvpLAmcq+a+gy4ud+Ztlwc7kw7IiIiIv8iCmsi96pAb3C/Q2FNRERERAqd7lkTERERERGxQgprIiIiIiIiVkhhTURERERExAoprImIiIiIiFghhTURERERERErpLAmIiIiIiJihRTWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWqERxd0BEisipRLhwpejbcXEAD5eib0dERETkX0ZhTeReNXgWHE8p2jaC/GB2pMKaiIiISBFQWBO5Vx1JgANnirsXIiIiInKLdM+aiIiIiIiIFVJYExERERERsUIKayIiIiIiIlZIYU1ERERERMQKKayJiIiIiIhYIYU1EbFeSRdhwHTw7QMuT0DLMfDfg8XdKxEREZE7QmFNRKxTZiY8MgHmb4DB7eDNp+B0MrQYA/tPFnfvRERERIqcwprclEWLFmEymfJ8LVu2rEjbf+mll5g8eXKRtlEYzp07x8CBAwkNDcXb2xuTyUTNmjXzrJ+Zmcm7775L9erVcXNzw9HRkbJlyzJo0CASEhLuYM/voBb/B30+ynv7os0Q+xdED4axj0NkO1g3HmxtYOzCO9dPERERkWKih2LLLWnZsiXt27fPUV6rVq0ibTcqKoqAgABGjhxZpO3crhMnTjBz5kw8PDyoWrUq27dvz7d+//79iY6OplatWgwePBh7e3t+/vlnZsyYwdq1a9mzZw82Nv+y/7ayaDP4e0Ln0P+V+XpAt8Yw72dIzwAHu2LrnoiIiEhRU1iTW1K7dm2GDx9e3N0oVOnp6WRkZODq6nrbx6pYsSJ79+6lWrVqADg6OuZZ9/Lly8yfP5+KFSvy3//+F1tbW/O21q1bs3r1atavX0/Lli1vu193ld8OQ52KcH1IbVAFZv4E+05CzfLF0zcRERGRO+Bf9p/q5U768MMPue+++3B0dMTBwYGqVasybdq0XOuFhobi4+ODnZ0dbm5uNG7cmJ9//tminslkIiEhgV27dllMvdy1a5d5+8MPP5zj+JMnT8ZkMrFo0SJz2YABAzCZTMTGxtK9e3e8vb1xcnJi+fLlAKSmpjJ48GDKli2LnZ0dzs7ONGzYkDVr1hTo3J2dnc1B7UayQ2LJkiUtghpAQEAAAB4eHgU61j3l1DkI9MpZnl12MvHO9kdERETkDtPImtyS1NRU4uLiLMqcnJwoWbIkAP369SMqKoq6devy/PPPY2try9KlSxk8eDCnTp1iwoQJ5v1mzJiBh4cH3bp1IzAwkAMHDvD1118TFhZGbGwsderUAeCtt95i/PjxuLm5MXToUPP+ZcqUueXz6NmzJ/b29vTv3x+TyUT58uVJT08nNDSUPXv2EBYWRt++fUlKSmLhwoW0a9eOH374gTZt2txym9dzc3MjODiYbdu28cILL9CnTx/s7Oz4/vvvWbRoEa1btzZ/BnetjCuQnJqzLD0DElIsy71ds0bTLl0Gh1z+inK0z/rfS5eLpq8iIiIi1sIQuQkxMTEGkOurRYsWhmEYxqpVqwzA6N27d479GzVqZDg6OhoJCQnmsnPnzuWot3nzZqNEiRLGY489ZlHu4+NjhISE5No3wGjbtm2O8kmTJhmAERMTYy575plnDMAICQkx0tPTLeoPHz7cAIy5c+dalJ85c8YoWbJknu3nx8HBId/99u3bZ9SuXdvi8zSZTEa/fv2Mq1ev3lRbycnJBmAkVx5gGHQq2le1wYZx4qxhGIZx/Phx4+jRoxb9+OOPP7LerP2jwMe8cuBk1j4uTxiJnV83EhMTzcc8fvy48XfUj1l1l//Xso1/bNq0Kd/3mzdvNq5cuWJ+/+eff+ZoI8/zUBtqQ22oDbWhNtSG2ijENm7EZBiGUQwZUe5SixYtIjw8nI4dO/LEE09YbCtXrhxNmjThiSeeYOHChfzyyy8EBgZa1Pn888959dVXmTdvHj179rTYlpmZSWJiImlpaQA0adKEEiVKcPDg/56r5evrS0BAAH/88UeOvplMJtq2bWueypht8uTJjBo1ipiYGLp27QpkTYOcNWsWM2bMYMCAARb1K1WqRFpaGr/88kuONgYOHMjy5ctJSkrCzc3tRh+XmaOjI1WqVMm13wBxcXFERERw+fJl2rVrB8DixYvZsGEDzz77bK7TR/OSkpKCh4cHyZUH4H7gTIH3uyXVSsOa16CUd/71zl2A7dc9H21YNAR4wYj/WJY3rZE1elYlEqoEwrJXLbfPXgVPfwy/v6d71kREROSepmmQcksqV66cI6xlO3DgAIZh0KBBgzz3P3HihPnPa9asYeTIkezcuZP09HSLer6+voXT4Tzktpx+XFwcly9fpmzZsnnud+LECapXr14ofUhJSaF+/fpUr16dtWvXmsuHDBlCy5YtmT59On369KF+/fqF0l6x8HKF1vfnLAv0ylmerXYF2LAn63lr1y4y8st+cHaAqqWKrLsiIiIi1kBhTQqdYRiYTCbmzJmTY8GMbNlBbs+ePbRv3x5nZ2cGDhxIcHAwrq6umEwmXn75ZS5dunTb/bly5Uqe23IbHTMMg3LlyvHGG2/kud/t3Cd3vU8++YT4+Hj+7//+L8e28PBw1q1bx4oVK+7usHYrujbKWr5/8Rbo2jirLCEFYmKhYz0t2y8iIiL3PIU1KXRBQUFs376dypUrExoamm/d6Oho0tPTmTt3LuHh4RbbIiIisLOz/EFuMpnyPJaLiwtJSUk5yq+dRlkQgYGBJCcn061btzzDZmE6fvw4kHuozC7LL3Des7o2gtCq0Hcq7I4DHzf4eDlczYTXuhd370RERESKnJbul0L39NNPAzBs2DAyMjJybD98+LD5z9lh6PpbJ8ePH09ycnKOfR0dHUlJSclR
DlC6dGl2795tsT0+Pp4lS5bcVP+7dOlCcnIyI0aMyHX7tf0vDNlTMefNm5dj29y5cwFo1qxZobZ5V7C1zbpf7fEm8OFSGDEHfNyz7pGrVrq4eyciIiJS5DSyJoWubdu2DBgwgJkzZ1KpUiXat29P6dKlOXnyJDt37mTr1q3mkaJu3brxzjvvMGjQIDZs2IC3tzexsbFs3rwZf39/rl69anHsWrVqsWzZMvr3709wcDA2Njb06dMHT09PBgwYwPDhw6lfvz7h4eGcO3eOr776Cn9//1yDX14mT57M+vXree+999iwYQPNmjXDw8ODo0ePsmnTJhwcHPj9999veJxRo0aZR/quXr3K33//zbPPPgtA3bp16d+/PwB9+/bl7bff5tdffyU4OJhHHnkEgKVLl7J7924efPBBWrVqVeD+3zXWvX7jOl6u8Glk1ktERETkX0ZhTYrEjBkzCA0NZerUqcybN4/09HTc3d0JCgpi9OjR5nq1a9dmwYIFjB49mlmzZmFjY0NISAgrVqxg0KBBxMfHWxx32rRp9OzZk4ULF5KamophGLRu3RpPT0+GDRtGXFwcc+fOZcqUKfj5+TFkyBBsbGwYNWpUgfvu4ODA5s2bGTt2LDExMeaVGL29vQkODqZv374FOs6sWbNISEgwvz9z5gzTp08HsgJtdlizs7Nj8+bNDBs2jJ9++on33nsPyJqO+cILL/Dmm28WuO8iIiIicu/Q0v0i9xirXLpfRERERG6a7lkTERERERGxQgprIiIiIiIiVkhhTURERERExAoprImIiIiIiFghhTURERERERErpKX7Re5VFXzA1r5o2wjyK9rji4iIiPyLKayJ3KumPgNu7kXfjotD0bchIiIi8i+ksCZyrwr0Bvc7ENZEREREpEjonjURERERERErpLAmIiIiIiJihRTWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWSGFNRERERETECimsiYiIiIiIWCGFNRERERERESuksCYiIiIiImKFFNZERERERESsUIni7oCIFJFTiXDhSnH3QkREROTmuDiAh0tx98IqKKyJ3KsGz4LjKcXdCxEREZGCC/KD2ZEKa/9QWBO5Vx1JgANnirsXIiIiInKLdM+aiIiIiIiIFVJYExERERERsUIKayIiIiIiIlZIYU1ERERERMQKKayJiIiIiIhYIYW1XBw+fJjHHnsMX19fTCYTffr0Ke4uFYk+ffpgMpmKuxsWPv74Y6pXr46DgwMmk4kjR44Ud5dERERE5F6QdBEGTAffPuDyBLQcA/89WPD998TBw+PBtQd4PwVPfgBnkvPf54v1YOqctc8tuOmwdujQIQYMGED16tVxdnbGy8uLGjVq0Lt3b9auXXtLnbA2Tz75JKtXr6ZVq1b07duX+vXr51n3r7/+omfPntSoUQMPDw+cnZ2pXr06L774IqdOncpR32Qy5fuaOHFioZ7LkiVLGDduXK7b4uPjC7Wt27V27VoiIyNxc3OjR48e9O3bF0dHxyJrb8eOHYwbN06BUERERORel5kJj0yA+RtgcDt48yk4nQwtxsD+kzfePy4BHnwVDsTDpJ4w/FFYuh3avAaXM3Lf58IleGkuuNz679mbes7atm3baN68OTY2NjRt2pQGDRpw5coVzp49y+rVq7GxsaFly5a33BlrkJ6eTmxsLK1atWLkyJE4Ozvj7u6eZ/29e/eye/duqlevTr169TCZTJw+fZpPPvmEL7/8kp07d+Ln52eu37dv31yP88MPP3DmzBkqV65cqOczb948vv766zwDmzVZvHgxAC+//DI1a9bExsaGkiVLFll7GzZs4LXXXqNBgwZUqFChyNoRERERkSLW4v+ggh9EP5f79kWbIfYviBkOXRtnlXVrDFUHw9iFMH9o/sef9DVcTIPtb0E536yyBlWywlr0WhgQlnOfCYvAzQlahsCSrbd0WjcV1kaPHk1qaiqvvfYarVq1wsvLi6tXr5KcnExcXBxOTk631AlrcvLkSQzDoGzZstx///03rF+vXj1ef/11/Pz8cHFxwcbGhsTERObNm8cnn3zCrFmzGD16tLn+yJEjcxwjPj6e6OhoypcvT+vWrQv1fC5cuFCox7sd58+fx83NLc/t2SNcHTp0KNIRtWzZn01qamqRHP9G5ysiIiIid8iizeDvCZ1D/1fm65EV2Ob9DOkZ4GCX9/5fb4EO9f4X1ABa3w9VS8FXsTnD2v6T8N738M3LWdtv0U1Ng9y9ezcuLi5ERkbSpEkT7rvvPmrWrEnTpk3p3r07bdq0Mdc9cuQIJpMp1xGdcePG5bgfKfv+qbNnz9KnTx98fHxwc3PjscceM0/XmzlzJjVq1MDR0ZHq1avz7bffFrjvCQkJREZGUrZsWezt7SlbtiyRkZGcPXvWog8VK1YEICoqyjw1cd26dXket3Tp0nTo0IEGDRoQHBxMjRo1aNKkCWFhWV/YwYOW82CrVKmS47Vy5UoMw+Dhhx8u8EjS77//TqdOnShZsiSOjo7cd999vPnmm1y9etVcp0WLFqxYsQKwnH4ZHR1tcazk5GQiIiLw8/PD0dGRJk2a8Msvv+Ro0zAMpk+fTt26dXF2dsbV1ZWWLVvmmP567Xe/cOFC6tati5OTE5GRkbmeS3b9H374AQAnJydMJhMtWrQw1zl16hQRERGUK1cOe3t7SpUqxYABAzh9+rTFsU6ePMmwYcOoXbs2Xl5e5s9mypQpFp/NuHHjGDVqFADh4eHmzyb7/sTcrtFsFSpUsOhb9ufbp08fVq9eTdOmTXF1daVDhw7m7du2baNTp074+Pjg4OBAtWrVmDhxIleuXLE4zp9//kl4eDilS5fGwcGBgIAAWrZsydKlS3P97ERERESkAH47DHUqgs118adBFUhNh335TIU8cTZrymS9Sjm3NagCvx3KWT7ks6wRtfZ1b6vbNzWy5uvrS1xcHOvXr6dz5845tjs7O5v/nD1qcfJkzhPPLrt21Cc7kIWFheHs7Ez79u35+++/+f777zl16hRdu3Zl6tSpNGnShPvvv5+1a9fStWtX9u3bR1BQUL79Tk5OplGjRhw8eJAHH3yQBx98kJMnTzJ9+nRWr17Nr7/+ipubG/Xr1yc1NZWYmBhq167NAw88AICXl9cNP5u0tDQuXLhAWloau3fvZsyYMQA0bNgw3/0MwyAqKgoHBwf69et3w3bgf9NRTSYTLVu2xM3NjT///JOXX36ZHTt2MH/+fAAeeughTpw4wYEDByymX5YpU8bieGFhYZQoUYK2bduSkpLCqlWraN++PUeOHLEYGXryySf58ssvCQ0NpVOnTly5coVt27bRpk0bFi9ezKOPPgr873udP38+p0+fplWrVoSEhFC+fPlcz8fR0ZG+ffuyYcMGi75mT008duwYoaGhpKam0qxZM1q0aEFiYiJffPEFa9asYfv27Xh4eACwdetWvvzyS+rWrUvNmjXJyMhg7969vPLKKxw8eJCZM2cCUK1aNZo1a8aGDRto164dAQEBANStm/V/qNyu0WwXL16
0CPnZ1q9fz6JFi2jVqhVdunQxf3ZLly6lc+fO5uDl6OjIsWPHGDNmDL/99huLFi0C4OzZs7Rs2ZKMjAxatGiBh4cHaWlpxMXFsXLlSh555JFcPz8RERERuYFT5+DB+3KWB/7zO/9kItTM/bcqp85Z1r1+/8QLliNzS7fByp2w893b7vZNhbVevXrx+++/06VLF6pUqULTpk2pX78+LVq0oEaNGrfdGQA/Pz+GDh1K6dKlSUlJITMzk1WrVnHs2DEmTpxInTp1sLW15YEHHuCVV17ho48+4t138/8gJk+ezIEDB3jiiScYOHAgPj4+nD17lunTp7NgwQImT57MpEmT6Nq1K5UqVSImJoaQkBDzlMXSpUvfsN8zZsxgyJAhFufRr18/c4DJy+rVqzlx4gRNmjShdu3aN2wH4NlnnyUtLY1x48bRtm1bnJ2dOXLkCOPGjePLL7+kf//+tGrVin79+rF+/XoOHDhgMf3S39/f4nje3t688MILlClThvT0dEqXLs306dOZM2eOeTTsm2++4YsvvqBXr1707t2bgIAAMjMz2bt3Ly+99BKRkZF07NjRYnXJQ4cO8frrr9OqVSvc3Nywt7fP9XxKlizJyJEjOXr0qEVfs6fVRkREcOHCBcaPH0+jRo1wd3cnJSWFOnXqMHHiRN5++21ef/11AIKDg3n77bcJCgrC3d2dzMxMjh8/zvjx45k9ezavvfYagYGBhIWF8fvvv7NhwwY6duxonn6a3/2JN3LkyBGGDRtGly5d8PT0BLJCfN++fSlfvjzjx4+nWrVqODg4cPr0aT7++GNiYmJYt24dLVq0YO3atZw5c4bIyEj69OmDq6sraWlpJCQkmI8nIiIi8q+XcQWSU3OWpWdAQoplubdr1mjapcvgkEv0cfzn9+mly3m3l70tt2mSjnb/q+Ngl7XYyNAoGBQG95Ut2Pnk46bCWs+ePUlJSWHlypXs3r2bqKgooqKiAGjWrBnR0dHmaYS36qmnnjJPIQRo1aoVq1atomHDhvTq1cv8g79UqVKMGzeOHTt23PCYCxcuxM3NjZdfftniPjQPDw+WLl3KwoULmTRpEv7+/uZRJycnJ6pUqVLgfgcHBzNkyBDS09M5duwYu3btIjAwkMDAwHz3mzp1KgCPP/54nmHmWqdPn+bXX3/lgQce4NlnnzVPmwwODubEiRNs376duXPn0qpVK8qUKYOdXdYFlN+5PPXUUzz88MMWZdOnT2fbtm3m9zNmzMDR0ZGBAwdSvXp1c3lAQAChoaEsXLiQffv2Ua1aNfO2kJAQBg0adMORSTs7O6pUqZJrX5OTk1m+fDlNmzblsccew9XVFcga5e3RowczZ87k22+/NYe1oKAgKleujMlk4vLly1y4cIHAwEBatmzJL7/8wqZNm+jatSslS5Y0BzNfX9+b+q7zUqZMGYYMGWIxcrlkyRLOnDlD9+7deeihh7D5Z+jdz8+PXr16ERMTw9dff02LFi3MUyLj4+OpWrXqbQVHERERkbvZb7/9Zp7lBhAbG0vjxv8sDLJpb9ay+9eL/QsWbLQsO/wJVPDDcLIjKf4M1/4qjY2NpXGaQ9YbJ3vLNoAtW7ZQv359bJ2yfqOfOHQE53P3mX/bxsXF4X76LO7/7J+SkkLq2C8ISDgPr3X/XxvXt9n42pL83VRY8/f35/nnn6dFixYcP36cuLg49u3bx8aNG80jFL/99luBQkde2rZta/E+e+pcSEiIxXFLliyJi4sLiYmJNzzmiRMnCAoKombNmhblNWvWJCAggKNHj95yf7OFhoZSvXp1rly5QkJCAuvXr2fkyJG4uLjkuqgIQGJiIsuWLSMgIIDw8PACtbNnzx4AatSoYXF/m8lk4tFHHyUiIoK//vrrpvp+/WceHBwMwJkzZyzaTUtLo1mzZnke5+DBgxZhrUqVKgWaQpqfXbt2kZmZyc8//3zD6a7ZJk6cyJw5czhw4ACGYVhsO3bs2G31Jz9ly5bNMcU0+96/jz76iI8++ijX/bKvvwcffJDQ0FC+/vprvvvuO+rXr0/r1q15/PHHue++XIbtRURERO5R1wY1wDLg3F8BfhprucOwaAjwghH/sSwP8ATAFOiNV5rlpsaNG8PsVVlvSnnT+LppkKGh/yxG8s/0x9I2znDNb9syZcrAxatZo3cOdrinXcb90w3w7MOQcglSLtG4VGW4sBEMA46cpnHlm/tNd1NhDbKmzGUvrnD+/HlOnTrFnj17GDZsGLt372bz5s3m+6nycu1CD9e7frpX9miLi4tLjromkynfY12rRIkS5lGNbDY2NpQocdMfQa5cXV3Noz4VKlQgKCiIadOm8cEHH+QZ1qKjo8nIyKB169bme6ZuJHvlwtxWS8z+7K5ftOJGrv/Ms4997XGuXLmCm5sb/fv3z/M414ep7PvIbkdKStZQdsOGDWnUqFGuda5tZ+jQoUydOpVGjRrRpEkT3NzcsLW15dixYyxevJjLl/MZ4r7GrVy/2d//tbK/ry5dulC2bO5D4dnTX0uVKsWkSZNYt24du3fv5ujRo7z11ltMnDiR999/n8GDBxeo7yIiIiL3NC/XrJUYry8L9MpZnq12BdiwJ+t5a9dmgl/2g7ND1qqOeSldEnzdYVsuD9Deuh9q//Mb+NxFuJAGby7Jel0vaBD8pwEseSWfk7N0W0nFzc0NNzc3qlSpwmeffcbBgwfZu3cvzZs3N4+oZP/YvlZcXFyex7w+UBUGX19fTpw4wZUrVyzC2ZUrVzh58iQ+Pj6F3mbJkiUxDIOkpKQ868ycORNbW9sCLywCUK5cOSDrfrDr7d27F8MwLIJffqEjW0E+88DAQE6dOsWgQYNyDc6Q9Tnf7HFvJCgoCJPJhJOTE8OGDcu1zrXfaXR0NFWqVGH8+PGUK1cOR0dHbGxsmD59OoDFSFt+n032FMRz585ZlKelpZGUlJTrfYy5nW/291WtWjUiIiJybevaz7Nly5bcf//9HD9+nFOnTnHgwAEmTJhgvi+wIN+niIiIiFyna6Os5fsXb/nfc9YSUiAmFjrWs7wf7WDWwodUumYwpUsj+HwtHE+Asv9kh9W/Z60iObRj1ns/j6yl+q/34VLYvA++HJr7IiX5uKlf0z/99FOuozZpaWns2rULwDx64OPjg7u7O1u3brX4gXzo0CHWr19/U528XQ0bNiQ5Odm8EmC2GTNmkJyc/L8hzluQvYrl9dauXcvRo0fznLq3bds2/vrrL+6///6bar9ixYpUqlSJDRs2mD9zyAoh2fdttWrVylyevUhHQaaL5qdt27YYhsGHH35ImTJlcrzs7OxwcHC4rTZyU6FCBUJCQtiwYQNxcXE52i1dujS2trYW+9jZ2dG6dWuqVq1KuXLl8PLyMq+Qea3skJRboM5+OPmqVassyt95550cUyvz8/DDD+Pm5kZUVBTOzs45+l+yZElz2ExMTCQzMxNvb2/uv/9+Hn74YZ555hkCAgJIS0vj0qVLBW5XRERERK7RtRGEVoW+U2H8V/Dxj1kP0r6aab6/zKzV2KzXtUZ1yRqBazkGPloKk7
+G8LezVpDs+1BWHWcHeKxhzlc5X7C1yfpzw6o31e2bGlkbPHgwKSkpPProo9SsWRNnZ2eOHz/O/PnzOXToEKGhoebgYW9vT1hYGIsWLaJdu3Y89thj5uXyAwMDc31+VVEZPHgwmzZt4rnnnmPHjh088MAD/Pbbb8yePRt/f3+eey6PJ50XQPazvh566CHKly9PWloa27dvZ8GCBTg6OuY5mpK9sEjnzp1v6mHiTk5OREREMGrUKJo2bcrgwYMJCAjghx9+YMWKFTRo0MDi/reaNWvyzTffMHDgQB599FHs7Oxo2LBhge//yta/f3/WrVvHxx9/zI4dO+jQoQM+Pj7ExcWxefNm9u/fz+HDh2/qmAXh6OjIiBEjGDJkCA8++CBPPfUUDzzwAJmZmRw6dIhvv/2WJ554gokTJwJZDylft24djz/+OK1bt+bvv//ms88+y3XU64EHHsBkMvHBBx9gMplwcXEhKCiIhg0b0rZtW/z9/XnjjTdIS0sjKCiIjRs3sn79+lynO+alatWqDBw4kA8++IBq1arRr18/KleuTFJSEnv37mXx4sV89dVXtGnThtmzZzN16lQ6depE5cqVsbOzY/369ezcuZO6deve1r2gIiIiIv9qtraw7FUY8XnWSNely1C/MkQ/B9VuvPI7ZX1g/evwYjS8Mg/sS8AjdeGdPvk/TPs23VRYCw8PZ8uWLaxevZqvvvqK8+fP4+rqSunSpXnqqad46qmn8Pb2NtcfMWIEKSkpbN26lbVr1xIUFESvXr04efLkHQ1rTZs2ZcKECXzxxRcsWbKEqKgovLy8ePDBB+nRo8dNrchyvZo1a7JixQpmz55NUlISJpMJPz8/mjZtyiOPPEKPHj1y7HPp0iW+/vprvLy86Nmz5023+dRTT3HhwgW+/fZbPvroIy5duoS/vz+dO3dm0KBBFlP0evTowebNm1m3bh2LFy8mMzOTjz/+OM8QmZeKFSuaF+7YsmULEydOJCMjg5IlS1KhQgU6dux40+dRUJ06dSI1NZXFixezYsUK5s6di4ODA76+vlStWtVihc9Ro0ZhZ2fHunXrWLJkCf7+/jRp0oTq1avneEB77dq16d27N6tWrSIiIoKMjAwef/xxFixYQLly5Xj55ZeZPXs2H3zwAfb29tSpU4eRI0cydux1/6UlH3Z2djz//PO4ubmxfPlyoqKiOHfuHO7u7uZVKkuVypojXaFCBcqXL8+iRYtITEzExsYGf39/unbtSt++fQvt/koRERGRe866129cx8sVPo3MeuXnyIzcy4PLwYpcVqG8kejnsl63wGTcxJyuuLg4jhw5Qnx8PBcvXuTy5cvY29vj7e1NlSpVqFatmsU9NZmZmWzdupX9+/dz+fJlPD09qVevHmfOnOG///0vTzzxhPnBwevWrWPfvn0MGDDAos2TJ0/yww8/0Lx5c4uVBiHroctubm4FCgqXLl1i+/btHD16lNTUVJydnSlfvjz16tWzWKzj/PnzfPnll9SpU4d69erd8LgHDx5k//79nD17lrS0rCVmXF1dKVOmDPfff3+uozBXrlxh3rx52NnZ0aNHj1u6D+ns2bNs27aN+Ph4MjIycHd3p2rVqtSqVctiFMkwDH755RcOHjxIamoqhmGYP8u8PnPIup+uatWq5sVksu3bt4+9e/dy9uxZMjMzcXJywsfHh0qVKlGpUtZT3W/2M8yWX3/S0tLYsWMHR48e5cKFC9ja2uLi4kLp0qWpUaOG+R7J7Ad1Hzp0iEuXLuHi4kL16tXx9fVl6dKlOa6jffv2sWPHDvMz/a4956SkJGJjYzl16hS2traUKVOGRo0a8e233+a47vL6vLIlJiayY8cOTp48SVpaGg4ODri7u1O2bFnuu+8+HB0dSUhI4I8//uDvv/8mNTUVk8mEm5sbVatWJTg4OMd0z7ykpKTg4eFBcuUBuB84c+MdRERERKxFtdKw5jUo5X3juv8CNxXWRMT6KayJiIjIXUthzULhL70oIiIiIiIit01hTURERERExAoprImIiIiIiFghhTURERERERErpLAmIiIiIiJihfTgJpF7VQUfsNWDtEVEROQuEuRX3D2wKgprIveqqc+Am3tx90JERETk5rg4FHcPrIbCmsi9KtAb3BXWRERERO5WumdNRERERETECimsiYiIiIiIWCGFNRERERERESuksCYiIiIiImKFFNZERERERESskMKaiIiIiIiIFVJYExERERERsUIKayIiIiIiIlZIYU1ERERERMQKKayJiIiIiIhYIYU1ERERERERK6SwJiIiIiIiYoUU1kRERERERKyQwpqIiIiIiIgVUlgTERERERGxQgprIiIiIiIiVkhhTURERERExAoprImIiIiIiFghhTURERERERErpLAmIiIiIiJihRTWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiIiIiIiJWSGFNRERERETECimsiYiIiIiIWCGFNRERERERESuksCYiIiIiImKFFNZERERERESsUIni7oCIFC7DMABISUkp5p6IiIiISH7c3NwwmUx5bldYE7nHnD17FoCyZcsWc09EREREJD/Jycm4u7vnuV1hTeQe4+3tDcCxY8fw8PAo5t5IcUtJSaFs2bIcP348338M5N9B14NcS9eDXEvXQ/Fwc3PLd7vCmsg9xsYm61ZUDw8P/WUrZu7u7roexEzXg1xL14NcS9eDddECIyIiIiIiIlZIYU1ERERERMQKKayJ3GMcHBwYO3YsDg4Oxd0VsQK6HuRauh7kWroe5Fq6HqyTyche51tERERERESshkbWRERERERErJDCmoiIiIiIiBVSWBMREREREbFCCmsiVmrv3r20adMGFxcXAgICeOmll7h8+fIN9zMMgzfeeINy5crh5OREo0aN2LJlS456J0+epEuXLri5ueHt7c3TTz9NSkpKUZyKFIKivB7OnDnDCy+8QMOGDXFwcMDV1bWoTkMKSVFeD6tWraJ79+5UqFABZ2dn7rvvPt566y0yMjKK6nTkNhXl9fDrr7/Spk0bAgICcHBwoFy5cvTv35+TJ08W1elIISjq3xDZMjMzqVu3LiaTiUWLFhXmKUg2Q0SsTmJiohEYGGg8+OCDxvLly43Zs2cbHh4eRmRk5A33nTx5smFvb2+8++67xqpVq4xOnToZbm5uxsGDB811Ll++bISEhBghISHGd999ZyxYsMAoU6aM8cgjjxTlacktKurr4bfffjP8/PyMDh06GI0bNzZcXFyK8nTkNhX19dC1a1ejffv2xueff26sXbvWmDx5suHk5GT06dOnKE9LblFRXw8rVqwwBg8ebCxYsMBYu3atMXv2bKN8+fJGzZo1jbS0tKI8NblFRX1NXOvjjz82/P39DcCIiYkp7FMRwzAU1kSs0KRJkwwXFxfj7Nmz5rIZM2YYtra2xokTJ/Lc79KlS4a7u7sxcuRIc1l6erpRvnx5IyIiwlw2f/58w2QyGXv37jWXrVixwgCMX375pZDPRm5XUV8PV69eNf957NixCmtWrqivhzNnz
uTYd+LEiYbJZMp1mxSvor4ecrNy5UoDMDZt2nT7JyCF7k5dE2fOnDG8vb2Nzz77TGGtCGkapIgV+vHHH2ndujXe3t7msm7dupGZmcnKlSvz3C82NpaUlBS6detmLrO3t6dz584sW7bM4vi1atWiWrVq5rI2bdrg7e1tUU+sQ1FfDzY2+qfgblLU14OPj0+OfR944AEMw+DUqVOFdBZSWIr6eshNyZIlAQo0rU7uvDt1TYwcOZKWLVvSsmXLwj0BsaB/oUWs0N69e6levbpFmaenJ4GBgezduzff/YAc+9aoUYNjx45x6dKlPI9vMpmoXr16vseX4lHU14PcXYrjeti4cSMODg4EBQXdRs+lKNyp6+Hq1atcvnyZvXv38tJLL1GnTh2aNm1aSGchhelOXBNbt25l/vz5vP3224XYc8mNwpqIFTp37hyenp45yr28vEhMTMx3PwcHBxwdHXPsZxgG586du63jS/Eo6utB7i53+nrYv38/H3zwAYMGDdLiM1boTl0PzZs3x8HBgRo1apCcnMyyZcsoUaJEoZyDFK6iviYyMzOJjIxk2LBhVKhQoTC7LrlQWBMREZFcpaSk0LlzZ4KCgpg4cWJxd0eK0ezZs9myZQvz5s0jPT2d1q1bawXhf6lPP/2U+Ph4XnnlleLuyr+CwpqIFfLy8iI5OTlH+blz5yzmoOe2X3p6OmlpaTn2M5lMeHl53dbxpXgU9fUgd5c7dT1cvnyZTp06ce7cOZYtW4aLi0vhnIAUqjt1PVSrVo2GDRvSs2dPfvrpJ/bv38/MmTML5ySkUBXlNXHhwgVGjRrFq6++yuXLl0lKSjKH9tTUVAX4IqCwJmKFcrt3LDk5mVOnTuWYS379fgB//fWXRfnevXvNz0zJ6/iGYfDXX3/le3wpHkV9Pcjd5U5cD5mZmfTs2ZPt27fz448/UrZs2UI8AylMxfH3g7+/P2XKlOHAgQO30XMpKkV5TSQkJHD27FkGDRqEl5cXXl5e3H///QD07t2bqlWrFvLZiMKaiBVq164dq1atIikpyVwWExODjY0NYWFhee7XuHFj3N3diYmJMZdlZGSwePFi2rdvb3H8nTt3sn//fnPZ6tWrOXv2rEU9sQ5FfT3I3eVOXA+RkZF8//33fPvtt9SsWbPQz0EKT3H8/XD8+HGOHj1KxYoVb7v/UviK8poICAhg7dq1Fq8vv/wSgHHjxrF48eKiOal/s2J8bICI5CH7gZbNmzc3VqxYYXz22WeGp6dnjgdaPvTQQ0alSpUsyiZPnmw4ODgY77//vrF69WqjS5cueT4Uu2bNmsb3339vLFy40Chbtqweim2livp6MAzDiImJMWJiYozw8HDD0dHR/P7IkSNFfn5yc4r6epg4caIBGCNGjDA2b95s8UpOTr4j5ygFV9TXw8CBA43Ro0cb33zzjbFmzRpj+vTpRuXKlY3SpUsbCQkJd+Qc5ebciX8zrnX48GE9Z60IKayJWKndu3cbrVq1MpycnAw/Pz9j+PDhRnp6ukWd5s2bG+XLl7coy8zMNCZNmmSUKVPGcHBwMBo2bGjExsbmOH5cXJzRuXNnw9XV1fD09DT69eunH2JWrKivByDXV1RUVBGeldyqorwemjdvnuf1sHbt2iI+M7kVRXk9zJ4922jYsKHh6elpODk5GdWqVTOef/55Iz4+vqhPS25DUf+bcS2FtaJlMgzDKIYBPREREREREcmH7lkTERERERGxQgprIiIiIiIiVkhhTURERERExAoprImIiIiIiFghhTURERERERErpLAmIiIiIiJihRTWRERERERErJDCmoiIiORw+vRpPDw8mDVrlkV5nz59qFChQvF06h4xbtw4TCYTR44cuSPtRUdH52jv0qVLlCpVitdee+2O9EFEbo3CmoiIiOTw6quv4uvrS9++fQtUPz4+nuHDhxMSEoKbmxvu7u5UqVKF7t27s3jxYou6LVq0wNXVNc9jZYeZbdu25br93LlzODk5YTKZmDt3bp7HqVChAiaTyfyyt7enQoUKPP300xw/frxA53WvcnJy4pVXXuGtt97i1KlTxd0dEcmDwpqIiIhYiIuL47PPPuO5556jRIkSN6x/9OhR7r//fqZNm0ZoaChvvPEGkydPpkOHDuzdu5eoqKhC7d8XX3xBeno6QUFBfPbZZ/nWLVOmDHPnzmXu3Ll88MEHNGzYkM8++4yGDRuSkJBQqP262/Tv3x+TycS7775b3F0RkTzc+G9gERER+VeZMWMGJpOJJ554okD13377bU6fPs2SJUv4z3/+k2N7fHx8ofZv9uzZtGzZkv/85z8MGTKEQ4cOUbFixVzrenh40KtXL/P7iIgI/Pz8mDp1KlFRUYwYMaJQ+3Y3cXFxoXPnzkRHRzNhwgQcHByKu0sich2NrImIiNym7HuCVq9ezfjx4ylfvjxOTk40bNiQLVu2ALB+/XqaNm2Ki4sLgYGBvP7667kea9u2bXTq1AkfHx8cHByoVq0aEydO5MqVKxb1tm7dSp8+fahatSrOzs64ubnRpEkTvvnmmxzH7NOnDyaTieTkZHNYcXR0pEmTJvzyyy856sfExFCvXj38/PwKdP779+8HoFWrVrluDwgIKNBxCuK///0vO3bsoHfv3vTo0YMSJUrccHTtem3btgXgwIEDedb58ccfMZlMfPjhh7lub9SoEb6+vmRkZAA3933kJvs7yo3JZKJPnz45yhcuXEjTpk1xc3PD2dmZhg0bsmjRogK1l61du3YkJCSwdu3am9pPRO4MhTUREZFC8sorr7BkyRJeeOEFxo4dy6FDhwgLC2PJkiV07tyZZs2a8fbbb1O9enXGjBnDvHnzLPZfunQpTZo0Yd++fQwbNowPP/yQRo0aMWbMmByjXN988w179+6lW7dufPDBB4wePZrExEQ6d+7M/Pnzc+1f27ZtiYuLY8yYMYwcOZJdu3bxyCOPcP78eXOdv//+m7/++osGDRoU+LwrVaoEwKxZszAMo8D7JSQk5PpKTU3Nc5/Zs2fj6upKly5d8PHxoUOHDnz++edkZmYWuN3scOnj45NnnbCwMAICApgzZ06u+2/ZsoUePXpgZ2cH3Nr3cTteffVVunfvjpubG6+//jpvvPEGzs7OhIeHM23atAIfp1GjRgCsW7eu0PsoIoXAEBERkdsSFRVlAMYDDzxgpKenm8u//fZbAzBKlChh/Prrr+by9PR0IyAgwAgNDTWXXbp0yfD39zeaNWtmZGRkWBz/3XffNQBj7dq15rILFy7k6MfFixeNqlWrGjVq1LAo7927twEYERERFuVfffWVARiffPKJuWzNmjUGYHzwwQe5nmvv3r2N8uXLW5QdPHjQcHd3NwCjbNmyRo8ePYz33nvP2LZtW67HaN68uQHc8HXtZ5b9GXl6ehq9e/c2ly1ZssQAjGXLluVop3z58kb16tWNM2fOGGfOnDEOHTpkfPbZZ4aHh4dRokQJ448//si1f9mGDx9uAMaff/5pUf7qq68agLF9+3Zz2c18H2PHjjUA4/Dhw+ay7O8oN4DFOW/fvt0AjJEj
R+ao+5///Mdwc3MzUlJSzGXZ1+e17V2rRIkSRocOHXLdJiLFSyNrIiIihSQiIgJ7e3vz+2bNmgHQsGFD6tWrZy63t7enQYMG5hEegJ9++om///6bvn37kpSUZDHS1L59ewBWrlxpru/i4mL+c2pqKmfPniU1NZWHHnqIPXv2kJKSkqN/Q4cOtXj/0EMPAVj048yZMwB4e3sX+LwrVqzIzp07iYyMBGD+/PkMHTqUevXqUatWLbZv355jH0dHR3766adcX08++WSu7SxevJikpCR69+5tLmvfvj2+vr55ToXcu3cvvr6++Pr6UrFiRfr164ePjw/ffvstISEh+Z5XdjvXjq4ZhsG8efMICQmhTp065vJb+T5u1RdffIHJZKJ37945RiUfffRRzp8/z+bNmwt8PG9vb06fPl1o/RORwqMFRkRERArJ9YtceHl5ARAUFJSjrpeXF2fPnjW/37NnDwD9+vXL8/h///23+c+nT5/m1Vdf5dtvv831h3ZSUhLu7u759q9kyZIAFv3Ivm/KuInpjJC1TP7UqVOZOnUqp06dYuPGjcydO5fvv/+eDh068Oeff1oEQFtbW1q3bp3rsTZu3Jhr+ezZs/H19aVMmTIW95uFhYURExNDQkJCjqmNFSpUMD8rzt7enlKlSlG5cuUCnVN2IPviiy+YNGkSNjY2/Pzzzxw5coQ333zTou6tfB+3as+ePRiGQfXq1fOsc+21ciOGYeR5v5yIFC+FNRERkUJia2t7U+XXyg5Hb731FrVr1861TqlSpcx1w8LC2LNnDy+88AL16tXDw8MDW1tboqKimD9/fq73cOXVj2uDma+vLwCJiYk37HNeAgMDCQ8PJzw8nJ49ezJ//nyWLVtmsSrjzTp8+DBr167FMAyqVq2aa5158+YxZMgQizIXF5c8Q2FBPPXUUwwZMoQ1a9bQunVr5syZg62trcW53Or3ca28wtL1C8tkt2cymfjxxx/z/E6Dg4MLfI7nzp0zf+8iYl0U1kRERKxAlSpVgIKFi99//52dO3cyZswYXnvtNYttn3766W31I/tH/rVTI29HaGgo8+fP58SJE7d1nKioKAzDYNasWXh6eubY/uqrr/LZZ5/lCGu3q0ePHowYMYI5c+bQpEkTFi1aRJs2bQgMDDTXKYzvI3vUMTEx0WIE8tChQznqVqlSheXLl1OuXDlq1KhxK6dlduTIEa5cuXLDKaEiUjx0z5qIiIgVaNu2LX5+frzxxhu5jmpdunTJvGpj9mjK9VMVd+3aVeCl4vPi6+tLcHCw+ZEDBbFu3TouXbqUozwzM5Pvv/8egPvuu++W+5SZmUl0dDQ1a9bk6aefpmvXrjleTzzxBH/88Qe//vrrLbeTG19fX9q1a8fixYv54osvSElJsbhnDgrn+8geLVy1apVF+TvvvJOjbvY9faNGjeLq1as5tt/MFMjs77l58+YF3kdE7hyNrImIiFgBFxcX5syZw2OPPUa1atXo168flStXJikpib1797J48WK++eYbWrRoQY0aNQgODubNN98kNTWVatWqsW/fPmbMmEHNmjVzXdDjZoSHh/P6669z6tQpixGkvLz99tts2rSJjh07UqdOHTw8PIiPj+frr79m+/bttGzZkkceeeSW+7Ny5UqOHz9O//7986zTpUsXxo0bx+zZs6lfv/4tt5Wb3r1789133zFs2DA8PDx47LHHLLYXxvfxxBNPMGrUKAYMGMDevXvx9vZm+fLlJCQk5Khbv359xo0bx7hx46hduzbh4eGUKlWKU6dOsX37dpYtW8bly5cLdG7Lli3Dx8eHli1bFqi+iNxZCmsiIiJWom3btvz666+88cYbzJs3jzNnzuDl5UWlSpV48cUXqVWrFpA1krN06VKGDx/O559/zsWLFwkJCeHzzz9n586dtx3WnnnmGSZMmMD8+fMZNmzYDeu/+uqrxMTE8PPPP7NixQoSExNxcXGhRo0avPPOO0RGRmJjc+uTeWbPng1A586d86wTEhJC1apVWbBgAe+99x5OTk633N71OnTogLe3N4mJiTz99NM4OjpabC+M78Pd3Z1ly5bx4osvMmnSJFxdXencuTPz5s0zL1RzrbFjx1KvXj0+/PBD3n//fS5evIifnx8hISF5Psj7ehcvXmTx4sVERETg4OBQsA9DRO4ok3Gzyz2JiIjIPW/QoEGsXLmSv/76y/zgZ4A+ffqwbt06jhw5Unydk5sSHR1N3759OXz4MBUqVDCXZz+8e//+/QUaQRWRO0/3rImIiEgO48eP5+zZs0RFRRV3V6QIXLp0iTfeeIMRI0YoqIlYMU2DFBERkRz8/PxITk4u7m5IEXFycuLUqVPF3Q0RuQGNrImIiIiIiFgh3bMmIiIiIiJihTSyJiIiIiIiYoUU1kRERERERKyQwpqIiIiIiIgVUlgTERERERGxQgprIiIiIiIiVkhhTURERERExAoprImIiIiIiFghhTURERERERErpLAmIiIiIiJihf4fuBMK0TKHTdIAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot the SHAP values, notice that \"feature 0\" corresponds to the first atom, \"feature 1\" corresponds to the second atom, and so on also for bonds\n", + "shap.plots.bar(explanation, max_display=15)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the SHAP values for atoms and bonds\n", + "shap_values = explanation.values[0]\n", + "atom_shap_values = shap_values[:n_atoms]\n", + "bond_shap_values = shap_values[n_atoms:n_atoms + n_bonds]" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAyAAAAMgCAIAAABUEpE/AACtIklEQVR4nOzdZ1wU19cH8LOw9N6roKAoYEHB3hVLFLuY2PuSYjQmJqCJMYmJgVQ1zSVWNBrBrrGBvSuIXZCidASkSt1l53kxyTz7R0CFYYfy+37ygrlzZ/eM0eXsnXvPFTEMQwAAAADAHzWhAwAAAABobpBgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQ
AAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAAAPAMCRYAAAAAz5BgAQAANDdlZWWlpaW1dCguLq6oqHixvbS0tKys7MX2kpIShULBW3wtABIsAACA5uPZs2c+Pj56enr6+vpDhw5NS0ur0uHBgwdeXl4GBgb6+vozZ87k8rDU1NQhQ4bo6+vr6en5+Pg8e/aMbT958qSbm5uBgYGent6CBQvKy8tVej9NFhIsAACA5uPDDz9MT09PTk7OzMyUy+USiaRKh6lTpzo5OeXl5d25c+fixYtff/012+7n51dZWZmRkZGcnJyenr506VIiys3NnTJlyvLly2Uy2e3bt48cObJu3TpV31LTJGIYRugYAAAAgAd5eXk2Njb79u0bNWoUEV27dq1Pnz6PHz92cHBgO1y6dGnAgAHJycl2dnZEJJVKV65cmZmZmZKS4uTkdPny5Z49exLR0aNHJ06cmJmZaWxsnJeXZ2Jiwl7u5+eXkZFx6NAhge6vKcEIFgAAQDMRFxdXXl7ep08f9rB79+5qamoPHjzgOty7d8/R0ZHNroioV69e2dnZ2dnZDx48UFNT69GjB9veu3fv8vLyuLg4IuKyKyLKy8uztLRU0c00cWKhAwAAAAB+ZGdnq6mpGRkZsYdqamrGxsZZWVnKHYyNjblDU1NTInr69Gl2draRkZFIJGLbjY2N1dTUnj59qvziSUlJR44cOXLkSEPfRfOAESwAAIBmwsTERKFQFBcXs4cMwxQWFrJZFNehsLCQOywoKCAiMzMzExOToqIibtZQUVGRQqEwMzNT7jlp0qTp06cPGTJEFXfS9CHBAgAAaCZat24tEonu37/PHsbHx1dUVLRp04br0KZNm9TUVC7Hevjwoa6urpWVVZs2bSoqKhISEtj2Bw8eiESi1q1bs4dPnjwZPHhwu3btNmzYoLqbaeKQYAEAADQTtra23t7e33//PVuz6vvvv/fy8nJ3d5fL5U+fPlUoFMOGDTMzM2NXApaXl69bt2769Olisbhjx47dunULCgoiIoVC8eOPPw4fPtzGxoaIzpw506tXr+HDh//111/q6urC3mATglWEAAAAzce9e/dGjhypqampoaGRl5d35MiRHj16REZGdu/ePSMjw9ra+vDhw9OmTWvTpk1OTo6xsXFERIStrS0RXb16dcyYMaampjKZTCaTnThxws3NjV2HqKen5+Liwr6+mZnZiRMnBL3FpgEJFgAAQLNSVlZ29erVysrKXr166enpEVFJScn9+/e7du0qFouJKC8v7/r16wYGBj169GBbWMXFxVevXlVXV+/Vq5e2tjYRFRYWsmsJORoaGp07d1btDTVJSLAAAAAAeIYyDQAA0LJUVlYeOHAgJibG2tp6ypQpBgYGVToUFRWFhoZmZmZ26NBh/Pjx3MSj8vLysLCwpKQkR0dHX19fLS0t7pLr168/ePBgzpw5KrsLaOQwggUAAC3LmDFjbt68OXbs2EuXLpWVlV2/fl25NFReXl7Pnj21tbX79u176NChbt26HT58mIjKy8v79++fl5fn7e0dHh5uYWFx7tw5TU3NkydPrl69+vr16yKRqNptkqFlwggWAAC0IGfPnj1x4sTDhw+dnZ2Li4s9PDyCg4M/+eQTrkNwcDDDMFeuXNHT01u2bJmrq+u5c+cGDhwYGhqamJgYExNjbm6enZ3dvn37vXv3Tp06VS6Xr1q1ioh8fHyEu61XFRcXd+rUqVatWo0ePVroWJo5jGABAEALsnjx4piYmJMnT7KHn332WURExNWrV7kOPXv2HDFixFdffcUeent7u7u7r1u3bvz48WZmZps2bWLbZ8+eXVJSEhYWxh6eO3duxIgRjWoESy6XJycnJyYm3r9//8GDB4mJiefOnZPJZOzZHj16XLt2TdgImzeMYAEAQAuSmprq5OTEHbZp0yYlJUW5Q0pKinJlTicnp9TUVPZCLy8v5QuPHz/e8PG+qry8vEePHj18+DA2NjY2NjYmJiYhIaGioqJKN5FIpK2tXVpaev369Xfffff3338XJNqWAAlW85SdnR0bG2tra6v8OaIsKSkpJSXF2dmZrSPHycjISEhIcHBw4LZeZyUmJj59+tTFxUV55wQAgCanrKxMQ0ODO9TS0qoy7FRWVqapqflihyrt2traAo5Xpaens4NS3OjU48ePX3wkZWNj4+7u7uTk5OTk5Obm1rp1644dO4pEomnTpu3atWvDhg3Tpk3r16+fILfQ7CHBaoZ++umn5cuXt2nTJikpycfHZ+fOncqfJgqFYuHChTt27HByckpISFi+fPmXX37Jnlq5cmVQUJCzs3NiYuKsWbOkUqmamlpubu5bb70VFRVlb28fFxe3atUqf39/ge4MAKC+bG1tlTc/fvr0KVtmU7mD8ibHXIeXXthAysvL4+PjldOphw8flpSUVOmmpaXl7OzMplNubm7u7u7t27fX19ev9jV37twZFxcXGRk5ffr0GzduWFpaNvx9tDwMNC+JiYlqamr79+9nfzY3N9+8ebNyh/379+vr69+9e5dhmDNnzqipqd28eZNhmKioKDU1tbNnzzIMc/fuXX19/QMHDjAMs3HjRh8fn9LSUoZhDh06pKamFh8fr/LbAgDgR3BwsKWlpUwmYw9Hjhy5cOFC5Q7z589/44032J8rKiosLCw2bdrEMMzKlSs7d+7MdXN3d//qq6+4w7Nnz2ppadU/vLS0tPDwcKlUunjxYm9vbycnJ5FI9OLvbhMTk759+0okksDAwEOHDiUkJFRWVr7WG1VUVPTv35+I+vXrV15eXv/IoQpMcm9uvvzyy7179965c4c9XLp0aXR09NmzZ7kOEyZMMDQ03LZtG3s4aNAgDw+PtWvXLl68+N69e6dPn2bbZ86cWVxcvG/fPuUXVygUWlpa+/fvbxKLZQAAXlRcXNy2bdvBgwcvWrQoPDx8zZo1kZGRnTp1Cg0NJaIpU6bcvn27e/fun3766bBhw3799dezZ8/Gx8fr6uqmpqa6urrOnTt36tSpO3fuDAkJiY2Ntba2zs3NvXnz5p07dwICAo4ePWpkZNS9e/dXieTFoanY2Njnz59X6aapqWlvb88OSrGjU126dHmxdlcdZGZmenl5paWlLV68mN2dEHiER4TNTUJCgvImBp06deIWubDi4+OVS+F17tyZ3T49Pj6+yoV//fVXlRdPSUmRy+XK0z8BAJoWPT2906dPf/rpp7Nnz3ZwcDh8+HCnTp2I6P79+2yHLl26HDp06LvvvtuxY0enTp1Onz6tq6tLRPb29uHh4V9++eXMmTPbt28fERFhbW1NRAkJCQEBAUTUuXPngICATp06bdmy5cX3zcvL4+ZLsT88efKE3ZJZmYmJCZdLselUhw4dGmiLZWtr6z179gwaNGj9+vWdO3eeP39+Q7xLi4UEq7nJy8uzt7fnDo2MjHJzc5U75OfnK3/1MTQ0ZDtUaTcyMsrLy6vy4t98882gQYPc3
<base64-encoded PNG image data omitted>",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAyAAAAMgCAIAAABUEpE/AAC87klEQVR4nOzdZ1xUx9cH8LOFpXdUmoqIgl3E3hUwxtgV1ChqLBg1aoqKMVFjSQRjrDEGjMZuxGhiiQ01xl7Aih1RpKMICAILuzvPi0nus38ERLmwgL/vxxfc2dm7cxGWs3NnzpEwxggAAAAAxCPV9QAAAAAAqhoEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAAAiQ4AFAAAAIDIEWAAAAPB6aWlpERERmzdvbteunYODg4GBgYWFxaxZs3Q9rgpKwhjT9RgAAACgAlGpVE+ePImOjr5169bt27f5F4mJiYV23rhx46hRo8p5hBUfAiwAAIB3Wnp6+sOHD7XDqdu3b+fk5BToZm5u7uLi4uzsrFQqa9eu3alTp2+++eb27dsymSwiIqJZs2Y6GXyFhQALAADgHZKQkCBMSvEvoqOjX+1mZ2fXqFEjZ2fnhg0b8i/q1KkjkUi0+2RnZzs4OKSnp7dq1erMmTMKhaK8LqISQIAFAABQNWVkZERFRfEQiodTd+7cyc7OLtBNX1+/bt262uGUm5ubsbFxSV7i/v37Xl5esbGxkyZNWrNmTRlcRGWFAAsAAKAqeHVq6tGjR6/+lbe0tBQmpfgXTk5OUunbb3q7evVqhw4dcnJyQkJCxo8fX7qLqDoQYAEAAFQySqUyKipKO5y6e/fuy5cvC3RTKBSOjo7a4VSzZs1MTU1FH8+WLVtGjhypp6
d3/PjxTp06iX7+yggBFgAAQIWWlpamvZvv9u3bjx8/1mg0BbqJPjX1RqZNm7Zq1SpbW9vw8HAHB4fyedGKDAEWAABARZGXl/fgwQNh7fmtW7euX7+elZVVoNurU1NNmzY1MzPTyZg5lUrVo0ePv//+u23btidPntTX19fhYCoCBFgAAAC68eTJkydPnpRkakp7N1/Dhg3d3NxkMplOxlyM1NTU1q1bR0dHjxo1auPGjSV5Ck+4ZWpqWq1ataL6xMfHq9XqmjVrFtjD+PLly+TkZBsbmwKRZV5eXlJSkoWFhW4jTmIAAABQXhITE1u3bm1kZFQgXOAUCkXDhg0HDhz45Zdfbtq06eLFi+np6boe8hu4du2akZEREf3000+v7bx7924bGxuFQiGRSPr06ZOZmVmgw8OHDz08PKRSqVwur1evXkREhPDQt99+a2hoqFAoFArFZ599ptFoePuqVausrKz4Fsi+fftmZGSIeHVvBAEWAABA+alZs6YQTslkMg8PDz8/v8DAwNDQ0MjIyPz8fF0P8DWOHz9+5cqVYjps27aNiPT09E6ePFlMt5SUFAMDgy+//DI3N/f27dsODg4ff/xxgT7du3dv27ZtSkrKixcv+vXr5+TkpFQqGWMHDhyQSqXbtm1Tq9WHDh0yMDBYt24dY+zChQsmJiZhYWGMsYcPH9rZ2U2fPr20F/y2EGABAACUkxs3bvDQ6ttvv42KitL1cN7Y0aNH5XJ57dq1U1JSiun2xRdfEFGNGjViY2OL6hMUFFS9enUhoFyxYoWJiQmPn7i7d+8S0alTp/hhdHS0RCI5ePAgY2zAgAHe3t5CTz8/v7Zt2/Kvnz59KrR/8skn7dq1e+OLFAmKPQMAAJSTgIAAIrK3t589e3bdunV1PZw31rVr13bt2sXExAwcODAvL6+obkFBQe+//35ycnLfvn1fLbnDRUZGtm3bVi6X88NOnTplZWXFxcUJHW7evCmVStu3b88P69Sp4+joyKOue/fudezYUejZsWNH3k5ENjY2QvvLly+trKze8lJLDQEWAABAObl9+zYRTZgw4a3PoNFobt68ee7cuVe3FgqePHly9uxZ7WCFUyqVly5dCg8PV6lUBc5569at8PDwYs7J6enphYaGOjg4nDlzZubMmUV1k8lkW7durVu37tWrV4u62MTERGtra+GQB0ba9aSTkpIsLS211/Lb2NjwDgWeW61atfT09AKR3PPnz//4448BAwYUf0VlBwEWAABAeTh16lRMTIyDg8NXX331dmd48uRJs2bNWrVq1bdvX3t7+127dhXooFarR48e7eTk5OvrW6tWrUmTJrH/cgWcOXOmdu3a3t7e3bp1c3Z2vnLlCm+/fPly/fr1u3bt2rdvX0dHx9DQ0OLHYGtr+/vvv+vr669cuXL9+vVFdbOystqzZ4+xsfGWLVtWrVr1agcTExPtkIgX8NFOgmpiYlKgqs/Lly95hwLPffnypUKhMDAwEFry8/P9/PwaNWr00UcfFX85ZQcBFgAAQHng4cjo0aPfOsOCv7+/QqFITk5++vTplClTRo8enZSUpN0hJCRk165d58+fj4+PP3ny5IYNG7Zu3UpESqVy2LBh3bt3T01NTU1Nbdas2YcffsjzQUybNm3IkCFJSUkJCQkTJ04cN25cZmZm8cNo27ZtSEgIEX3yySeXLl0qqlvTpk03b94skUi++OKLv//+u8Cjjo6Ojx8/Fg4fPXrEG7U75OTkJCcn88O8vLyEhATewdHRkffnHj9+7ODgIOzKfPHixcCBA+Pj4/fv319ueVYLoavFXwAAAO+OjIwMnprhwYMHb3eGpKQkiUSye/dufpidnW1mZvbjjz9q92nXrt2oUaOEw8GDB/PF4IcOHZJIJLw0IWPs+vXrRHT+/HnGmFKpVKvVvP3evXtEdOnSpZKMZ+LEiURkZ2cXHx9fTLdZs2YRkbW1dXR0tHb7iRMnpFLp7du3+eHQoUM9PDy0O+Tk5FSrVi0oKIgf7tixQyaTPX78mDH29ddf29nZvXjxgo+/UaNG/v7+vNvdu3cbNmz4/vvvp6WlleQqyg4CLGD5+fnjxo3r0KHD9OnTX01DAgAApbd27Voi6tat21uf4ezZs0SkHc106NBh2rRp2n1sbGy0E1AFBQU5OTkxxngRG+2ehoaGmzdvLvAS586dI6Jitv5py8vL69KlCxG1b99ee/dfAWq1+oMPPiCi5s2bv3z5UmjXaDSenp6Ojo5z58718fHR09M7ceIEY+z8+fPu7u7Pnz9njK1cuVIul0+ePHnGjBkmJiZCzoVnz545OTm5u7svWLCgY8eO1apVi4mJYYydOHHCxMTE2tp6xowZAf8pybWUBdwiLA8ajSYyMvLq1atKpbKoPrGxsREREU+fPi3QrlQqr169GhkZWSC3r0ajuX37dmRkZDHnLImEhISaNWv+8ssvZ8+eXbp0qZmZWdeuXfmHGwAAEAu/Pzh27Ni3PkNKSgoRWVpaCi3W1tbatwjVanVqaqr2vjkbGxveISUlpcB+OisrK+Hum+Cnn37q2LGj9n26Yujp6e3cubNmzZrnzp377LPPiuomlUq3bNni4uJy7dq18ePHC+0SieSvv/6aOXPmo0ePHBwcLl261K1bNz6wrl276unpEdHUqVP379+fn5+fmpq6fv36JUuWCBd+6dKl/v37P3jwoEuXLleuXKlVqxY/5+TJk8eNG6fLO4MCXUV2744HDx40aNBAX1/fzMzMxsbm6NGjBTrk5eUNHTpUIpFUq1ZNJpN9/fXXwkPHjh2rVq2amZmZgYGBm5vb/fv3efvp06fr1KljZWVlbW1dvXr1w4cPv93Y7t69y1cFSiSSli1baq8QbN++/fr16zGhBQBQejz9lbm5ufYUjrZTp0598sknN2/eLOYkJ06cICLtPE9du3YtkJzT1NT0l19+EQ6XL19uZ2fHGPv+++9r1apVTE/G2Lp160xMTK5fv17iy2KMsStXrhgaGhJRSEhIMd3u3LnDC9f88MMPb3T+ygsBVplr3759+/btX7x4oVKpJkyYYG1tXSBzf1BQkLm5OU+M++eff0ql0r/++osx9uLFCxsbm3HjxqlUqszMzI4dO7Zp04YxplKpGjRosGjRIrVarVar/f39tXO1ldxff/1lYWFBRAYGBkK+3U2bNn3yySfCBx1DQ0MfH5+wsDChCgEAALypqVOnEtHkyZOL6jBs2DAi+uabb4o5SVRUFBGdPXuWH2o0mpo1a3733XfafRo3bjxz5kzhcPLkyfwPx86dOxUKBV+0xBjj01pHjhwRei5btszExOTvv/9+42tjbPPmzUSkp6cnJAUt1J9//imRSGQy2aFDh97iVSodBFhl68GDB0TE0/Yzxp49e6anp7dt2zbtPo0aNZoyZYpw2L179yFDhjDGduzYoaenJ2TL5Z9d7t69yxjLzs4WIp6LFy8SkTC5VRIajSYwMJDPoPr6+r66EjAnJyc0NNTLy0vYlOHm5hYYGJicnPxGlw8AAEqlkid5KqrCTFpamqGhoVQq5Su4i9GyZ
[... base64-encoded PNG data truncated — notebook display_data output rendering the molecule with SHAP-value annotations ...]", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ],
+ "source": [ + "# Draw molecule with atom SHAP values\n", + "atom_labels = {i: f'{atom_shap_values[i]:.3f}' for i in range(n_atoms) if atom_shap_values[i] != 0}\n", + "mol_with_atom_shap = Chem.Mol(mol)\n", + "for atom in mol_with_atom_shap.GetAtoms():\n", + "    atom_idx = atom.GetIdx()\n", + "    if atom_idx in atom_labels:\n", + "        atom.SetProp('atomNote', atom_labels[atom_idx])\n", + "img_atom_shap = Draw.MolToImage(mol_with_atom_shap, size=(800, 800), kekulize=True)\n", + "img_atom_shap.save('atom_shap_values.png')\n", + "\n",
+ "# Draw molecule with bond SHAP values\n", + "bond_labels = {bond.GetIdx(): f'{bond_shap_values[bond.GetIdx()]:.3f}' for bond in mol.GetBonds() if bond_shap_values[bond.GetIdx()] != 0}\n", + "mol_with_bond_shap = Chem.Mol(mol)\n", + "for bond in mol_with_bond_shap.GetBonds():\n", + "    bond_idx = bond.GetIdx()\n", + "    if bond_idx in bond_labels:\n", + "        bond.SetProp('bondNote', bond_labels[bond_idx])\n", + "img_bond_shap = Draw.MolToImage(mol_with_bond_shap, size=(800, 800), kekulize=True)\n", + "img_bond_shap.save('bond_shap_values.png')\n", + "\n",
+ "# Display the images if running in a Jupyter notebook\n", + "try:\n", + "    from IPython.display import Image, display\n", + "    display(Image(filename='atom_shap_values.png'))\n", + "    display(Image(filename='bond_shap_values.png'))\n", + "except ImportError:\n", + "    print(\"IPython is not installed.
Images are saved as 'atom_shap_values.png' and 'bond_shap_values.png'.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop_delete", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/examples/training.ipynb b/chemprop/examples/training.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a4cb71fefea1882de1d6bf664a83ec1b557e4cf8 --- /dev/null +++ b/chemprop/examples/training.ipynb @@ -0,0 +1,887 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/training.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "from lightning import pytorch as pl\n", + "from lightning.pytorch.callbacks import ModelCheckpoint\n", + "import pandas as pd\n", + "\n", + "from chemprop import data, featurizers, models, nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change data inputs here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\" # path to your data .csv file\n", + "num_workers = 0 # number of workers for dataloader. 0 means using main process for data loading\n", + "smiles_column = 'smiles' # name of the column containing SMILES strings\n", + "target_columns = ['lipo'] # list of names of the columns containing targets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smileslipo
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc143.54
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...-1.18
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl3.69
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...3.37
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...3.10
.........
95CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...2.20
96CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...2.04
97CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...4.49
98COc1ccc(Cc2c(N)n[nH]c2N)cc10.20
99CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...2.00
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get SMILES and targets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14',\n", + " 'COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23',\n", + " 'COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl',\n", + " 'OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3',\n", + " 'Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1'],\n", + " dtype=object)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smis[:5] # show first 5 SMILES strings" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 3.54],\n", + " [-1.18],\n", + " [ 3.69],\n", + " [ 3.37],\n", + " [ 3.1 ]])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ys[:5] # show first 5 targets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Perform data splitting for training, validation, and testing" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['SCAFFOLD_BALANCED',\n", + " 'RANDOM_WITH_REPEATED_SMILES',\n", + " 'RANDOM',\n", + " 'KENNARD_STONE',\n", + " 'KMEANS']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# available split types\n", + "list(data.SplitType.keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ChemProp's `make_split_indices` function will always return a two- (if no validation) or three-length tuple.\n", + "Each member is a list of length `num_replicates`.\n", + "The inner lists then contain the actual indices for splitting.\n", + "\n", + "The type signature for this return type is `tuple[list[list[int]], ...]`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.mol for d in all_data] # RDkit Mol objects are use for structure based splits\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1)) # unpack the tuple into three separate lists\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ChemProp's splitting function implements our preferred method of data splitting, which is random replication.\n", + "It's also possible to add your own custom cross-validation splitter, such as one of those as implemented in scikit-learn, as long as you get the data into the same `tuple[list[list[int]], ...]` data format with something like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import KFold\n", + "\n", + "k_splits = KFold(n_splits=5)\n", + "k_train_indices, k_val_indices, k_test_indices = [], [], []\n", + "for fold in k_splits.split(mols):\n", + " k_train_indices.append(fold[0])\n", + " k_val_indices.append([])\n", + " k_test_indices.append(fold[1])\n", + "k_train_data, _, k_test_data = data.split_data_by_indices(\n", + " all_data, k_train_indices, None, k_test_indices\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get MoleculeDataset\n", + "Recall that the data is in a list equal in length to the number of replicates, so we select the zero index of the list to get the first replicate." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0], featurizer)\n", + "scaler = train_dset.normalize_targets()\n", + "\n", + "val_dset = data.MoleculeDataset(val_data[0], featurizer)\n", + "val_dset.normalize_targets(scaler)\n", + "\n", + "test_dset = data.MoleculeDataset(test_data[0], featurizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get DataLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = data.build_dataloader(train_dset, num_workers=num_workers)\n", + "val_loader = data.build_dataloader(val_dset, num_workers=num_workers, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, num_workers=num_workers, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change Message-Passing Neural Network (MPNN) inputs here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Message Passing\n", + "A `Message passing` constructs molecular graphs using message passing to learn node-level hidden representations.\n", + "\n", + "Options are `mp = nn.BondMessagePassing()` or `mp = nn.AtomMessagePassing()`" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "mp = nn.BondMessagePassing()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aggregation\n", + "An `Aggregation` is responsible for constructing a graph-level representation from the set of node-level representations after message passing.\n", + "\n", + "Available options can be found in ` 
nn.agg.AggregationRegistry`, including\n", + "- `agg = nn.MeanAggregation()`\n", + "- `agg = nn.SumAggregation()`\n", + "- `agg = nn.NormAggregation()`" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'mean': ,\n", + " 'sum': ,\n", + " 'norm': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.agg.AggregationRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "agg = nn.MeanAggregation()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feed-Forward Network (FFN)\n", + "\n", + "A `FFN` takes the aggregated representations and make target predictions.\n", + "\n", + "Available options can be found in `nn.PredictorRegistry`.\n", + "\n", + "For regression:\n", + "- `ffn = nn.RegressionFFN()`\n", + "- `ffn = nn.MveFFN()`\n", + "- `ffn = nn.EvidentialFFN()`\n", + "\n", + "For classification:\n", + "- `ffn = nn.BinaryClassificationFFN()`\n", + "- `ffn = nn.BinaryDirichletFFN()`\n", + "- `ffn = nn.MulticlassClassificationFFN()`\n", + "- `ffn = nn.MulticlassDirichletFFN()`\n", + "\n", + "For spectral:\n", + "- `ffn = nn.SpectralFFN()` # will be available in future version" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'regression': ,\n", + " 'regression-mve': ,\n", + " 'regression-evidential': ,\n", + " 'regression-quantile': ,\n", + " 'classification': ,\n", + " 'classification-dirichlet': ,\n", + " 'multiclass': ,\n", + " 'multiclass-dirichlet': ,\n", + " 'spectral': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.PredictorRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "ffn = nn.RegressionFFN(output_transform=output_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Batch Norm\n", + "A `Batch Norm` normalizes the outputs of the aggregation by re-centering and re-scaling.\n", + "\n", + "Whether to use batch norm" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "batch_norm = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Metrics\n", + "`Metrics` are the ways to evaluate the performance of model predictions.\n", + "\n", + "Available options can be found in `metrics.MetricRegistry`, including" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'mse': ,\n", + " 'mae': ,\n", + " 'rmse': ,\n", + " 'bounded-mse': ,\n", + " 'bounded-mae': ,\n", + " 'bounded-rmse': ,\n", + " 'r2': ,\n", + " 'binary-mcc': ,\n", + " 'multiclass-mcc': ,\n", + " 'roc': ,\n", + " 'prc': ,\n", + " 'accuracy': ,\n", + " 'f1': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.metrics.MetricRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metric_list = [nn.metrics.RMSE(), nn.metrics.MAE()] # Only the first metric is used for training and early stopping" + ] + }, 
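+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a minimal sketch, the metric classes can likely also be looked up from `nn.metrics.MetricRegistry` by the keys printed above. This assumes the registry supports dict-style lookup and that these entries take no required constructor arguments (as with `RMSE` and `MAE` in the previous cell):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal sketch (assumes dict-style lookup on MetricRegistry and no required constructor arguments)\n",
+    "metric_classes = [nn.metrics.MetricRegistry[k] for k in ('rmse', 'mae')]\n",
+    "metric_list = [metric_cls() for metric_cls in metric_classes]  # same intent as the cell above"
+   ]
+  },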
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructs MPNN" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0): RMSE(task_weights=[[1.0]])\n", + " (1): MAE(task_weights=[[1.0]])\n", + " (2): MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn = models.MPNN(mp, agg, ffn, batch_norm, metric_list)\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "# Configure model checkpointing\n", + "checkpointing = ModelCheckpoint(\n", + " \"checkpoints\", # Directory where model checkpoints will be saved\n", + " \"best-{epoch}-{val_loss:.2f}\", # Filename format for checkpoints, including epoch and validation loss\n", + " \"val_loss\", # Metric used to select the best checkpoint (based on validation loss)\n", + " mode=\"min\", # Save the checkpoint with the lowest validation loss (minimization objective)\n", + " save_last=True, # Always save the most recent checkpoint, even if it's not the best\n", + ")\n", + "\n", + "\n", + "trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_checkpointing=True, # Use `True` if you want to save model checkpoints. 
The checkpoints will be saved in the `checkpoints` folder.\n", + " enable_progress_bar=True,\n", + " accelerator=\"auto\",\n", + " devices=1,\n", + " max_epochs=20, # number of epochs to train for\n", + " callbacks=[checkpointing], # Use the configured checkpoint callback\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start training" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/knathan/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | BatchNorm1d | 600 | train\n", + "3 | predictor | RegressionFFN | 90.6 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.276 Total estimated model params size (MB)\n", + "25 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1: 0%| | 0/2 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/mae 0.643399715423584 │\n", + "│ test/rmse 0.9120855927467346 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mae \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.643399715423584 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/rmse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9120855927467346 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(dataloaders=test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/examples/training_classification.ipynb b/chemprop/examples/training_classification.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..ed28f59641fff508c906b6c912b47df2aee5eee8 --- /dev/null +++ b/chemprop/examples/training_classification.ipynb @@ -0,0 +1,848 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training Classification" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/training_classification.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from pathlib import Path\n", + "\n", + "from lightning import pytorch as pl\n", + "\n", + "from chemprop import data, featurizers, models, nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change data inputs here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + 
"input_path = chemprop_dir / \"tests\" / \"data\" / \"classification\" / \"mol.csv\" # path to your data .csv file\n", + "num_workers = 0 # number of workers for dataloader. 0 means using main process for data loading\n", + "smiles_column = 'smiles' # name of the column containing SMILES strings\n", + "target_columns = ['NR-AhR', 'NR-ER', 'SR-ARE', 'SR-MMP'] # classification of activity (either 0 or 1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilesNR-AhRNR-ERSR-ARESR-MMP
0CCOc1ccc2nc(S(N)(=O)=O)sc2c11.0NaN1.00.0
1CCN1C(=O)NC(c2ccccc2)C1=O0.00.0NaN0.0
2CC[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]...NaNNaN0.0NaN
3CCCN(CC)C(CC)C(=O)Nc1c(C)cccc1C0.00.0NaN0.0
4CC(O)(P(=O)(O)O)P(=O)(O)O0.00.00.00.0
..................
495Cc1ccccc1CO[C@H]1C[C@]2(C(C)C)CC[C@@]1(C)O2NaN0.00.00.0
496NNc1ccc(C(=O)O)cc1NaNNaN0.00.0
497CCCCCCOc1ccccc1C(=O)O0.0NaN0.00.0
498O=C(OCc1ccccc1)C(=O)OCc1ccccc10.00.00.00.0
499CCCSc1ccc2[nH]c(NC(=O)OC)nc2c11.01.00.01.0
\n", + "

500 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " smiles NR-AhR NR-ER SR-ARE \\\n", + "0 CCOc1ccc2nc(S(N)(=O)=O)sc2c1 1.0 NaN 1.0 \n", + "1 CCN1C(=O)NC(c2ccccc2)C1=O 0.0 0.0 NaN \n", + "2 CC[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]... NaN NaN 0.0 \n", + "3 CCCN(CC)C(CC)C(=O)Nc1c(C)cccc1C 0.0 0.0 NaN \n", + "4 CC(O)(P(=O)(O)O)P(=O)(O)O 0.0 0.0 0.0 \n", + ".. ... ... ... ... \n", + "495 Cc1ccccc1CO[C@H]1C[C@]2(C(C)C)CC[C@@]1(C)O2 NaN 0.0 0.0 \n", + "496 NNc1ccc(C(=O)O)cc1 NaN NaN 0.0 \n", + "497 CCCCCCOc1ccccc1C(=O)O 0.0 NaN 0.0 \n", + "498 O=C(OCc1ccccc1)C(=O)OCc1ccccc1 0.0 0.0 0.0 \n", + "499 CCCSc1ccc2[nH]c(NC(=O)OC)nc2c1 1.0 1.0 0.0 \n", + "\n", + " SR-MMP \n", + "0 0.0 \n", + "1 0.0 \n", + "2 NaN \n", + "3 0.0 \n", + "4 0.0 \n", + ".. ... \n", + "495 0.0 \n", + "496 0.0 \n", + "497 0.0 \n", + "498 0.0 \n", + "499 1.0 \n", + "\n", + "[500 rows x 5 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get SMILES and targets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array(['CCOc1ccc2nc(S(N)(=O)=O)sc2c1', 'CCN1C(=O)NC(c2ccccc2)C1=O',\n", + " 'CC[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]3CC[C@@]21C',\n", + " 'CCCN(CC)C(CC)C(=O)Nc1c(C)cccc1C', 'CC(O)(P(=O)(O)O)P(=O)(O)O'],\n", + " dtype=object),\n", + " array([[ 1., nan, 1., 0.],\n", + " [ 0., 0., nan, 0.],\n", + " [nan, nan, 0., nan],\n", + " [ 0., 0., nan, 0.],\n", + " [ 0., 0., 0., 0.]]))" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Take a look at the first 5 SMILES strings and target columns\n", + "smis[:5], ys[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[09:05:03] WARNING: not removing hydrogen atom without neighbors\n" + ] + } + ], + "source": [ + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Perform data splitting for training, validation, and testing" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['SCAFFOLD_BALANCED',\n", + " 'RANDOM_WITH_REPEATED_SMILES',\n", + " 'RANDOM',\n", + " 'KENNARD_STONE',\n", + " 'KMEANS']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# available split types\n", + "list(data.SplitType.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.mol for d in all_data] # RDkit Mol objects are use for structure based splits\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "## Get MoleculeDataset" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0], featurizer)\n", + "val_dset = data.MoleculeDataset(val_data[0], featurizer)\n", + "test_dset = data.MoleculeDataset(test_data[0], featurizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get DataLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = data.build_dataloader(train_dset, num_workers=num_workers)\n", + "val_loader = data.build_dataloader(val_dset, num_workers=num_workers, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, num_workers=num_workers, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change Message-Passing Neural Network (MPNN) inputs here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Message Passing\n", + "A `Message passing` constructs molecular graphs using message passing to learn node-level hidden representations.\n", + "\n", + "Options are `mp = nn.BondMessagePassing()` or `mp = nn.AtomMessagePassing()`" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "mp = nn.BondMessagePassing()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aggregation\n", + "An `Aggregation` is responsible for constructing a graph-level representation from the set of node-level representations after message passing.\n", + "\n", + "Available options can be found in ` nn.agg.AggregationRegistry`, including\n", + "- `agg = nn.MeanAggregation()`\n", + "- `agg = nn.SumAggregation()`\n", + "- `agg = nn.NormAggregation()`" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'mean': ,\n", + " 'sum': ,\n", + " 'norm': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.agg.AggregationRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "agg = nn.MeanAggregation()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feed-Forward Network (FFN)\n", + "\n", + "A `FFN` takes the aggregated representations and make target predictions.\n", + "\n", + "Available options can be found in `nn.PredictorRegistry`.\n", + "\n", + "For regression:\n", + "- `ffn = nn.RegressionFFN()`\n", + "- `ffn = nn.MveFFN()`\n", + "- `ffn = nn.EvidentialFFN()`\n", + "\n", + "For classification:\n", + "- `ffn = nn.BinaryClassificationFFN()`\n", + "- `ffn = nn.BinaryDirichletFFN()`\n", + "- `ffn = nn.MulticlassClassificationFFN()`\n", + "- `ffn = nn.MulticlassDirichletFFN()`\n", + "\n", + "For spectral:\n", + "- `ffn = nn.SpectralFFN()` # will be available in future version" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'regression': ,\n", + " 'regression-mve': ,\n", + " 'regression-evidential': ,\n", + " 'regression-quantile': ,\n", + " 'classification': ,\n", + " 'classification-dirichlet': ,\n", + " 'multiclass': ,\n", + " 'multiclass-dirichlet': ,\n", + " 'spectral': \n", + "}\n" + ] + } + ], + 
"source": [ + "print(nn.PredictorRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "ffn = nn.BinaryClassificationFFN(n_tasks = len(target_columns))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Batch Norm\n", + "A `Batch Norm` normalizes the outputs of the aggregation by re-centering and re-scaling.\n", + "\n", + "Whether to use batch norm" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "batch_norm = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Metrics\n", + "`Metrics` are the ways to evaluate the performance of model predictions.\n", + "\n", + "Available options can be found in `metrics.MetricRegistry`, including" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'mse': ,\n", + " 'mae': ,\n", + " 'rmse': ,\n", + " 'bounded-mse': ,\n", + " 'bounded-mae': ,\n", + " 'bounded-rmse': ,\n", + " 'r2': ,\n", + " 'binary-mcc': ,\n", + " 'multiclass-mcc': ,\n", + " 'roc': ,\n", + " 'prc': ,\n", + " 'accuracy': ,\n", + " 'f1': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.metrics.MetricRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# AUROC used by default\n", + "metric_list = None " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructs MPNN" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): Identity()\n", + " (predictor): BinaryClassificationFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=4, bias=True)\n", + " )\n", + " )\n", + " (criterion): BCELoss(task_weights=[[1.0, 1.0, 1.0, 1.0]])\n", + " (output_transform): Identity()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0): BinaryAUROC()\n", + " (1): BCELoss(task_weights=[[1.0, 1.0, 1.0, 1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn = models.MPNN(mp, agg, ffn, batch_norm, metric_list)\n", + "\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_checkpointing=True, # Use `True` if you want to save 
model checkpoints. The checkpoints will be saved in the `checkpoints` folder.\n", + " enable_progress_bar=True,\n", + " accelerator=\"cpu\",\n", + " devices=1,\n", + " max_epochs=20, # number of epochs to train for\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start training" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/knathan/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "--------------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | BinaryClassificationFFN | 91.5 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "--------------------------------------------------------------------\n", + "319 K Trainable params\n", + "0 Non-trainable params\n", + "319 K Total params\n", + "1.277 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/roc 0.6421189308166504 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/roc \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.6421189308166504 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(mpnn, test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/examples/training_regression_multicomponent.ipynb b/chemprop/examples/training_regression_multicomponent.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0e76efaef4e5d7b32e27c4e77687c516000df720 --- /dev/null +++ 
b/chemprop/examples/training_regression_multicomponent.ipynb @@ -0,0 +1,713 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training Regression - Multicomponent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/training_regression_multicomponent.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from lightning import pytorch as pl\n", + "from pathlib import Path\n", + "\n", + "from chemprop import data, featurizers, models, nn\n", + "from chemprop.nn import metrics\n", + "from chemprop.models import multi\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Change your data inputs here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol+mol\" / \"mol+mol.csv\" # path to your data .csv file containing SMILES strings and target values\n", + "smiles_columns = ['smiles', 'solvent'] # name of the column containing SMILES strings\n", + "target_columns = ['peakwavs_max'] # list of names of the columns containing targets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilessolventpeakwavs_max
0CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2C...ClCCl642.0
1C(=C/c1cnccn1)\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3c...ClCCl420.0
2CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+]...O544.0
3c1ccc2[nH]ccc2c1O290.0
4CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5c...ClC(Cl)Cl736.0
............
95COc1ccc(C2CC(c3ccc(O)cc3)=NN2c2ccc(S(N)(=O)=O)...C1CCOC1359.0
96COc1ccc2c3c(c4ccc(OC)cc4c2c1)C1(c2ccccc2-c2ccc...C1CCCCC1386.0
97CCCCOc1c(C=C2N(C)c3ccccc3C2(C)C)c(=O)c1=OCCO425.0
98Cc1cc2ccc(-c3cccc4cccc(-c5ccc6cc(C)c(=O)oc6c5)...c1ccccc1324.0
99Cc1ccc(C(=O)c2c(C)c3ccc4cccc5c6cccc7ccc2c(c76)...ClCCl391.0
\n", + "

100 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " smiles solvent peakwavs_max\n", + "0 CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2C... ClCCl 642.0\n", + "1 C(=C/c1cnccn1)\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3c... ClCCl 420.0\n", + "2 CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+]... O 544.0\n", + "3 c1ccc2[nH]ccc2c1 O 290.0\n", + "4 CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5c... ClC(Cl)Cl 736.0\n", + ".. ... ... ...\n", + "95 COc1ccc(C2CC(c3ccc(O)cc3)=NN2c2ccc(S(N)(=O)=O)... C1CCOC1 359.0\n", + "96 COc1ccc2c3c(c4ccc(OC)cc4c2c1)C1(c2ccccc2-c2ccc... C1CCCCC1 386.0\n", + "97 CCCCOc1c(C=C2N(C)c3ccccc3C2(C)C)c(=O)c1=O CCO 425.0\n", + "98 Cc1cc2ccc(-c3cccc4cccc(-c5ccc6cc(C)c(=O)oc6c5)... c1ccccc1 324.0\n", + "99 Cc1ccc(C(=O)c2c(C)c3ccc4cccc5c6cccc7ccc2c(c76)... ClCCl 391.0\n", + "\n", + "[100 rows x 3 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get SMILES and targets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "smiss = df_input.loc[:, smiles_columns].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([['CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2CCCC)C(=O)N(CCCC)C1=S',\n", + " 'ClCCl'],\n", + " ['C(=C/c1cnccn1)\\\\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3cnccn3)cc2)cc1',\n", + " 'ClCCl'],\n", + " ['CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+](C)C)cc-3oc2c1',\n", + " 'O'],\n", + " ['c1ccc2[nH]ccc2c1', 'O'],\n", + " ['CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5ccccc5c4C3(C)C)CCCC1=C2c1ccccc1C(=O)O',\n", + " 'ClC(Cl)Cl']], dtype=object),\n", + " array([[642.],\n", + " [420.],\n", + " [544.],\n", + " [290.],\n", + " [736.]]))" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Take a look at the first 5 SMILES strings and targets\n", + "smiss[:5], ys[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Make molecule datapoints\n", + "Create a list of lists containing the molecule datapoints for each components. The target is stored in the 0th component." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "all_data = [[data.MoleculeDatapoint.from_smi(smis[0], y) for smis, y in zip(smiss, ys)]]\n", + "all_data += [[data.MoleculeDatapoint.from_smi(smis[i]) for smis in smiss] for i in range(1, len(smiles_columns))]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Split data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Perform data splitting for training, validation, and testing" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "component_to_split_by = 0 # index of the component to use for structure based splits\n", + "mols = [d.mol for d in all_data[component_to_split_by]]\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Get MoleculeDataset for each components" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "\n", + "train_datasets = [data.MoleculeDataset(train_data[0][i], featurizer) for i in range(len(smiles_columns))]\n", + "val_datasets = [data.MoleculeDataset(val_data[0][i], featurizer) for i in range(len(smiles_columns))]\n", + "test_datasets = [data.MoleculeDataset(test_data[0][i], featurizer) for i in range(len(smiles_columns))]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Construct multicomponent dataset and scale the targets" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "train_mcdset = data.MulticomponentDataset(train_datasets)\n", + "scaler = train_mcdset.normalize_targets()\n", + "val_mcdset = data.MulticomponentDataset(val_datasets)\n", + "val_mcdset.normalize_targets(scaler)\n", + "test_mcdset = data.MulticomponentDataset(test_datasets)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Construct data loader" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = data.build_dataloader(train_mcdset)\n", + "val_loader = data.build_dataloader(val_mcdset, shuffle=False)\n", + "test_loader = data.build_dataloader(test_mcdset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Construct multicomponent MPNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MulticomponentMessagePassing\n", + "- `blocks`: a list of message passing block used for each components\n", + "- `n_components`: number of components" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "mcmp = nn.MulticomponentMessagePassing(\n", + " blocks=[nn.BondMessagePassing() for _ in range(len(smiles_columns))],\n", + " n_components=len(smiles_columns),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aggregation" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "agg = nn.MeanAggregation()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 
RegressionFFN" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "ffn = nn.RegressionFFN(\n", + " input_dim=mcmp.output_dim,\n", + " output_transform=output_transform,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "metric_list = [metrics.RMSE(), metrics.MAE()] # Only the first metric is used for training and early stopping" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MulticomponentMPNN" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MulticomponentMPNN(\n", + " (message_passing): MulticomponentMessagePassing(\n", + " (blocks): ModuleList(\n", + " (0-1): 2 x BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): Identity()\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=600, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0): RMSE(task_weights=[[1.0]])\n", + " (1): MAE(task_weights=[[1.0]])\n", + " (2): MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mcmpnn = multi.MulticomponentMPNN(\n", + " mcmp,\n", + " agg,\n", + " ffn,\n", + " metrics=metric_list,\n", + ")\n", + "\n", + "mcmpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_checkpointing=True,\n", + " enable_progress_bar=True,\n", + " accelerator=\"auto\",\n", + " devices=1,\n", + " max_epochs=20, # number of epochs to train for\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start training" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/knathan/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading 
`train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "-------------------------------------------------------------------------\n", + "0 | message_passing | MulticomponentMessagePassing | 455 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | RegressionFFN | 180 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "-------------------------------------------------------------------------\n", + "636 K Trainable params\n", + "0 Non-trainable params\n", + "636 K Total params\n", + "2.544 Total estimated model params size (MB)\n", + "35 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking: | | 0/? [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/mae 87.1765365600586 │\n", + "│ test/rmse 105.41293334960938 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mae \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 87.1765365600586 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/rmse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 105.41293334960938 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(mcmpnn, test_loader)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/examples/training_regression_reaction.ipynb b/chemprop/examples/training_regression_reaction.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..5e5439d54f3aefe1b94d73656fb3b65e557def7f --- /dev/null +++ b/chemprop/examples/training_regression_reaction.ipynb @@ -0,0 +1,804 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training Regression - Reaction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/training_regression_reaction.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from lightning import pytorch as pl\n", + "from pathlib import Path\n", + "\n", + "from chemprop import data, featurizers, models, nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change data inputs here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"rxn\" / \"rxn.csv\"\n", + "num_workers = 0 # number of workers for dataloader. 0 means using main process for data loading\n", + "smiles_column = 'smiles'\n", + "target_columns = ['ea']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilesea
0[O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:1...8.898934
1[C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:...5.464328
2[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H...5.270552
3[C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])...8.473006
4[C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H...5.579037
.........
95[C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([C:4]...9.295665
96[O:1]=[C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11...7.753442
97[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H...10.650215
98[C:1]1([H:8])([H:9])[C@@:2]2([H:10])[N:3]1[C:4...10.138945
99[C:1]([C@@:2]1([C:3]([C:4]([O:5][H:15])([H:13]...6.979934
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " smiles ea\n", + "0 [O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:1... 8.898934\n", + "1 [C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:... 5.464328\n", + "2 [C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H... 5.270552\n", + "3 [C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])... 8.473006\n", + "4 [C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H... 5.579037\n", + ".. ... ...\n", + "95 [C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([C:4]... 9.295665\n", + "96 [O:1]=[C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11... 7.753442\n", + "97 [C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H... 10.650215\n", + "98 [C:1]1([H:8])([H:9])[C@@:2]2([H:10])[N:3]1[C:4... 10.138945\n", + "99 [C:1]([C@@:2]1([C:3]([C:4]([O:5][H:15])([H:13]... 6.979934\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load smiles and targets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array(['[O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:15])([H:13])[H:14])([H:11])[H:12])([H:9])[H:10])[H:8]>>[C:3](=[C:4]=[O:5])([H:11])[H:12].[C:6]([O:7][H:15])([H:8])([H:13])[H:14].[O:1]=[C:2]([H:9])[H:10]',\n", + " '[C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:4]3([H:11])[O:5][C@:6]1([H:12])[C@@:7]23[H:13]>>[C:1]1([H:8])([H:9])[O:2][C:3]([H:10])=[C:7]([H:13])[C@:6]1([O+:5]=[C-:4][H:11])[H:12]',\n", + " '[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:16])=[C:7]1[H:17])([H:8])([H:9])[H:10]',\n", + " '[C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C-:1]([O+:2]=[C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])[H:12])([H:8])[H:10].[H:9][H:11]',\n", + " '[C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]=[C:4]([H:10])[H:11])[C:5](=[O:6])[H:12])([H:7])([H:8])[H:9]'],\n", + " dtype=object),\n", + " array([[8.8989335 ],\n", + " [5.46432769],\n", + " [5.27055228],\n", + " [8.47300569],\n", + " [5.57903696]]))" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values\n", + "\n", + "smis[:5], ys[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "all_data = [data.ReactionDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Perform data splitting for training, validation, and testing" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.rct for d in all_data] # Can either split by reactants (.rct) or products (.pdt)\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "# Defining the featurizer\n", + "\n", + "Reactions can be featurized using the ```CondensedGraphOfReactionFeaturizer``` (also labeled ```CGRFeaturizer```).\n", + "\n", + "\n", + "Use ```_mode``` keyword to set the mode by which a reaction should be featurized into a ```MolGraph```.\n", + "\n", + "Options are can be found with ```featurizers.RxnMode.keys```" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "REAC_PROD\n", + "REAC_PROD_BALANCE\n", + "REAC_DIFF\n", + "REAC_DIFF_BALANCE\n", + "PROD_DIFF\n", + "PROD_DIFF_BALANCE\n" + ] + } + ], + "source": [ + "for key in featurizers.RxnMode.keys():\n", + " print(key)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.CondensedGraphOfReactionFeaturizer(mode_=\"PROD_DIFF\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get ReactionDatasets" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "train_dset = data.ReactionDataset(train_data[0], featurizer)\n", + "scaler = train_dset.normalize_targets()\n", + "\n", + "val_dset = data.ReactionDataset(val_data[0], featurizer)\n", + "val_dset.normalize_targets(scaler)\n", + "test_dset = data.ReactionDataset(test_data[0], featurizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get dataloaders" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = data.build_dataloader(train_dset, num_workers=num_workers)\n", + "val_loader = data.build_dataloader(val_dset, num_workers=num_workers, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, num_workers=num_workers, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change Message-Passing Neural Network (MPNN) inputs here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Message passing\n", + "\n", + "Message passing blocks must be given the shape of the featurizer's outputs.\n", + "\n", + "Options are `mp = nn.BondMessagePassing()` or `mp = nn.AtomMessagePassing()`" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "fdims = featurizer.shape # the dimensions of the featurizer, given as (atom_dims, bond_dims).\n", + "mp = nn.BondMessagePassing(*fdims)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aggregation" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'mean': ,\n", + " 'sum': ,\n", + " 'norm': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.agg.AggregationRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "agg = nn.MeanAggregation()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feed-Forward Network (FFN)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'regression': ,\n", + " 'regression-mve': ,\n", + " 'regression-evidential': ,\n", + " 'regression-quantile': 
,\n", + " 'classification': ,\n", + " 'classification-dirichlet': ,\n", + " 'multiclass': ,\n", + " 'multiclass-dirichlet': ,\n", + " 'spectral': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.PredictorRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "ffn = nn.RegressionFFN(output_transform=output_transform)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Batch norm" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "batch_norm = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'mse': ,\n", + " 'mae': ,\n", + " 'rmse': ,\n", + " 'bounded-mse': ,\n", + " 'bounded-mae': ,\n", + " 'bounded-rmse': ,\n", + " 'r2': ,\n", + " 'binary-mcc': ,\n", + " 'multiclass-mcc': ,\n", + " 'roc': ,\n", + " 'prc': ,\n", + " 'accuracy': ,\n", + " 'f1': \n", + "}\n" + ] + } + ], + "source": [ + "print(nn.metrics.MetricRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metric_list = [nn.metrics.RMSE(), nn.metrics.MAE()] \n", + "# Only the first metric is used for training and early stopping" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Construct MPNN" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=134, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=406, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0): RMSE(task_weights=[[1.0]])\n", + " (1): MAE(task_weights=[[1.0]])\n", + " (2): MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn = models.MPNN(mp, agg, ffn, batch_norm, metric_list)\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training and testing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_checkpointing=True, # Use `True` if you want to save model checkpoints. The checkpoints will be saved in the `checkpoints` folder.\n", + " enable_progress_bar=True,\n", + " accelerator=\"auto\",\n", + " devices=1,\n", + " max_epochs=20, # number of epochs to train for\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start training" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/knathan/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 252 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | BatchNorm1d | 600 | train\n", + "3 | predictor | RegressionFFN | 90.6 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "343 K Trainable params\n", + "0 Non-trainable params\n", + "343 K Total params\n", + "1.374 Total estimated model params size (MB)\n", + "25 Modules in train mode\n", + "0 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/mae 1.111189842224121 │\n", + "│ test/rmse 1.4387098550796509 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mae \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.111189842224121 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/rmse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.4387098550796509 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(mpnn, test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + 
}, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chemprop/examples/transfer_learning.ipynb b/chemprop/examples/transfer_learning.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..328e2197e98593be679a016c7893ce746899d05b --- /dev/null +++ b/chemprop/examples/transfer_learning.ipynb @@ -0,0 +1,953 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Transfer Learning / Pretraining\n", + "Transfer learning (or pretraining) leverages knowledge from a pre-trained model on a related task to enhance performance on a new task. In Chemprop, we can use pre-trained model checkpoints to initialize a new model and freeze components of the new model during training, as demonstrated in this notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/transfer_learning.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from pathlib import Path\n", + "\n", + "from lightning import pytorch as pl\n", + "from sklearn.preprocessing import StandardScaler\n", + "import torch\n", + "\n", + "from chemprop import data, featurizers, models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change data inputs here" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\" # path to your data .csv file\n", + "num_workers = 0 # number of workers for dataloader. 0 means using main process for data loading\n", + "smiles_column = 'smiles' # name of the column containing SMILES strings\n", + "target_columns = ['lipo'] # list of names of the columns containing targets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smileslipo
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc143.54
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...-1.18
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl3.69
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...3.37
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...3.10
.........
95CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...2.20
96CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...2.04
97CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...4.49
98COc1ccc(Cc2c(N)n[nH]c2N)cc10.20
99CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...2.00
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_input = pd.read_csv(input_path)\n", + "df_input" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get SMILES and targets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "smis = df_input.loc[:, smiles_column].values\n", + "ys = df_input.loc[:, target_columns].values" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14',\n", + " 'COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23',\n", + " 'COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl',\n", + " 'OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3',\n", + " 'Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1'],\n", + " dtype=object)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smis[:5] # show first 5 SMILES strings" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 3.54],\n", + " [-1.18],\n", + " [ 3.69],\n", + " [ 3.37],\n", + " [ 3.1 ]])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ys[:5] # show first 5 targets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get molecule datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Perform data splitting for training, validation, and testing" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['SCAFFOLD_BALANCED',\n", + " 'RANDOM_WITH_REPEATED_SMILES',\n", + " 'RANDOM',\n", + " 'KENNARD_STONE',\n", + " 'KMEANS']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# available split types\n", + "list(data.SplitType.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.mol for d in all_data] # RDkit Mol objects are use for structure based splits\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Change checkpoint model 
inputs here\n", + "Both message-passing neural networks (MPNNs) and multi-component MPNNs can have their weights initialized from a checkpoint file." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "checkpoint_path = chemprop_dir / \"tests\" / \"data\" / \"example_model_v2_regression_mol.ckpt\" # path to the checkpoint file.\n", + "# If the checkpoint file is generated using the training notebook, it will be in the `checkpoints` folder with name similar to `checkpoints/epoch=19-step=180.ckpt`." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "mpnn_cls = models.MPNN" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): GraphTransform(\n", + " (V_transform): Identity()\n", + " (E_transform): Identity()\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn = mpnn_cls.load_from_file(checkpoint_path)\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Scale fine-tuning data with the model's target scaler\n", + "\n", + "If the pre-trained model was a regression model, it probably was trained on a scaled dataset. The scaler is saved as part of the model and used during prediction. For furthur training, we need to scale the fine-tuning data with the same target scaler." 
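To see why the pretraining scaler must be reused rather than refit, here is a minimal standardization round-trip sketch. The mean/scale values are hypothetical stand-ins for the statistics stored in the checkpoint's `UnscaleTransform` (read out from `mpnn.predictor.output_transform` in the next cell).

```python
import numpy as np

# Hypothetical pretraining statistics (stand-ins for the values stored in the
# checkpoint's UnscaleTransform and read out in the next cell).
mean, scale = np.array([2.0]), np.array([1.2])

y_raw = np.array([[3.54], [-1.18]])      # fine-tuning targets in original units
y_scaled = (y_raw - mean) / scale        # what normalizing with the pretraining scaler produces
y_recovered = y_scaled * scale + mean    # what UnscaleTransform applies to predictions

# Because both steps use the same statistics, the round trip is exact and the
# pretrained layers keep seeing targets on the scale they were trained on.
assert np.allclose(y_recovered, y_raw)
```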
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "pretraining_scaler = StandardScaler()\n", + "pretraining_scaler.mean_ = mpnn.predictor.output_transform.mean.numpy()\n", + "pretraining_scaler.scale_ = mpnn.predictor.output_transform.scale.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get MoleculeDataset" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "\n", + "train_dset = data.MoleculeDataset(train_data[0], featurizer)\n", + "train_dset.normalize_targets(pretraining_scaler)\n", + "\n", + "val_dset = data.MoleculeDataset(val_data[0], featurizer)\n", + "val_dset.normalize_targets(pretraining_scaler)\n", + "\n", + "test_dset = data.MoleculeDataset(test_data[0], featurizer)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get DataLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = data.build_dataloader(train_dset, num_workers=num_workers)\n", + "val_loader = data.build_dataloader(val_dset, num_workers=num_workers, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, num_workers=num_workers, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Freezing MPNN and FFN layers\n", + "Certain layers of a pre-trained model can be kept unchanged during further training on a new task." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Freezing the MPNN" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn.message_passing.apply(lambda module: module.requires_grad_(False))\n", + "mpnn.message_passing.eval()\n", + "mpnn.bn.apply(lambda module: module.requires_grad_(False))\n", + "mpnn.bn.eval() # Set batch norm layers to eval mode to freeze running mean and running var." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Freezing FFN layers" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "frzn_ffn_layers = 1 # the number of consecutive FFN layers to freeze." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "for idx in range(frzn_ffn_layers):\n", + " mpnn.predictor.ffn[idx].requires_grad_(False)\n", + " mpnn.predictor.ffn[idx + 1].eval()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_checkpointing=True, # Use `True` if you want to save model checkpoints. 
The checkpoints will be saved in the `checkpoints` folder.\n", + " enable_progress_bar=True,\n", + " accelerator=\"auto\",\n", + " devices=1,\n", + " max_epochs=20, # number of epochs to train for\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start training" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | eval \n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | BatchNorm1d | 600 | eval \n", + "3 | predictor | RegressionFFN | 90.6 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "301 Trainable params\n", + "318 K Non-trainable params\n", + "318 K Total params\n", + "1.276 Total estimated model params size (MB)\n", + "11 Modules in train mode\n", + "15 Modules in eval mode\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/mse 0.9625480771064758 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9625480771064758 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(mpnn, test_loader)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Transfer learning with multicomponenent models\n", + "Multi-component MPNN models have individual MPNN blocks for each molecule it parses in one input. These MPNN modules can be independently frozen for transfer learning." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Change data inputs here" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "checkpoint_path = chemprop_dir / \"tests\" / \"data\" / \"example_model_v2_regression_mol+mol.ckpt\" # path to the checkpoint file. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Change checkpoint model inputs here" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MulticomponentMPNN(\n", + " (message_passing): MulticomponentMessagePassing(\n", + " (blocks): ModuleList(\n", + " (0-1): 2 x BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): GraphTransform(\n", + " (V_transform): Identity()\n", + " (E_transform): Identity()\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): BatchNorm1d(600, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (predictor): RegressionFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=600, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=1, bias=True)\n", + " )\n", + " )\n", + " (criterion): MSE(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0-1): 2 x MSE(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpnn_cls = models.MulticomponentMPNN\n", + "mcmpnn = mpnn_cls.load_from_checkpoint(checkpoint_path)\n", + "mcmpnn" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "blocks_to_freeze = [0, 1] # a list of indices of the individual MPNN blocks to freeze before training." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BatchNorm1d(600, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mcmpnn = mpnn_cls.load_from_checkpoint(checkpoint_path)\n", + "for i in blocks_to_freeze:\n", + " mp_block = mcmpnn.message_passing.blocks[i]\n", + " mp_block.apply(lambda module: module.requires_grad_(False))\n", + " mp_block.eval()\n", + "mcmpnn.bn.apply(lambda module: module.requires_grad_(False))\n", + "mcmpnn.bn.eval()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/examples/uncertainty.ipynb b/chemprop/examples/uncertainty.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..aff3b9b190fb2f9d7a50bbecb6e56701a33fa566 --- /dev/null +++ b/chemprop/examples/uncertainty.ipynb @@ -0,0 +1,1152 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Uncertainty Quantification" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/uncertainty.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install .\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import torch\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "\n", + "from lightning import pytorch as pl\n", + "from lightning.pytorch.callbacks import ModelCheckpoint\n", + "\n", + "from chemprop import data, models, nn, uncertainty\n", + "from chemprop.models import save_model, load_model\n", + "from chemprop.cli.conf import NOW\n", + "from chemprop.cli.predict import find_models\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Training" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loda data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "input_path = (\n", + " chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n", + ") # path to your data .csv file\n", + "df_input = pd.read_csv(input_path)\n", + "smis = df_input.loc[:, \"smiles\"].values\n", + "ys = df_input.loc[:, [\"lipo\"]].values\n", + "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for 
smi, y in zip(smis, ys)]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "mols = [d.mol for d in all_data] # RDkit Mol objects are use for structure based splits\n", + "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n", + "train_data, val_data, test_data = data.split_data_by_indices(\n", + " all_data, train_indices, val_indices, test_indices\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "train_dset = data.MoleculeDataset(train_data[0])\n", + "scaler = train_dset.normalize_targets()\n", + "\n", + "val_dset = data.MoleculeDataset(val_data[0])\n", + "val_dset.normalize_targets(scaler)\n", + "\n", + "test_dset = data.MoleculeDataset(test_data[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = data.build_dataloader(train_dset)\n", + "val_loader = data.build_dataloader(val_dset, shuffle=False)\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructs MPNN\n", + "\n", + "- A `Message passing` constructs molecular graphs using message passing to learn node-level hidden representations.\n", + "\n", + "- An `Aggregation` is responsible for constructing a graph-level representation from the set of node-level representations after message passing.\n", + "\n", + "- A `FFN` takes the aggregated representations and make target predictions. To obtain uncertainty predictions, the `FFN` must be modified accordingly.\n", + "\n", + " For regression:\n", + " - `ffn = nn.RegressionFFN()`\n", + " - `ffn = nn.MveFFN()`\n", + " - `ffn = nn.EvidentialFFN()`\n", + "\n", + " For classification:\n", + " - `ffn = nn.BinaryClassificationFFN()`\n", + " - `ffn = nn.BinaryDirichletFFN()`\n", + " - `ffn = nn.MulticlassClassificationFFN()`\n", + " - `ffn = nn.MulticlassDirichletFFN()`\n", + "\n", + " For spectral:\n", + " - `ffn = nn.SpectralFFN()` # will be available in future version" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MPNN(\n", + " (message_passing): BondMessagePassing(\n", + " (W_i): Linear(in_features=86, out_features=300, bias=False)\n", + " (W_h): Linear(in_features=300, out_features=300, bias=False)\n", + " (W_o): Linear(in_features=372, out_features=300, bias=True)\n", + " (dropout): Dropout(p=0.0, inplace=False)\n", + " (tau): ReLU()\n", + " (V_d_transform): Identity()\n", + " (graph_transform): Identity()\n", + " )\n", + " (agg): MeanAggregation()\n", + " (bn): Identity()\n", + " (predictor): MveFFN(\n", + " (ffn): MLP(\n", + " (0): Sequential(\n", + " (0): Linear(in_features=300, out_features=300, bias=True)\n", + " )\n", + " (1): Sequential(\n", + " (0): ReLU()\n", + " (1): Dropout(p=0.0, inplace=False)\n", + " (2): Linear(in_features=300, out_features=2, bias=True)\n", + " )\n", + " )\n", + " (criterion): MVELoss(task_weights=[[1.0]])\n", + " (output_transform): UnscaleTransform()\n", + " )\n", + " (X_d_transform): Identity()\n", + " (metrics): ModuleList(\n", + " (0): MSE(task_weights=[[1.0]])\n", + " (1): MVELoss(task_weights=[[1.0]])\n", + " )\n", + ")" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mp = nn.BondMessagePassing()\n", + "agg = nn.MeanAggregation()\n", + 
"output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)\n", + "# Change to other predictor if needed.\n", + "ffn = nn.MveFFN(output_transform=output_transform)\n", + "mpnn = models.MPNN(mp, agg, ffn, batch_norm=False)\n", + "mpnn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "model_output_dir = Path(f\"chemprop_training/{NOW}\")\n", + "monitor_mode = \"min\" if mpnn.metrics[0].higher_is_better else \"max\"\n", + "checkpointing = ModelCheckpoint(\n", + " model_output_dir / \"checkpoints\",\n", + " \"best-{epoch}-{val_loss:.2f}\",\n", + " \"val_loss\",\n", + " mode=monitor_mode,\n", + " save_last=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(\n", + " logger=False,\n", + " enable_checkpointing=True,\n", + " enable_progress_bar=False,\n", + " accelerator=\"cpu\",\n", + " callbacks=[checkpointing],\n", + " devices=1,\n", + " max_epochs=20,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start training" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------\n", + "0 | message_passing | BondMessagePassing | 227 K | train\n", + "1 | agg | MeanAggregation | 0 | train\n", + "2 | bn | Identity | 0 | train\n", + "3 | predictor | MveFFN | 90.9 K | train\n", + "4 | X_d_transform | Identity | 0 | train\n", + "5 | metrics | ModuleList | 0 | train\n", + "---------------------------------------------------------------\n", + "318 K Trainable params\n", + "0 Non-trainable params\n", + "318 K Total params\n", + "1.274 Total estimated model params size (MB)\n", + "24 Modules in train mode\n", + "0 Modules in eval mode\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n", + "`Trainer.fit` stopped: `max_epochs=20` reached.\n" + ] + } + ], + "source": [ + "trainer.fit(mpnn, train_loader, val_loader)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save the best model" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "best_model_path = checkpointing.best_model_path\n", + "model = mpnn.__class__.load_from_checkpoint(best_model_path)\n", + "p_model = model_output_dir / \"best.pt\"\n", + "save_model(p_model, model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predicting" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Change model input here" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smileslipo
0Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc143.54
1COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...-1.18
2COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl3.69
3OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...3.37
4Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...3.10
.........
95CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...2.20
96CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...2.04
97CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...4.49
98COc1ccc(Cc2c(N)n[nH]c2N)cc10.20
99CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...2.00
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " smiles lipo\n", + "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n", + "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n", + "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n", + "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n", + "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n", + ".. ... ...\n", + "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n", + "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n", + "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n", + "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n", + "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chemprop_dir = Path.cwd().parent\n", + "test_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n", + "df_test = pd.read_csv(test_path)\n", + "test_dset = data.MoleculeDataset(test_data[0])\n", + "test_loader = data.build_dataloader(test_dset, shuffle=False)\n", + "df_test" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# use the validation set from the training as the calibration set as an example\n", + "cal_dset = data.MoleculeDataset(val_data[0])\n", + "cal_loader = data.build_dataloader(cal_dset, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructs uncertainty estimator\n", + "An uncertianty estimator can make model predictions and associated uncertainty predictions.\n", + "\n", + "Available options can be found in `uncertainty.UncertaintyEstimatorRegistry`." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'none': ,\n", + " 'mve': ,\n", + " 'ensemble': ,\n", + " 'classification': ,\n", + " 'evidential-total': ,\n", + " 'evidential-epistemic': ,\n", + " 'evidential-aleatoric': ,\n", + " 'dropout': ,\n", + " 'classification-dirichlet': ,\n", + " 'multiclass-dirichlet': ,\n", + " 'quantile-regression': \n", + "}\n" + ] + } + ], + "source": [ + "print(uncertainty.UncertaintyEstimatorRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "unc_estimator = uncertainty.MVEEstimator()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructs uncertainty calibrator\n", + "An uncertianty calibrator can calibrate the predicted uncertainties.\n", + "\n", + "Available options can be found in `uncertainty.UncertaintyCalibratorRegistry`.\n", + "\n", + "For regression:\n", + "\n", + "- ZScalingCalibrator\n", + "\n", + "- ZelikmanCalibrator\n", + "\n", + "- MVEWeightingCalibrator\n", + "\n", + "- RegressionConformalCalibrator\n", + "\n", + "For binary classification:\n", + "\n", + "- PlattCalibrator\n", + "\n", + "- IsotonicCalibrator\n", + "\n", + "- MultilabelConformalCalibrator\n", + "\n", + "For multiclass classification:\n", + "\n", + "- MulticlassConformalCalibrator\n", + "\n", + "- AdaptiveMulticlassConformalCalibrator\n", + "\n", + "- IsotonicMulticlassCalibrator" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'zscaling': ,\n", + " 'zelikman-interval': ,\n", + " 'mve-weighting': ,\n", 
+ " 'conformal-regression': ,\n", + " 'platt': ,\n", + " 'isotonic': ,\n", + " 'conformal-multilabel': ,\n", + " 'conformal-multiclass': ,\n", + " 'conformal-adaptive': ,\n", + " 'isotonic-multiclass': \n", + "}\n" + ] + } + ], + "source": [ + "print(uncertainty.UncertaintyCalibratorRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "unc_calibrator = uncertainty.ZScalingCalibrator()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructs uncertainty evaluator\n", + "An uncertianty evaluator can evaluates the quality of uncertainty estimates.\n", + "\n", + "Available options can be found in `uncertainty.UncertaintyEvaluatorRegistry`.\n", + "\n", + "For regression:\n", + "\n", + "- NLLRegressionEvaluator\n", + "\n", + "- CalibrationAreaEvaluator\n", + "\n", + "- ExpectedNormalizedErrorEvaluator\n", + "\n", + "- SpearmanEvaluator\n", + "\n", + "- RegressionConformalEvaluator\n", + "\n", + "For binary classification:\n", + "\n", + "- NLLClassEvaluator\n", + "\n", + "- MultilabelConformalEvaluator\n", + "\n", + "\n", + "For multiclass classification:\n", + "\n", + "- NLLMulticlassEvaluator\n", + "\n", + "- MulticlassConformalEvaluator" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassRegistry {\n", + " 'nll-regression': ,\n", + " 'miscalibration_area': ,\n", + " 'ence': ,\n", + " 'spearman': ,\n", + " 'conformal-coverage-regression': ,\n", + " 'nll-classification': ,\n", + " 'conformal-coverage-classification': ,\n", + " 'nll-multiclass': ,\n", + " 'conformal-coverage-multiclass': \n", + "}\n" + ] + } + ], + "source": [ + "print(uncertainty.UncertaintyEvaluatorRegistry)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "unc_evaluators = [\n", + " uncertainty.NLLRegressionEvaluator(),\n", + " uncertainty.CalibrationAreaEvaluator(),\n", + " uncertainty.ExpectedNormalizedErrorEvaluator(),\n", + " uncertainty.SpearmanEvaluator(),\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load model" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "model_paths = find_models([model_output_dir])\n", + "models = [load_model(model_path, multicomponent=False) for model_path in model_paths]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. 
You can set it by doing `Trainer(accelerator='gpu')`.\n" + ] + } + ], + "source": [ + "trainer = pl.Trainer(logger=False, enable_progress_bar=True, accelerator=\"cpu\", devices=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Make uncertainty estimation" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|███████████████████| 1/1 [00:00<00:00, 126.93it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilestargetpredunc
0Cc1ccc(NC(=O)c2cscn2)cc1-n1cnc2ccc(N3CCN(C)CC3...2.062.0474741.543233
1O=C(Nc1nnc(C(=O)Nc2ccc(N3CCOCC3)cc2)o1)c1ccc(C...1.922.0475611.534631
2CNCCCC12CCC(c3ccccc31)c1ccccc120.892.0620571.548673
3Oc1ncnc2scc(-c3ccsc3)c122.252.0618131.555989
4C=CC(=O)Nc1cccc(CN2C(=O)N(c3c(Cl)c(OC)cc(OC)c3...2.042.0382381.532385
5COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCCC13.132.0488351.535416
6O=C(COc1ccccc1)c1ccccc12.872.0668441.534430
7CC(C)c1ccc2oc3nc(N)c(C(=O)O)cc3c(=O)c2c11.102.0537711.550390
8N#Cc1ccc(F)c(-c2cc(C(F)(F)F)ccc2OCC(=O)O)c1-0.162.0475541.535353
9COc1cnc(-c2ccccn2)nc1N(C)C1.902.0505011.537318
\n", + "
" + ], + "text/plain": [ + " smiles target pred \\\n", + "0 Cc1ccc(NC(=O)c2cscn2)cc1-n1cnc2ccc(N3CCN(C)CC3... 2.06 2.047474 \n", + "1 O=C(Nc1nnc(C(=O)Nc2ccc(N3CCOCC3)cc2)o1)c1ccc(C... 1.92 2.047561 \n", + "2 CNCCCC12CCC(c3ccccc31)c1ccccc12 0.89 2.062057 \n", + "3 Oc1ncnc2scc(-c3ccsc3)c12 2.25 2.061813 \n", + "4 C=CC(=O)Nc1cccc(CN2C(=O)N(c3c(Cl)c(OC)cc(OC)c3... 2.04 2.038238 \n", + "5 COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCCC1 3.13 2.048835 \n", + "6 O=C(COc1ccccc1)c1ccccc1 2.87 2.066844 \n", + "7 CC(C)c1ccc2oc3nc(N)c(C(=O)O)cc3c(=O)c2c1 1.10 2.053771 \n", + "8 N#Cc1ccc(F)c(-c2cc(C(F)(F)F)ccc2OCC(=O)O)c1 -0.16 2.047554 \n", + "9 COc1cnc(-c2ccccn2)nc1N(C)C 1.90 2.050501 \n", + "\n", + " unc \n", + "0 1.543233 \n", + "1 1.534631 \n", + "2 1.548673 \n", + "3 1.555989 \n", + "4 1.532385 \n", + "5 1.535416 \n", + "6 1.534430 \n", + "7 1.550390 \n", + "8 1.535353 \n", + "9 1.537318 " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_predss, test_uncss = unc_estimator(test_loader, models, trainer)\n", + "test_preds = test_predss.mean(0)\n", + "test_uncs = test_uncss.mean(0)\n", + "\n", + "df_test = pd.DataFrame(\n", + " {\n", + " \"smiles\": test_dset.smiles,\n", + " \"target\": test_dset.Y.reshape(-1),\n", + " \"pred\": test_preds.reshape(-1),\n", + " \"unc\": test_uncs.reshape(-1),\n", + " }\n", + ")\n", + "\n", + "df_test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Apply uncertainty calibration" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicting DataLoader 0: 100%|███████████████████| 1/1 [00:00<00:00, 228.26it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilestargetpredunccal_unc
0Cc1ccc(NC(=O)c2cscn2)cc1-n1cnc2ccc(N3CCN(C)CC3...2.062.0474741.5432331.691122
1O=C(Nc1nnc(C(=O)Nc2ccc(N3CCOCC3)cc2)o1)c1ccc(C...1.922.0475611.5346311.681696
2CNCCCC12CCC(c3ccccc31)c1ccccc120.892.0620571.5486731.697084
3Oc1ncnc2scc(-c3ccsc3)c122.252.0618131.5559891.705101
4C=CC(=O)Nc1cccc(CN2C(=O)N(c3c(Cl)c(OC)cc(OC)c3...2.042.0382381.5323851.679235
5COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCCC13.132.0488351.5354161.682556
6O=C(COc1ccccc1)c1ccccc12.872.0668441.5344301.681475
7CC(C)c1ccc2oc3nc(N)c(C(=O)O)cc3c(=O)c2c11.102.0537711.5503901.698965
8N#Cc1ccc(F)c(-c2cc(C(F)(F)F)ccc2OCC(=O)O)c1-0.162.0475541.5353531.682488
9COc1cnc(-c2ccccn2)nc1N(C)C1.902.0505011.5373181.684641
\n", + "
" + ], + "text/plain": [ + " smiles target pred \\\n", + "0 Cc1ccc(NC(=O)c2cscn2)cc1-n1cnc2ccc(N3CCN(C)CC3... 2.06 2.047474 \n", + "1 O=C(Nc1nnc(C(=O)Nc2ccc(N3CCOCC3)cc2)o1)c1ccc(C... 1.92 2.047561 \n", + "2 CNCCCC12CCC(c3ccccc31)c1ccccc12 0.89 2.062057 \n", + "3 Oc1ncnc2scc(-c3ccsc3)c12 2.25 2.061813 \n", + "4 C=CC(=O)Nc1cccc(CN2C(=O)N(c3c(Cl)c(OC)cc(OC)c3... 2.04 2.038238 \n", + "5 COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCCC1 3.13 2.048835 \n", + "6 O=C(COc1ccccc1)c1ccccc1 2.87 2.066844 \n", + "7 CC(C)c1ccc2oc3nc(N)c(C(=O)O)cc3c(=O)c2c1 1.10 2.053771 \n", + "8 N#Cc1ccc(F)c(-c2cc(C(F)(F)F)ccc2OCC(=O)O)c1 -0.16 2.047554 \n", + "9 COc1cnc(-c2ccccn2)nc1N(C)C 1.90 2.050501 \n", + "\n", + " unc cal_unc \n", + "0 1.543233 1.691122 \n", + "1 1.534631 1.681696 \n", + "2 1.548673 1.697084 \n", + "3 1.555989 1.705101 \n", + "4 1.532385 1.679235 \n", + "5 1.535416 1.682556 \n", + "6 1.534430 1.681475 \n", + "7 1.550390 1.698965 \n", + "8 1.535353 1.682488 \n", + "9 1.537318 1.684641 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cal_predss, cal_uncss = unc_estimator(cal_loader, models, trainer)\n", + "average_cal_preds = cal_predss.mean(0)\n", + "average_cal_uncs = cal_uncss.mean(0)\n", + "cal_targets = cal_dset.Y\n", + "cal_mask = torch.from_numpy(np.isfinite(cal_targets))\n", + "cal_targets = np.nan_to_num(cal_targets, nan=0.0)\n", + "cal_targets = torch.from_numpy(cal_targets)\n", + "unc_calibrator.fit(average_cal_preds, average_cal_uncs, cal_targets, cal_mask)\n", + "\n", + "cal_test_uncs = unc_calibrator.apply(test_uncs)\n", + "df_test[\"cal_unc\"] = cal_test_uncs\n", + "df_test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate predicted uncertainty" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "nll-regression: [1.4490190356267003]\n", + "miscalibration_area: [0.15619999170303345]\n", + "ence: [0.6248166925739804]\n", + "spearman: [0.27272725105285645]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `SpearmanCorrcoef` will save all targets and predictions in the buffer. 
For large datasets, this may lead to large memory footprint.\n", + " warnings.warn(*args, **kwargs) # noqa: B028\n" + ] + } + ], + "source": [ + "test_targets = test_dset.Y\n", + "test_mask = torch.from_numpy(np.isfinite(test_targets))\n", + "test_targets = np.nan_to_num(test_targets, nan=0.0)\n", + "test_targets = torch.from_numpy(test_targets)\n", + "\n", + "for evaluator in unc_evaluators:\n", + " evaluation = evaluator.evaluate(test_preds, cal_test_uncs, test_targets, test_mask)\n", + " print(f\"{evaluator.alias}: {evaluation.tolist()}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/chemprop/examples/use_featurizer_with_other_libraries.ipynb b/chemprop/examples/use_featurizer_with_other_libraries.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..40a6083bb7f570d89c72236b0dd7b2efc74ca435 --- /dev/null +++ b/chemprop/examples/use_featurizer_with_other_libraries.ipynb @@ -0,0 +1,513 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bd06dbac-fe7b-43f1-a9e7-6b6a5b39ad52", + "metadata": {}, + "source": [ + "# Demonstration of using Chemprop featurizer with DGL and PyTorch Geometric" + ] + }, + { + "cell_type": "markdown", + "id": "4c55d990", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/use_featurizer_with_other_libraries.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb316f5c", + "metadata": {}, + "outputs": [], + "source": [ + "# Install chemprop from GitHub if running in Google Colab\n", + "import os\n", + "\n", + "if os.getenv(\"COLAB_RELEASE_TAG\"):\n", + " try:\n", + " import chemprop\n", + " except ImportError:\n", + " !git clone https://github.com/chemprop/chemprop.git\n", + " %cd chemprop\n", + " !pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0\n", + " !pip install .\n", + " !pip install dgl -f https://data.dgl.ai/wheels/torch-2.4/repo.html\n", + " !pip install torch_geometric\n", + " %cd examples" + ] + }, + { + "cell_type": "markdown", + "id": "40cfeccb-bfec-4aef-a09b-929903455cce", + "metadata": {}, + "source": [ + "# Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "77d50745-e204-4a53-9e32-5c585caa1b91", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import torch\n", + "from sklearn.decomposition import PCA\n", + "from pathlib import Path\n", + "import numpy as np\n", + "\n", + "from chemprop import data, featurizers, models" + ] + }, + { + "cell_type": "markdown", + "id": "6301b0e9-d2f4-41b5-9e05-726c09ae1565", + "metadata": {}, + "source": [ + "### Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3139fce1-cd87-4c56-9b65-ad13b6698d21", + "metadata": {}, + "outputs": [], + "source": [ + "test_path = Path(\"..\") / \"tests\" / \"data\" / \"smis.csv\"\n", + "smiles_column = \"smiles\"\n", + "df_test = pd.read_csv(test_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "caddf77e-317c-4dc1-9ddc-77a972ca58dd", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3', V_f=None, E_f=None, V_d=None),\n", + " MoleculeDatapoint(mol=, y=None, weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1', V_f=None, E_f=None, V_d=None)]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_test = pd.read_csv(test_path)\n", + "\n", + "smis = df_test[smiles_column]\n", + "\n", + "test_data = [data.MoleculeDatapoint.from_smi(smi) for smi in smis]\n", + "test_data[:5]" + ] + }, + { + "cell_type": "markdown", + "id": "5c0b1062-674d-41ad-8e06-1c925ca158f6", + "metadata": {}, + "source": [ + "## Featurize molecules" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "77b7159f-ea91-4b0b-8cd1-5c7c64d02fe8", + "metadata": {}, + "outputs": [], + "source": [ + "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer()\n", + "molgraphs = [featurizer(data.mol) for data in test_data]" + ] + }, + { + "cell_type": "markdown", + "id": "ba9f69aa-e676-463f-bafd-84f4a75a357a", + "metadata": {}, + "source": [ + "# Use Chemprop featurizer with DGL" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b5b24e61-67b9-42b7-a7ee-3bad644f18ec", + "metadata": {}, + "outputs": [], + "source": [ + "# Install DGL separately if not already installed\n", + "# see https://www.dgl.ai/pages/start.html\n", + "import dgl\n", + "import networkx as nx" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8ca20930-94ea-4399-b262-e6b0cc6bff2a", + "metadata": {}, + "outputs": [], + "source": [ + "def convert_molgraph_to_dgl_graph(mg):\n", + " \"\"\"\n", + " Takes a Chemprop molgraph from featurizer and converts it to a DGL graph object.\n", + " Atom features are saved in 'n' and edge features in 'e'\n", + " \"\"\"\n", + " # Instantiate a graph from the edges\n", + " g = dgl.graph((mg.edge_index[0], mg.edge_index[1]), num_nodes=mg.V.shape[0])\n", + "\n", + " # Assign features\n", + " g.ndata[\"n\"] = torch.tensor(mg.V)\n", + " g.edata[\"e\"] = torch.tensor(mg.E)\n", + " return g\n", + "\n", + "\n", + "def visualize_dgl_graph(g):\n", + " \"\"\"\n", + " Visualize a DGL graph object.\n", + " Adapted from https://docs.dgl.ai/en/0.2.x/tutorials/basics/1_first.html\n", + " \"\"\"\n", + " nx_G = g.to_networkx()\n", + " pos = nx.kamada_kawai_layout(nx_G)\n", + " nx.draw(nx_G, pos, with_labels=True, node_color=[[0.5, 0.5, 0.5]])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "954173cb-1913-4790-94ee-8705a1bce998", + "metadata": {}, + "outputs": [], + "source": [ + "# Convert the molgraphs to DGL graphs\n", + "gs = [convert_molgraph_to_dgl_graph(x) for x in molgraphs]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": 
"183f5c98-7586-4c78-b5cb-0add1b82b220", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO2deVzU1f7/X7OwiAgaKIJSgAqIKRrmhlcTx6VE0wzKEk3L0ZZr1vfaqLcyc2m0foZtNqa5hKlganrJBUwJJRdAUGRRcEEEVARZZBtmzu+PQx8nBATmM/OZmc7z0R90ZuZzXjPCa87nnPciIoSAwWAwGG1FLLQABoPBMG+YjTIYDIZeMBtlMBgMvWA2ymAwGHrBbJTBYDD0gtkog8Fg6AWzUQaDwdALZqMMBoOhF8xGGQyGeVBUVHTixIm5c+cOHjx4+fLllZWVQiuqR8SymBgMhqlRV1eXm5t75cqVixcvpqen0x8KCgp0n+Pv75+SkiKUQl2YjTIYDIEpKSnJysrKyMjIysrKysrKzMzMyclRq9UNnubo6Ojt7S2VSmtqapKTkwGsX79+3rx5Qkj+G8xGGQyGUcnPz+cWmPSHK1euPPw0V1fXPn36eHl5+fn50R88PT1FIhF99KOPPlqxYoWtrW1cXNygQYOM+w4awmyUwWAYitLS0uzsbGqU1DQzMjIe3tO0sbHp0aOHrmn6+vq2b9+++Yu/88473377raura2Jiopubm8HexKNhNspgMPjh4WXm1atXH3aYTp06cQtM+oOHh4dY3OrjbrVaPXbs2OPHjw8dOvTYsWM2NjY8vY9Ww2yUwWC0HbVaPXfu3J07d9bW1mo0mgaP2tnZeXt7+/j4+Pr6+vr6+vj4eHt7P3KZ2XLu3r379NNPX716dd68eevXr+frsq2F2SiDwWg777///pdffkl/5muZ2QwpKSnvvfdeVFSUs7MzHTl37tzw4cMrKytVKpVcLudxrpbDbJTBYLSdbt265efnBwYGRkVFubq6Gnq64cOHnzx5cty4cdHR0RKJhA5GRESEhYVZWVkdPXr0X//6l6E1PAwLv2cwGG0kLy/v1q1bVlZWW7ZsMYKHAtixY0eXLl0OHz783//+lxucPn36ggUL1Gp1aGhoXl6eEWQ0gNloiyCECPLPw2CYMps2bdJoNC+++GLPnj2NM6O7u/uePXusra3XrFmzc+dObvyLL74YN25cYWFhSEhITU2NccRwMBt9NLGxsR06dHB3d7ezs1uwYMHdu3eFVsRgCI9Wq928eTOA119/3ZjzBgYGfvHFF4SQ2bNnJyUl0UGJRPLzzz97eXmdOnVKgB1SwmiW6Ohoa2tr3U/Mzs4uLCwsLi5Oq9UKrY7BEIzDhw8D8PT01Gg0dKSkpGTatGmHDx82wuxvvPEGgCeeeOL27dvcYEpKCg0D+Pbbb42ggYPZaJNotVqlUkm3sXv06JGZmbls2TKZTMblUfTq1Wvp0qXXr18XWimDIQChoaEAli9fzo188803AMaMGWOE2aurqwcPHgxg9OjRarWaG9+9e7dIJLKysjp+/LgRZFCYjTZOeXn5iy++CEAkEikUCu77lhBy48YNpVLp4eFBzVQsFstkssjIyNraWgEFMxjGpKioyMbGRiwW6y4jBgwYAGDnzp3G0ZCfn0+Tl95//33d8YULFwJwcXHJzc01jhJmo42QnZ3dt29fAB06dNi7d2+jz9FoNDExMSEhIdwtf9euXefPn3/hwgUjq2UwjA+NFX3uuee4kdTUVACPPfZYVVWV0WQkJCTQ5KUff/yRG9RoNM8++yyAAQMG3L9/3wgyeLbR4rrinOqcck05NzLq0qivbn/F7ywG5fjx4507dwbg7e2dnp7+yOcXFxerVKp+/fpxm6cBAQEqlaqiosIIahkMQfD39wfwyy+/cCNvv/02gHfffdfISmjykq2t7ZkzZ7jB4uJiGjwwffp0I2jgzUb3luztl94PSUASJMmS8ZfHZ1dnE0JczrssvrmYr1kMjUqlkkqlACZMmFBSUqL70LVr15p/bWJiolwut7e3p2bq4OAgl8vj4+MNqZfBEIBTp04BcHZ2rq6upiNVVVWdOnUCkJKSYnw99Gj+8ccfv3XrFjeYnp7u4OAAIDw83NAC+LHRbXe3iZJEskuy2LLYnOqc6HvRARkBk7InEfOx0aqqqpkzZza6GUoIUalU1tbWu3bteuR1ysrKtm7dKpPJuMWpn5+fUqm8c+eOwbQzGEZlzpw5AP7zn/9wIxEREQCefvppQfTU1taOGDECQGBgYE1NDTe+Z88ekUgklUp///13gwrgwUYrNZVOqU6DMwfXah+csZTVlVVqKomZ2OiNGzcGDhwIwN7eXvc+hRBSXV3NhcV9/PHHLb9mRkaGQqHo0qULfa2NjU1ISMj+/fvr6ur4ls9gGI+Kigq6yrt48SI3OGrUKADff/+9UKoKCwu7d+8O4N///rfu+JIlS+iObU5OjuFm58FGD5YeRBK2393e6KOmb6NxcXEuLi4AevbsmZaWpvvQzZs3hwwZQndetm7d2oaL19TU7N+/PyQkhO4VAOjevbtCoaAFxBgMs+PHH3+k6z5u5MqVKyKRqF27dg32wYzMn3/+SY+bNm7cyA1qNJoJEyYA8Pf3N9xxBQ82uvbWWiQh+X5yo49yNlqnNcVVmEqlsrKyAvDss88WFxfrPnTy5EmaJuzu7n727Fk9J8rLy1MqlV5eXg3CpHTvQRgM0ycwMLDByThd8c2cOVM4UfVs3bqVLnpOnz7NDZaWlvbu3RvA1KlTDZQyw4ONLi9YjiTcqL3R6KPURu+o73RJ7SK/Lk+6n6T/jLxQXV09a9as5jdDAYwYMUJ331pPaJhUWFhYu3btqJ926tRJLpenpqbyNQWDYTiysrJEIpG9vX1ZWRkdqaurc3d3B/DHH38Iq41CAwZcXV1v3rzJDWZmZjo6OgL4/PPPDTEpDzYafiscSTh7v/H1GrXRzUWb6SE+kjAwY+D6O+vv1d3Tf+o2k5eXR/u32NvbR0VF6T6kVqsVCgX1OLlcbqCg+pKSEpVK1b9//wZhUuXl5Y9+MYMhEDSyfc6cOdzIgQMHaHSgieRG19bWjhw5EsDQoUO5QAJCyL59+8RisVgs/u2333iflAcbPVp2FEnYUrSl0Ue5m/qLVRcVeQrnVGdqprbnbEOuhMSUxWiJsT/9+Pj4rl270hTP8+fP6z50+/btZ555hp4I6d62GI5Tp07NmTOH7tkDaN++va+vb9v2YRkMg6JWq+kfzqlTp7jByZMnA1i9erWAwhpQVFTk6ekJYN68ebrjS5cupfd/ly9f5ndGHmy0Vlvret7VP92/StNI9kKDI6ZqbXVkcaTskkyUJKJ+2iut19L8pddrjJSZzt2tP/PMM7pFDQghSUlJjz/+OIBu3brp7q0YgaqqqsjISC5MSiQSbd/e+JEdgyEUe/bsAdCnTx9upLCw0MrKSiqV5ufnCyjsYZKTk+3s7ACoVCpuUKvVTp06FUDv3r1LS0t5nI6fuNG9JXslyZLBmY
P3luxNq0qLLYt9/8b7Pxb9SJo+qb9Re0NZqPS44EHNVJwkll2SRRZH6kZN8Ut1dTWtCkPv1nXLGRBCIiIi6H7l8OHDCwoKDKThkRw8eNDJyYne4wulgcFoFHrkvXbtWm5k9erVACZPniygqqb46aefAFhZWelu2paVlfXp0wfAlClTeNyF4C2L6WjZ0eFZwyXJEiTBJtlmUOagfff2EUI8Lnh8nN9kuKWGaGLKYkKuhFgnW1M/7Xq+6/wb8y9U8pyZ3kzoUoPNUMGPzj/66CMatC+sDAZDl7y8PIlEYm1trXsPR0/ADxw4IKCwZliwYAGArl273rjx4AA8KyurY8eOAFatWsXXRDzn1Ndoa+6q77Zhu7OwtvDzws99L/pSMxUliUZmjdx5ZGdlZaX+qpoJXbpz505QUBDdDNUNNxOQixcv0iBWoYUwGA9YsWIFgJCQEG7kjz/+oCbV4MbOdKirqxs3bhyAIUOG6B43HTp0SCKRiMXi//3vf7xMZHIVnhLvJ8qvy+3P2buluEmsJPpnpjcTunTu3Dla787Nze3PP//UW3vbqa2t5X4Xa2pqpFKpRCIxZqUcBqMZtFotrfRx6NAhbpAmTy9ZskRAYY/k7t27NFh7xowZuuPLly+ngTq676jNmJyNUkrrSiMSI2hZVkr//v2//vrrBhHyzdN86NLPP/9MN6GHDRsm7Ab5jBkzrKysDh48yI306tULQIOUKgZDKI4ePUoT8LhU5vLycnt7e5FIxPupN+80WhJfq9XSE11ra+u8vDw9pzBRG+VoNDM9JibmkdvDzYQu1dXVmdRm6Jtvvom/16GZOHEigN27dwuoisHgeOWVVwAsXbqUG/n+++8BjBo1SjhRraDRkviFhYU0Rfu7777T8/qmbqOU1mamNxO6VFRURL+FpFKpUqk0hvpHER4eDuDNN9/kRv7zn/8AWLFihYCqGAxKSUmJnZ2dWCzWrRX59NNPA4iIiBBQWKtYuHBhgzjxqqoqsVgMYP369Xpe3DxslINmptPYWjTRwKOZ0KXU1FT62s6dOx87dszY6pvg4MGDAIKCgriRH374AUBYWJiAqhgMytdffw1g7Nix3MiFCxcAODo6Gqe2PC/U1dU1qJyyd+9eAB07dtQ/8snMbJTSVGZ6cnJyM3frO3fupFskTz31lEn1obt69So95uJG4uPjAQwaNEhAVQwGhXZY0i22++677wJ4++23BVSlP5MmTQJPWfZmaaMcd+7cWbt2LY2n5bCxsdm0aZPu0+hmKO3o+eqrr/ISRMUjGo2GHnZxmRV37twB4ODgIKwwRmtZvHhxQkKC0Cr4JDExEYCTkxMXMFRTU+Ps7AwgObnxom5mQUFBAc2/4iXXxrxtlINr4CGVSmNiYnQfKi0tpSc2prMZ+jC0s41uMxmay2RqOXaMZqCbMxKJ5L333jOju93moeefCxYs4EZ27twJoF+/fgKq0p/PPvsMwAsvvMDL1SzERilPPfUUAN0I0NzcXBrv1qVLF2P2rW4ttOX3tm3buBFa1dHQzQ8YPFJbW6tUKmmQsqenZ2xsrNCK9KWysvLhDktjxowB8M033wgoTH98fX0BREdH83I1MSwI+tFkZmZyI66uru7u7v379z99+jQtn2WaUOVZWVnciI+PT4MRholjZWWlUCjOnj07cODAq1evjhkzZu7cuWVlZULraju7d+8uKSkZPHgwvVsCcO3ataNHj9ra2tIQKDMlLi4uMzOzW7duNMdJfyzKRh+2HqlUGhUVlZCQQLOVTJaHlTMbNVP69ev3559/0mXphg0bfH19f/31V6FFtZFNmzYB4HqRAdi8eTOtk0RXqWYKfV+zZs2SSCT8XJGXNa2JQHdtpkyZIrSQVpOUlATgySef5Ebo39748eMFVMXQh7S0NC4NLyQkxOxaw+bk5IhEovbt23MnnxqNhoZjm/Ve07179+zs7EQiUXZ2Nl/XtKjV6MM39eaCj48PzavTaDTcCMzzvTAoffr0SUhIUKlUtMPCk08+uXv3bqFFtQJaqSc0NJSrKX7kyJHc3FxPT0+aH2im/Pzzz5WVlUFBQT169ODtonz5sSlQWVkpFoutrKwM1PnDoND2sNw3pFqttra2FovFFnPm+48lJyeHVhEDEBwcrNsjyGS5du1a586dAZw4cYIbzM/PX758+YYNGwQUpj8BAQHUTHm8pkXZKCGE7oFmZWUJLaTV0BRV3aNDurhm3e4sAK1Wq1KpOnToAKBjx466JdlNiurqai7r2tXV1d7e/tKlS0KL4pPz58/TfwJ+g8ct6qYe5nwy8/COhPnuUTAaIBKJ5HJ5ZmbmpEmT7t279913F4ODceOG0LJ0SE5Onj9/vpub26RJk6KioiQSiUQiqaioeOGFFyoqKoRWxxtcmjWXAMkLlmaj5ms9D38BmO97YTSKm5vbr7/+unnzZq12dXQ0+vbFxo0gREhJJSUlGzZsCAgICAgIoIUo/fz8lErljRs3MjIy+vTpk5aWRms7CKmSJ2pqanbs2AFg9uzZPF+ax5WtKfDdd98BeP3114UW0mqOHDkCYOTIkdzI5s2bAbzyyivCiWIYhMJCMnUqAQhA/vUvYvz7ZlqVIiQkhCYL4K+qFElJSbpPu3TpEu23YRnFxn7++WcYpsuZpdno77//DiAwMFBoIa0mNzcXgIuLCzeSkJBgoH91hikQGUk6dyYAsbMjSiX5qyCyYcnMzFy6dCmNW4JOjbSmqu4ePnyY9tsw2YZLLWf06NHgo7row1iajd68eROAk5OT0EJajVartbe3B8BV+C8pKQHQvn17HlsYMkyK4mIil9cvS4cNIxkZhpro3j2iUpExY0K529DevXuvWbOmJUUbVq5cCaBDhw4XL140lD7Dc+XKFbFY3K5du1Z10GghlmajhBAa5lZUVCS0kFbzcE0AWvZft68hw/KIjibu7gQgtrZk6VLCY7SeRkPi44lcTtq3JwAZMWKng4NDWFhYS/pHcGi1WlrzwcfH5969e7yJMy4ffvghDFbD1wJtlNbl1o13MxemTZsGYMuWLdzIiBEjADSoWcWwPO7dI3I5EYkIQPz9yd+3KNvC1atk6VLi4VG/1BWLyejRZNeu0rYF+pSXl/ft2xfApEmTNBqNvuKMDpd/ZaD6RJZ2Ug9zPuBmmfX/WBwdoVLh+HH06oXUVAwejEWLUFMDADk5SEqq/5kjIwNXrjRynepqREVh4kT07Illy3DtGrp3h0KBy5cRG4vQUIe2BfrY29vv37/f2dl5//79n376aRuuICyHDh3Kzc318vKi6xLesUAbNV/refgLwHzfC6MNjBiB5GS88w60WqxejaefRmoqPvgAAwdi5cq/PTMsDEuW/G0kKQnvvovu3REaiv/9D1ZWCAnB/v24dg1KJby89NXm4eGxY8cOqVT66aefmldWK/6qRTJnzhxau513mI2aEA/n0ZvvyvphwsPDHRwcrKysRo4ceaXRpRQDsLfH118jPh6+vsjIAC2xYGODNWvQ6G9BYSHWrUP//hg4EF99hbt3ERCA8HDcvInISEycCL5qG
AGQyWSrVq0ihMyaNevixYu8XdfAFBUVRUdHS6XSGTNmGGoOQ+wUCAvtt+Xt7S20kFZDawJYW1ur1Wo6kp2dDcDd3V1YYXqi0Wg+/vhj3YWAWCweM2bMzp07udYUjAZUVpJDhwgh5IUXyIgRJCCAjBpFuGOhgADy0kskPp5IpfW7n66u5IMPDHjWz0F38Hv16tWgQ5zJ8vnnnwOYNGmS4aawQButrq6WSCRSqVTwBvRt4IknngDAJTLX1dXZ2tqKRKLy8nJhhbWZsrKyKVOmAJBIJMuXL//++++nTp1KewsC6NixI+1FKLRM0+WFF8gzz5CTJ4lIRLj2CNRGa2pIt24kOJhERvJ5vt88lZWVtLrH2LFj64wT7Koffn5+AH799VfDTWGBNkoIoSWw0tPThRbSamg57v3793MjtGGfmRrNpUuX6C/xY489duTIEW783r17KpWKNkqhBAQEhIeH3717V0C1pgm1UULItGmkc2dCPyFqo4QQQVbzXP2n//73vwJM3xpOnDgBwMXFxaBV3yxwbxRse9Q0+O233wYNGpSent6vX7+zZ8/SHj4UR0dHuVx+4sSJtLQ0hULh7OyclJS0YMGCbt26hYaG0i5GAio3Tb74AjU1WLTob4M2NgIoeeKJJ/bs2WNlZbVq1arIyEgBFLQYrtC9lZWV4WaxTBs1X+tpqkCJeX0lEEJWr149ceLEe/fuhYaGJiQkeDVxVNynTx+lUpmXlxcZGSmTyWpqaqKiosaMGePr6/vJJ5/cMKkiSELj5oZly7BpExIThZYCDB8+fM2aNYSQ119/nZ5GmCAVFRU0qGDmzJmGnclwC10BUalUAF577TWhhbSao0ePAhg+fDg3sm3bNgAv0Vs4c6C8vHzq1KkARCKRQqFoVSZrbm6uUqmkG8QAJBIJzfg2xzrcfMHd1BNC1Gri709GjnxwUy8stFSSh4eHabZIoWXxRowYYeiJLNNGjx8/DmDo0KFCC2k1tCaAs7MzN3L69GkA/fv3F1BVy7l8+fKTTz4JwMHBoc2b+lz9Ie5GzNXVVaFQXL58mV+1ZoGujRJSf9ZkY2MSNlpVVUWTBmUymQkeNw0ZMgTA1q1bDT2RZdpoYWEhgI4dOwotpC30798/ODiYiwQqLS0F0K5dO9NPwjt48CBtGOnj45PBR+hNQUFBeHg4TUOkBAQEqFSqiooK/S9uLjSwUULI7NkEMAkbJYTcvHnT1dUVwAcffCCIgOrq6vPnz0dFRZWVlemOZ2RkAHB0dDRCGx7LtFFCCP17vnXrltBCeID+ml67dk1oIU2i1WqVSiVtVxscHMx7AYvExES5XE4rYOGvE6r4+Hh+ZzFNPvmE/N///W3kzh0yfjwxnRKgJ0+epHVL+W1w1CjFxcWJiYlbt25VKBQhISF+fn5ck+QGvw/vvfcegHnz5hlaErFgG6Xr+bi4OKGF8AC9TZbL5aa5A1VRQcLC1F5eE8Ri8aeffmq4sn6lpaVbt26lTasotFS7aX4s/yi++eYbes+UmJjI1zVramouXrz4yy+/rFq1aubMmYMHD6Y1pBtgZWXl7e39/PPPnz59Wve1NCTr7NmzfOlpBou1UXo2Z+5dDNVqtUKhAODk5ATA2to6ODg4MjKSS3MSnJwc0q8fAUjfvlVGq+ybnp6uUCjo3wkAGxubkJCQVhV/M0eyskinTsTXV2gdTfDGG28AeOKJJ27fvt2GlzdYZgYEBNg0FszVsWPHgICAkJCQpUuXRkZGJiYmNlqzioZh9e3bV++31SIs1kZXrVoF4P8a3A6ZFXfu3KGNeW1sbN5+++0JEyZw9y+PP/740qVLBb/NP36cdOlCANKrFzF+Sd/q6urIyMjg4GDuY3F3d1coFIJ/LAYiN5cApFs3oXU0QW1t7fDhwwEEBQU1/zWvVqtzcnJiYmLCw8PlcrlMJqPbVg/j6uoqk8nkcnl4eHhMTExOTk4LxQwbNgzAV199xcc7ezQWa6N79uwBMGHCBKGFtJHk5GQa9+Pm5nbq1Ck6mJ+fr1Qqe/bsSX/JxGJxYGCgSqUSpJe9SkWsrAhAnnuOCJtdnZeXp1QqPT09uY/lySefXLhwoSAfi+EoKSEAcXQUWkfTFBQUdOvWDcB777338KNRUVGTJ0/28fFpNBLe0dFx0KBBM2bMWLly5e7du9PS0tqczH3o0CEAIpGosLBQvzfUUizWRtPT0wH06NFDaCFtYfv27XZ2dgACAwMbbfNAj1yEykyvqiKvvUYAIhIRhYKYSAQBDZPS7Z1rLlFiLUStJgCRSIgpb10kJCTQm/FNmzY1eEipVDazzGzzhszNmzdjYmJUKtX8+fNlMhk9W6bbC/q+mRZjsTZaU1MjlUolEklVVZXQWlpBXV0d3QwFIJfLm/9CppnptE4ExQiZ6TdukKefJgCxtye7dxtunrZz48aNSZMmicViANnZ2ULL4RNbWwIQE19kb9myBYCtre2ZM2d0xzMyMiIjI1NSUtr8J1lWVpaYmLh9+/YPP/wwJCTE39/f1ta20d0AT0/PtLQ0Pt5Ni7BYGyWE9OrVC0CDTzMiIuLAgQMmGCpMCCkqKqLH0NbW1q06HEtKSnrrrbe4c8wuXVxmz1YbIhzojz+IiwsBSM+e5MIF/q/PI3R37PfffxdaCJ/QTqKmH8U3d+5culWtT8RhcXFxfHy8SqVSKBTBwcFeXl70q7EBnTp1CgwMlMvlSqVy//79WVlZxv/rtmQbnThxIoDdOksmtVrt5uYGk8yKSUlJobt7nTt3blvHGHrkIpPJRoyQ0xqU3t5k6VKSm8uPQm4zdPx4YoDuijxD8xTXr18vtBA+8fQkADH9FXZtbS1t1zFs2LCWbHHW1NTk5OTs379fqVTK5fLAwMAOHTo87JjW1tZeXl7BwcEKhUKlUsXHx5eWlhrh7TwSS7bRhQsXAlihE6ZcWVm5evVqWv6DbkKPGjUqIiKibX2+eGTHjh10M/Spp566fv26nle7fLlu8WLi5lZf0NfKikyZQg4caHsn9Orq+swZuhlqkkv5hqxevRrAggULhBbCJzS2LCVFaB0toLCwsHv37gDeeeedBg+1fJkZEBAQFhamVCojIyPT0tJM8yaSWLaN0qrXw4YNe/gh08mKoZuhtDL89OnTeTR0jYbExJCQkPolJC2QrlCQRpfg6ekkJoY0+GpPTiZJSSQvjwweXN/+lysbbPr8+uuvAMaPHy+0ED4ZNowAxFyyt5KSkuhx39y5c1esWPHqq68OHDiwqWWmn5/fCy+8sHjx4i1btpw+fdpcSutTLNlGv/vuOwA2NjZNbdAInhVz9+7dsWPHApBKpUql0kCzFBSQ8HDSt2+9mQIkIICoVEQ3MX3OHAIQufxvLxw7lowZQw4dIhIJ8fQkqakGEmgQaJlET09PoYXwybhxBCAHDwqto8V8++23YrG4QTvSBsvMxMREc+8lY8k2mpOTI5VK6c379OnTm+n2LkhWzPnz52kVTmdnZ+OchCQmErmc2NvXm6mjI5HLCU3emzOH2NoSsZicPPng+dRGCSE7d5KiIiMI5BO1Wm1t
bS0Wiy0penTqVAKQyEihdbSYefPmAfDy8vrggw82bdp08uRJi2xwYMk2SghZv369g4MD9zXYp0+fL7/8sqnFpjGzYn799VcqbMCAAUbOurl3j6xfXx+0RP/bv5/MmUP69SPjx5O+fR909eFs1EyhFa9TzWsV3Sw0XPfHH4XW0TKOHTsmEolsbGyMGXskCBZuoxS62HRxceE2YprPTH84K4bH4sG0GBLdDH3llVcEXCulpxOFgvTsSe7fr7fR9HRiZUXWrKl/grnb6OTJkwHs2rVLaCG88c47BCDr1gmtowXcv3+fptutMJ1SVAbjH2GjlLq6OloMmN7pA+jWrZtCoWgqUffhrJiuXbvOnz///PnzbdZQWlr6/PPPG3oztFXQfQtqo4SQ994j7dsTuj42dxtdtGgRgGXLlgkthDcWLyYAWblSaB0tYMGCBSeh5tAAABDNSURBVAD8/f3/CZ0L/kE2ykEz02n3UG6xuXXr1qZOyYuLi1Uqlb+/P7c5QIsHt7bpcVZWVu/evQE4OTnRrm2mA2ejpaXEzY08/zwh5m+jmzdvpkt+oYXwxsqVBCCLFwut41GcOnWKNjnnsW6eKfNPtFEOGvZEAzbxV2b6uXPnmn8+F7Hh4OAQFhbWzMmVLgcOHHB0dKTfz1euXOHvTfADZ6OEkB076o+Dzd1GExIS6Hee0EJ4Y906ApCHAjFNi+rqatoV3PTbL/PFP9pGKTQz/amnntJdbDaTmV5eXr5x48ahQ4dyz+/bt294eHhxE5k9dDOUBhi//PLLpnlwrGujhBCZjPTpQ4KCzNtGS0pKALRv395i6pBu2bK9QweHmTNnCi2kOZYsWQLA19fXvMpZ6AOz0QfQnum0QDIAW1vb5sOeMjMzFQpFly5d6PMbjd4vKyubMmUKAIlEYiKboY3SwEYzM4m1NbGyMm8bJYTQf50bN24ILYQfaDXiqVOnCi2kSVJSUqysrMRi8YkTJ4TWYjyYjTaEy0ynh+kAvL29lUplU7ULa2pqdu/e/cYbbzzstpcuXfLz8wPw2GOPHTlyxPDa204DGyWELFlCALO3UZrZ3cKNF9Pn4MGDAMaNGye0kMZRq9W03tj7778vtBajwmy0SZrqmd7CBh7R0dG05FK/fv1aXrVbKNaubZjCdP8+ef55IlC3R95YsGDJgAGjf/yxLaVeTJD4+Hg0kd9sCnz66ac0c6y1p6/mDrPRR9CGnum6m6GhoaHm2A1YoyF8N/cUhv/3/8zgTKblpKSk0C9moYU0QkZGhq2trUgkspi1f8thNtpSWtgzvby8fOrUqTQDVaFQmOPhxuHDpF07MnGi0Dr44H//s4StCY7s7GyYZKEAjUYTGBgI4M033xRaiwAwG201jVaH+vPPPwkhsbGxNAHRwcFh3759QittIxcu1NcqtQCyswlA3N2F1sETt27dAtC5c2ehhTRk7dq6kSOVPXv2MpECoEZGRAgBo/WUlpbu2LHjxx9/PHv2LB2xtbWlFWr9/Pz27dtHa++bIzU1aN8eIhHu34e1tdBq9EOjgb09ampQVoa/vvjMmMrKyvbt29va2lZVVQmt5QHZ2fD3R2UlDh5Ujx/fSLs6i6eRaqmMluDo6Dhv3rwzZ87QhH2JREKLfXXt2jU2NtZ8PRSAjQ08PFBXh5wcoaXojUSCHj1ACC5fFloKH9jZ2Uml0urq6rq6OqG11EMI5s1DZSVmzsQ/00PBbFR/evfurVQqi4uLP/74423btt28ebOppttmBO0PkJUltA4+8PUFgMxMoXXwBG0HW1FRIbSQetavx9Gj6NoVa9cKLUU4mI3yg4ODw7Jly8LCwhpth2B2WJL10PdiGV8JAOimfHl5ue7g4cOHCwoKjC8mNxeLFgHAt9/isceMP7+pYAl/8wzesaTVKH0vlvGVAICWdNBdjVZXV0+ZMsXNza1Hjx5z586NiooqKyszjpi5c1FejtBQvPCCcSY0UZiNMhrBkqzHkr4S0NhqtKioaPTo0fb29leuXNmwYUNoaGiXLl1kMtnq1auTk5O1Wq2BlGzZgkOH4OSEr74y0AxmAzupZzTCrVvo2hUdO6KkRGgpelNWBkdHtGuHigpYwI5LUFDQsWPHjh49GhQUpDuu0WhSUlJiY2NjY2Pj4uLUajUdd3Z2HjVqlEwmGz9+/OOPP86XjMJC9OmD4mJERODVV/m6qrnCbJTROI89hpIS3LqFv0qvmDFubigowLVr+Cuz14yZNGnSgQMH9u3bR+t/N0pxcfHRo0djYmKOHDly/fp1btzPz2/y5NOBgfYjR6J9e71kTJ2KPXvw3HOIjtbrOpYBs1FG4wwdilOnEBeHESOElqI3o0bh+HEcPoyxY4WWoh8ajSYgICA1NfWpp56aPXv2mDFjvL29m3/JlStX6BI1JibG2vrxO3dSCYFUCn9/yGSQyfDMM/irHURL2bULL78MBwekpcHdve1vx3IQMvafYcLQ7mkqldA6+GDePLNpYdQMxcXF48aNA/CYzqF4165dQ0JCtm7dWvSo3q1qtTohIeejj8iQIUQiedDQ0NmZvPQS2biRXL/eIhlFRcTFhQDkhx94eFOWAbNRRuN89hkBiGUUPPvySwKQt94SWoceZGZm0jxjZ2fn3bt3R0ZGyuXybt26cX4qFosDAgIUCkVMTMwj276Xl5OYGKJQED+/B34KEC8vIpeTyMjmCtNMm0YAMmoUMcNyEYaC2SijcfbsIQCZMEFoHXxw8CABSFCQ0Drayv79+2kHmv79+1+9elX3oZycnPDwcJlMZmtry1mqnZ2dTCZTKpWJiYmPLI5z+TL57jsyZQpxdHzgp9bWpKSkkSdHRxOAtG9PTK8PjpCwvVFG42RkwM8PPXogO1toKXqTn49lyzBoEF5/XWgprYQQsmbNmiVLlmi12mnTpm3cuJFrHdaAysrK+Pj4I0eOxMTEXLhwgRvv3r379Onr/f2DR49G587NzaXRICUFsbGIjUVZGU6fbuQ55eVQKNC7N/79b73el4XBbJTROLW16NEjr2vXrPj4f9nammuFkooKZGXBxQXduz8YLC7G1avo29fUC6+Ul5fPmDFj3759Eolk5cqVCoWihS+8fft2XFxcbGzsb7/9lpeXN2zY1YQEDwBeXggOxsSJGD4cOovXRsjPR3o6vLzg5fVg8Pp1XL6M0aPxV18Ixl8IvBpmmDD0FPjChQtCC2k7J07UF8rTLce+fTsBiIn3Z9Jtx93mQsharTY1NXXdusqxY0m7dg/u2e3syLPPkrVrSVP/tlFRBCA9exLdrnRffEEAotG0TYslY/7hyAyD4ePjAyDL/BOACgvxySdCi2gN0dHRgwcPzsjI6Nev39mzZ2UyWduuIxKJ+vXrN39+u8OHUVaG+HgoFAgIQHU1Dh7E+++jb1+4uCA0FBs24ObNhi+/eROrVun7Xv4JMBtlNAk9Gs40/5xQuRzr1iE1VWgdLYAQsnr16kmTJt27d++ll15KSEjw9PTk5cpSKYYPh1KJxETk5yMiAjNnwtUVt28jKgpz58LdHQMG4IMPkJRU/5J338WaNZa
TR2s4mI0ymsRiVqMzZ2LAAMjlMFh+OT9UVFS8+OKLixYtEolESqVyx44d7fVMNmoCFxe8+iq2bEF+PnJyoFIhJAQdOiAlBZ9/jmPH6p/25pvo1Qtz54IdoDQPs1FGk1jMalQsxtq1OHsWP/wgtJSmyc7OHjJkyJ49exwcHPbu3atQKERGOcrx8oJcjshI3L6No0ehUGDixPqHpFKsXYu4OEREGEGIGcNslNEk1EazsrKI+a9Ghg/HzJlYvBi3bwstpTEOHTo0aNCgixcv+vj4nD59eiLnZEbExgZBQVAq62tiUcaMQUgI/vMfSyhSYziYjTKaxMnJycnJqaysrLCwUGgtPPDFFxCL8eGHD0YyMhqPjjQmdDM0ODi4pKRk4sSJZ86cod9epsO6daipwaefCq3DhGE2ymgOi7mvB+DkhOXLsWnTg7OmVaswZAh698bq1bhzRwBJFRUVoaGhixYt0mq1CoVi3759Dg4OAuhoFldXfPQRvv0Wly4JLcVUYTbKaA56ymSONtroadLcuQgIwNdf1/+vhwe6dEFmJhYtgrs7XnoJR44Y7xgqJydn2LBhu3fv7tChw549e5RKpcl2oHn3Xfj6YvNmoXWYKib6z8YwEcz0sP7OHchk2LWr4bhYjPXrUVtb/7/LlyM/HzExCAmBRoPISIwbh8cfx6JFuHrVsArj4uKGDh164cIFb2/v06dPT5482bDz6YdUiu++g8l0IzU5mI0ymoM7ZRJaSCs4cwYDBuDYMSxbBjs7BARANw09IACLFiEgoD4TVCKBTIbISFy/DqUSPXrg5k2sXo2ePTFmDLZtgyEawm/YsEEmk925c2fChAlnzpyh2UomhYsLZDLY2DwYGT4cS5ZAJmOZoI0hcBYVw7ShBurh4SG0kJYSEVGf9Th8OCkoaPXLNRoSH0/kcmJnV5832bEjkcvJuXP8yKuqqpoxYwYAkUikUCg0LLPSImA2ymgOtVptbW0tFovv378vtJZHoFYThaLe++RyUlOj19Xu3iXr1hF//wd56MOGaTdt2lZWVtbma+bm5g4cOBCAvb39L7/8opc+hinBbJTxCOh9fWpqqtBCmuPOHRIURABiY8NzVfa0NKJQECcnMnDgbQC2trYhISExMTGPrOPZgLi4uC5dugDo2bNnWloanxIZQsNslPEIJkyYAOCTTz4RWkiTnDtHPDwIQNzcyJ9/GmSK+/fJrl2nRowYwWUW9e7d+4svvrh161ZLXq5SqaysrAA8++yzxcXFBpHIEA5mo4xHEBwcTI3D29s7MjKytrZWaEV/4+ef6/cxhw0j+fkGn+7SpUtLly7lOhVLJBKZTBYZGalWqxt9fnV19axZs9hmqGXDbJTxCMrKynx00gNdXFwWLlyYmZkptC5SV8fnZmgrp66LiYkJCQmha0wAbm5uCoUiOztb92l5eXmDBg2im6FRUVHG08cwLsxGGS0iOztbpVL5+/tzfhoQEKBSqcp16yEbkaIiIpMRgEilRKkURAIhhBQUFCiVyp49e9LPRCwWBwYGqlSq+/fvf/PNN87OzgB69Ohx/vx5wSQyDA+zUUbrSExMlMvlHTp0oMbh4OAQFhbW5vLsbSM1VePpSQDStSs5ccKYMzeOVqs9duxYWFhYu3bt6MfC/RAUFHT37l2hBTIMC+vFxGgL1dXVBw4c2LBhw9GjR+mvUO/evWfOnDl79uzOzTdO05tdu3Z9+OEPJSVHnnhCvHcv/tqlNAnKysp27ty5bdu2hIQEQoi/v//p06dtdKPYGZYIs1GGXmRlZW3evHnz5s23b98GYG1t/fzzz4eFhT333HMSiYTfuTQazZIlSz7//HNCyMKF25YvDzNZgzpy5Iirq2vfvn2FFsIwBsxGGTyg0WiOHTu2YcOGvXv31tXVAejWrdv06dPnzp3LVw+MsrKy6dOnHzhwQCqVrlixouVtMhkMQ8NslMEn+fn5P/300w8//JCTkwNALBYHBQWFhYWFhIRw24VtICsra/LkyZmZmc7Ozrt27QoKCuJPMoOhL8xGGfyj1WoTEhJ++umniIiIyspKAJ06dQoJCXnrrbd0z/pbyIEDB6ZPn15WVta/f/+9e/d6eHjwr5jB0ANmowwDcu/evcjIyO+///7cuXN0JCAgQC6XT5s2jTvrbwZCyJo1a5YsWaLVaqdNm7Zx40Y73WJNDIZpwGyUYQySkpK2bdsWERFRXFwMwNbWduLEiXK5fPTo0U01bisvL58xY8a+ffskEsnKlSvZZijDZGE2yjAeD4dJ+fj4zJo1a9asWbRsB8elS5cmT56ckZHh5OS0c+dOmUwmkGQG49EwG2UIwOXLl7dv37558+bc3FwAEolk1KhRcrl8ypQpUqk0Ojr61VdfLS0t9ff337t3L19n/QyGgWA2yhAMtVr922+/bdy48eDBgxqNBoCVlZWrq+uNGzcIIS+//PKmTZvYZijD9GE2yhCegoKCbdu2rVu3rqCgAIBIJPrss88++OCDprZNGQyTgtkow1TQarVfffXV+fPnR4wY8dprrwkth8FoKcxGGQwGQy9YZ1AGg8HQC2ajDAaDoRfMRhkMBkMvmI0yGAyGXjAbZTAYDL1gNspgMBh6wWyUwWAw9ILZKIPBYOjF/wc5HGaMHRX65gAAAdN6VFh0cmRraXRQS0wgcmRraXQgMjAyNC4wOS4xAAB4nHu/b+09BiAQAGImBgiQAGJpIG5gZGNIANKMzOwOGkCamZkNQrNAxJmY2BkUQHwYFyEMVY4mDtfukAGWZ0RiQGQEwQYyYirAcAEWI3AZys3AyMDIlMDEDGQzsLAysLIxsLEzsHMwcHAysHMpcHFnMHHzJPDwZjDx8jHw8ifwC2QwMQlmMAkKJQgJZzAJiySIiGYwiYoliIlnMIkzJnCyMAhwJYgLJTixAM1nZQQqFGdjZWPn4GRh4+bh5RfgYhMWERUTFxLXYgR6hgEWrFfaDti/2dpmD+KETjNwaKzLsQOx/V1MHQ6aLtoPYqc+vmLv68B7AMRe2ddu58R7FSyurypoO73Cah+IPbFs3X6hiTvAep/yKh1Q9xUBq1maIHwg8RMzWK98c/p+3jVqYPbh3uoDjelie0HsJtnWA5+5doLdUHB34wGvun9g9n636weKjSFuC2dmOnhq2WkwW+7D+QOftO6B7TUwWHxA408l2K6PnzscLk68AmZX2C52eLn+PFgNy9pLDme6TcDsW5xPHBzz5oDNUfl+1GEvp6QDiD2jfZpDbQwXmH1Iq9WhvsgZrEYMAGdzeABl6urhAAACVnpUWHRNT0wgcmRraXQgMjAyNC4wOS4xAAB4nH1VW6obMQz9zyq8gTGSLMn2501yKaXcBNq0eyj0s/unRx6S8QXTmVh47GM9j5xTiuf79dvvv+n1yPV0Son+8+u9p1+FiE4fKSbp/P7l6y1dHm/n58rl/vP2+JFEk1ScwfsZ+/a4fzxXOF0SZyNlNkzIRONQpvEcJyXdkuRqnatjWwp14wWuQJ/kVkS6po2zeBW3BVCHYffCndImWaiS0QJosAyDrtUpNLqr+spFh0
asFtPiaaPMTZhWPlYAoai49F6wX5y0yQLYYBquWRHqLVRa7YVWyB4qJWOzVo0ZObv3BZIpoJQ7adEBhXVZ5pKjOFvJsGkoyhbBIZ2rJLEEVDNbbyZIrKohYytkVGizbNakMZC9NRRrhYwSbZ6r1NI8yKENaldIS5c/aWuZxdrgEXhCSx6x70q1N6QoYtJSuizDr7unoLwXGTmrDqauoFEpzVJ7rW0nCei0TFRUyjKIIabDPJmXVfgShfJsDTyhwacKsq7Cl6hTRfAARu5FhNdAGSq5ucJj0KWz8NJ2FElzJXgnALJz60uNOoDY1xrhwkV1XwDfb9dPfb/fBOf77XrcBPHK0fC4PlI5+pox9OhexrCjRxnDj05kjHr0G2O0o6sYox+twzHmBol95qkNNATLxHYNwWVitYZgndjLQ9jEUh0rPpExPsG0iXMasbaJWRqC+8QgDSEzUzSE8MQIDSEylV5DSJlqrCFEp2LquLVfC5FX5OMVggyH65QcqfuZw7+o9FzX+H7+O2B++gcuuTJBQIIwXQAAAS96VFh0U01JTEVTIHJka2l0IDIwMjQuMDkuMQAAeJwlUDmSwzAM+8qWyYzC4U1xXLrPJ9zu5AV5/IJeN7JACARwfuR6nG89z/fjsuvC5fd5XfY8T31+Lr3mU/n5PoSCXWQJcajbOpQqWvZiUuMOGWSbaq+XkGZprUMo0wSIknJxrAPs9MrhZLrXIGzhFmAxyVZhSGFsqd1Qt2TfCkgpTGVYUW08kBF+wgZjy5LeAJ0kegecukdi5ysoYusG0nsrvL+S4M960vj24WwSBQkIUvE/x3tzjribtcqtxN15L+xKNHL7gpmqCckpiUgzZTf3wWBfscBJq2v/B2eQgpAUk5HnSJg5kmIj9l1EoTg8K9jKcaWqaB8U2em6sLRFdXSL8R6AJJrGHSd6ZYKCZ67n9w9ramN6pq4WpwAAAABJRU5ErkJggg==", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAApQAAAHzCAYAAACe1o1DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACQq0lEQVR4nOzdeVhTZ9oG8DuHsCkqQVFcUEBwXxF3RUFtxUJb7VitVq21i2XG2s7W1i5j922mrbalddRqS7VVuwoU64biLhB3qoKIghqIJoCsITn5/uAjY2SHwCHJ/buuXt/HOck5N8qYh/e87/PKjEajEUREREREjSRIHYCIiIiIrBsLSiIiIiJqEhaURERERNQkLCiJiIiIqElYUBIRERFRk7CgJCIiIqImYUFJRERERE3CgpKIiIiImoQFJRERERE1CQtKIiIiImoSFpRERERE1CQsKImIiIioSVhQEhEREVGTsKAkIiIioiZhQUlERERETcKCkoiIiIiahAUlERERETUJC0oiIiIiahIWlERERETUJCwoiYiIiKhJWFASERERUZOwoCQiIiKiJmFBSURERERNwoKSiIiIiJqEBSURERERNQkLSiIiIiJqEhaURERERNQkLCiJiIiIqElYUBIRERFRk7CgJCIiIqImYUFJRERERE3CgpKIiIiImoQFJRERERE1CQtKIiIiImoSFpRERERE1CQsKImIiIioSVhQEhEREVGTyKUOQNZJp9NBo9FAr9dDLpfDw8MDTk5OUsciIiIiCbCgpHpTq9VITk5GWloatFptlfMKhQIBAQEICgqCp6enBAmJiIhICjKj0WiUOgS1blqtFrGxscjIyIBMJkNtPzKV5/38/BAeHg6FQtGCSYmIiEgKLCipVkqlEvHx8RBFEaIo1vt9giBAEASEhYUhMDCwGRMSERGR1FhQUo0SExORkJDQ5OuEhIQgODjYAomIiIioNeIqb6qWUqm0SDEJAAkJCVAqlRa5FhEREbU+XJRDVWi1WsTHxwMAysrKsH//fqhUKqhUKhQXF2PSpEkICQkxvV4URRw9ehSXLl1Cbm4uSkpK4O7ujr59+2LChAlwdXVFfHw8fH19OaeSiIjIBnGEkqqIjY01zZcsLi5GSkoKDAYD+vXrV+3ry8vLsW/fPri7u2P69OmYP38+AgMDkZKSgq+++grl5eUQRRGxsbEt+W0QERFRC+EIJZlRq9XIyMgwfe3u7o4XX3wRMpkMRUVF1T66dnR0xHPPPYc2bdqYjvn6+qJDhw7Ytm0bUlNTMXToUGRkZECtVrOlEBERkY3hCCWZSU5OhkwmM30tk8nMvq6OIAhmxWSl7t27AwAKCgpMr0tOTrZgWiIiImoNWFCSmbS0tFr7TDbE5cuXAcA0IimKItLT0y1ybSIiImo9WFCSSVlZWbU74DRGQUEBdu/ejW7duqFPnz6m4xqNBjqdziL3ICIiotaBBSWZWKqYLC4uxqZNmwAAf/rTnyAI5j9mGo3GIvchIiKi1oEFJZno9fomX6OkpATR0dEoKCjAggUL4OHh0Sz3ISIiotaDBSWZyOVNW/RfUlKCb775BlqtFgsXLoSXl1ez3IeIiIhaFxaUZFLdaGJ93VlMLliwAF27dm2W+xAREVHrw6EiMnFycoJCoagylzItLQ06nc60mEatVuPcuXMAgICAAMhkMkRHR+PGjRuYPn06RFFEVlaW6f1t27Y1FZEeHh5wcnJqoe+IiIiIWgILSjITEBCApKQks9ZBsbGxyM/PN32dmpqK1NRUAMDy5csBANevXwcA7Nixo8o1hw4dipkzZwIA/P39my07ERERSUNmtFTTQbIJarUaUVFRzXb9//73v+jQoQP69esHR0dHODk5wdHREe3bt8fLL7/MXXSIiIisEEcoyYynpyf8/PyQmZlp2s/bEgRBgLOzM65fv47r16/jjz/+AAA4ODhAFEUYjUY8/vjjLCiJiIisEBflUBXh4eFVekc2lSAIePLJJ7Flyxaz4waDATKZDGFhYRgyZIhF70lEREQtgwUlVaFQKBAWFmbRa4aFhUGhUODhhx/GkiVLzPYHF0URZ8+excGDBy16TyIiImoZnENJNUpMTERCQgKMRqNZAdhQoaGhmDhxounroqIiDBkyBFeuXAEAdO3aFdevX4coihgyZAg2bdqEQYMGNTn/nXQ6HTQaDfR6PeRyOVebExERWRALSqqWRqPBwoULcePGDYSHh0MulzdoTqUgCBAEAWFhYQgMDKxyXqlUYvTo0TAajTh//jzat2+PRx99FLt27QIAjBs3Dps3b0avXr1M7zlz5gxOnjyJBQsW1CuDWq1GcnIy0tLSqt1WUqFQICAgAEFBQZy7SURE1AQsKMlMUVERVq1ahXfeeQdFRUUAgOTkZKSmpiIjIwOCINRaWFae9/PzQ3h4OBQKRY2v3bJlCzQaDZ555hnTsYy
MDMyfPx9Hjx6FTCbDvffei+joaHh4eGDQoEH4448/sH37dkRERNR4Xa1Wi9jYWGRkZEAmk6G2H/HK8/XJS0RERNVjQUkAKh4Jr1u3Dq+99ho0Go2pCHNyckJpaSlkMplpxC89PR0ajabKNTw8PODv72+REb/KkcizZ89CEASMGjUKR48eBQC4u7vj3Llz6NatW5X3KZVKxMfHQxRFi46oEhERUc1YUBIAYOXKlXj99derHB8yZAhOnTpV5XhLzUnct28fHn/8cVy+fNl0zMHBAcHBwdi9e7fZavTKOZ9NFRISguDg4CZfh4iIyF5wlTcBAB5//HGMHDnSbPGNg4MDBg8eXO3rnZyc4OXlhR49esDLy6vZFrhMnjwZL7zwgtkxg8GAhIQEfPTRR6ZjSqXSIsUkACQkJECpVFrkWkRERPaABSUBAHr27InExERTYVg58tevXz8pY8FgMOC1116r9tw//vEPvP/++9BqtYiPj7fofePj46tdyENERERVcaccMvnTn/6EsrIyREZG4tdff8W1a9ckLygBYNq0acjKyoJOpzP9p1KpoNFo8OKLLyI/Px+urq4oKyvD/v37oVKpoFKpUFxcjEmTJiEkJMTseleuXMHJkyehUqmQm5sLg8GA5cuXmy3IEUURsbGx9V5RTkREZM9YUBIAYNOmTYiLi8PIkSPx+eef45133sG3336L8PBwSXM5ODjg22+/rfac0WjEli1bcOHCBYiiiOLiYqSkpMDLywv9+vWr8bH15cuXkZGRga5du8LZ2RmZmZlVXiOKIjIyMqBWq9lSiIiIqA5clEO4efMmunfvDgcHB+Tm5sLNzU3qSPUWHx+PpKQkGI1G08p0mUyGoqIifPjhh9WOUIqiaHqkf+jQIezatavKCCVQ8dg/KCjI4rsGERER2Rq7H6HkDipAcHAwdDod4uLirKqYBIC0tDSzQrI+6rtPuSiKSE9Pb3Q2IiIie2GXBSV3UPmfl156CX/88QfmzZuHGTNmSB2nQcrKypp94YxGo4FOp7O7XzKIiIgawq4KyvruoKLVapGUlITjx4/b9A4qSqUS77//Pry8vBAdHS11nAZrqVXYGo0GXl5eLXIvIiIia2Q3bYOUSiWioqJMCzDqmjpaeT4zMxNRUVE215dQr9dj2rRpkMlk2LdvX70fA7cmer3epu5DRERkrexihLIpO6hUbuEXExODwsJCm9lBZdasWdBoNHj33XfRt29fqeM0ilzeMj++LXUfIiIia2V9w1INxB1Uqvr+++8RExODESNG4MUXX5Q6TqN5eHjY1H2IiIislU0PvVTuoFLfhtd3MhqN2LBhA65evYqRI0fivvvuA1DRpsbX19dq51TevHkTixYtgqurK/bu3St1nCZxcnKCQqFo1rmU9rjqn4iIqKFseoQyNjbWrOG1wWCo984vx48fh0ajqXK8cgcVazVp0iTodDps3boV7du3lzpOkwUEBJi1C0pLS8O5c+dw8eJFABUr+s+dO4dz585Bp9MBAIqKikzHcnNzAQDp6ek4d+6cWZNzQRDg7+/fct8MERGRlbLZEUq1Wo2MjAwAgLu7O1588UVTw+u6HltrtVrs2bMHM2fOxJYtW8zOWfMOKi+//DJSU1PxyCOPSL4DjqUEBQXh+PHjpq9jY2ORn59v+jo1NRWpqakAgOXLl8PJyQm5ubnYtm2b2XXi4uIAAL169cLixYsBVPxdBwUFNfe3QEREZPVstqBMTk42tQaqb8PrSjExMfDz80P//v2rPS8IApKTk61qB5WTJ0/i3XffhZeXV41bGVojT09P+Pn5ITMzE6Io4vnnn6/zPb6+vli5cmWtrxEEAT4+Plb3SwMREZEUbPaR9507qDRESkoKrl27VmuTb2vbQUWv12Pq1KmQyWTYu3evVbYIqk14eLjFvydBEGxmFJeIiKi52VZl8f8au4NKQUEBdu7ciWnTptU5v7ByBxVr8Kc//Qm3bt3CW2+9VeOoqzVTKBQWHy0OCwuz2oVXRERELc0mC8rGrvqNjY2Fl5cXRowYUa/XV7dop7XZsmULfv31VwQGBuKll16SOk6zCQwMrHXVfkOEhoYiMDDQItciIiKyBzZZUDZmZ5Nz584hPT0d06ZNQ2lpKUpKSlBSUgIAMBgMKCkpgcFgaPJ9WtKtW7ewcOFCuLi4WKwXZ2sWHByMiIgIyOXyBj8CFwQBcrkcERERmDhxYjMlJCIisk02uSinMTub5ObmQhRFrFu3rso5pVIJpVKJOXPmmD0ybu07qEyePBk6nQ6//vqrTbQIqo/AwED4+vqa9mwXBAGiKNb4+srzPj4+NrtnOxERUXNr3RVRIzVmZ5Nhw4bBx8enyvGvv/4a/fr1w+jRo9G5c+cm36c5vfTSS1Cr1fjoo4/w73//G2fPnsWcOXNw//33Sx2tRSkUCixYsABqtRrJyclIT0+vMj3BaDSisLAQU6ZMQVBQEFdzExERNYFNFpTV7aCSlpYGnU5nWkhT2fAaqGiOrVAoahydateuHXx9fc2OtcYdVDZs2ICcnBzExsYiNzcXXbp0webNm6WOJRlPT0/TYh2dTgeNRgO9Xg+5XI7XX38dX375JXx9fa2q/RMREVFrZJMFJVBRJCYlJZlaB9Wn4XVDdOvWDSqVCqIomv3Xs2dPSdry3L59Gzk5OQBg+r+zZ8+GKIo21yaoMZycnODl5WX6unK6wvLlyzFq1CiMHDlSqmhERERWT2ZsTLNGK6BWqxEVFdVs1//ss89w8+bNKsdfffVVvPHGG81235qkpKRUu6vL+PHjsX//fjg4OLR4ptYsPDzctDuOQqHA8ePHuc0iERFRI9ns0FXlDirN0fA6Ly+v2mISqFgII4Xz58+bfV35fZeXl7f61ehSSEpKMv3/+fn5mDJlimlkl4iIiBrGZgtKoPl2UHnllVdwzz33VNnS0cvLq1lazuh0OqhUKmRnZ0OlUlXbUD0lJcXsa19fX/zwww84evQonJ2dLZ7JmuXk5CA3N9f0tSiKyMrKwj333IOysjIJkxEREVknm33kXUmpVCImJsZi14uIiEBgYCBu3bqFIUOGICcnx6w/pbOzM5544gl89NFHZvMyjx49iosXL2LhwoX1uk/lCuW0tLRqG7UrFAoEBASYVih7eHhAq9XC3d0d//nPf7Bw4cJW39ZIKr/99hvuu+++Ksc9PT1x5swZdOnSRYJURERE1svmC0oASExMtEhj79DQULMRyOPHj2P8+PHQ6/UYPnw4nnnmGbz88stQq9VwdHTEokWL8Omnn0IulyMgIACZmZn45Zdf8MADD9R4D61Wa+qhKJPJat2PvPJ8+/bt8cYbb2DQoEHYtWsXXFxcmvy92rLVq1dj+fLlcHBwgCAIcHBwQGJiIgIDAznXlIiIqBHsoqAEKkYq4+PjTaux60sQBAiCgLCwsGq34/v000/x7LPPIjY21jTq9c033+CFF16ASqWCXC7HmDFjcPDgQQCAm5sbTp06BT8/P4tlBCoe295///313jbSnmm1WiQnJ2P06NF4/PHH8eOPPyInJ6dKn1
EiIiKqH7spKAHz0b/67qDi5+dX6w4qRqMRWVlZ6NmzZ5VzW7duxfPPP4/r16+bjjk4OGDgwIE4duyY2UiipUZRQ0JCEBwc3OTr2IsdO3YgLCwM//rXv7By5Uqp4xAREVkluyooK9W2gwpQ0bTc39/fIjuobN26FXPmzKlyfNGiRdi4cSOA5pvnSXUTRRFOTk4YPHgwTpw4IXUcIiIiq2SXBeWd7t5BxdI74AwaNMi0I8/dpk+fjnXr1mHjxo0oKirC/v37oVKpoFKpUFxcjEmTJiEkJMTsPUePHsWZM2eg0Wig0+nQtm1beHt7Y9KkSaZHtnK5HJGRkdyXup769++PjIwMrvAmIiJqJJtuG1QflTuo9OjRA15eXhbfTnHkyJEYO3YsJkyYgODgYEyePBmBgYFo27Ytfv/9d7z77rsQRRHFxcVISUmBwWBAv379arxeSUkJAgICcP/992PBggUICQmBSqXC2rVrTb0xRVFEbGysRb8PWzZjxgzodDocOXJE6ihERERWye5HKKV06tQp/PLLLwBgWs0tk8lQVFSEDz/8sNoRyuqo1Wp8/vnnCA4ORmhoqOl4ZGRkkx/Z24OMjAz07t0bCxcuxNdffy11HCIiIqtj9yOUUrp+/bqpObpMJqvSKL2+2rRpAwBmTdwFQUBycnLTQ9oBPz8/uLm5Ye/evVJHISIiskosKCWUlpZWa5/J2oiiCL1eD7Vaje3bt6Nt27YYPny42fn09HRLRbV5w4cPx7Vr16rdhYiIiIhqx61UJFJWVlbtDjj19fbbb5t26OnYsSMee+wxdOjQwew1lQt3LD0v1BY98sgjOHDgADZt2oTFixdLHYeIiMiqcIRSIk0pJgFgyZIlWLJkCWbNmgUnJyds3LjRbH/qStW1RaKqFi1aBADYtGmTxEmIiIisDwtKiej1+ia9v1u3bvD29saQIUPw2GOPAQD27Nlj8fvYizZt2qBr165ISkqSOgoREZHVYUEpEbnccrMNnJ2d0alTJ9y6datZ72PrJk2ahIKCAmRnZ0sdhYiIyKqwoJSIh4eHxa5VVFSEnJycaq9pyfvYuqeffhoAEBUVJXESIiIi68LhK4k4OTlBoVCYzaVMS0uDTqczrTRWq9WmXXYCAgIgiiK++eYbDB48GB07doRcLsetW7dw7NgxGAwGTJo0yewelt71x9ZNnjwZcrkcMTExeOedd6SOQ0REZDVYUEooICAASUlJptZBsbGxyM/PN51PTU1FamoqAGD58uVo164dvLy8kJKSgoKCAuj1eri5ucHHxwcPP/ywaetFoKIPpb+/f8t+Qzagb9++uHDhgtQxiIiIrAp3ypGQWq1u1ser3Cmn4V566SW89957SEhIwOTJk6WOQ0REZBU4h1JCnp6e8PPzM9vhxhIEQYCfnx+LyUaIjIwEAKxdu1biJERERNaDBaXEwsPDm6WgDA8Pt+g17YW3tzfat2+PhIQEqaMQERFZDRaUElMoFAgLC7PoNcPCwqBQKCx6TXsSFBSEGzduoLS0VOooREREVoEFZSsQGBiIkJAQi1wrNDQUgYGBFrmWvZo3bx4AYOPGjdIGISIishJclNOKKJVKxMfHQxRFiKJY7/cJggBBEBAWFsZi0gJ0Oh1cXFwwceJE7N+/X+o4RERErR4LylZGq9UiNjYWGRkZEASh1sKy8ryfnx/Cw8P5mNuCvL29kZeXh9u3b0sdxa7odDpoNBro9XrI5XL2UiUishIsKFsptVqN5ORkpKenQ6PRmJ0zGo3Iy8vDPffcg5EjR3I1dzNYtGgRvvnmG2RkZMDX11fqODat8mc9LS3NrNF/JYVCgYCAAAQFBfFnnYiolWJBaQXuHrVZtGgRdu7ciYULF+Lrr7+WOp5NOnz4MMaPH4+///3v+PDDD6WOY5PuHI2XyWSo7Z+iyvMcjSciap1YUFqhadOmYffu3QAqFo4sWrRI4kS2ydnZGX5+fvjjjz+kjmJzOF+YiMi2sKC0MkajEZ6enrh16xYAQC6XY/fu3VX28aamGzZsGM6ePQudTmfxXqH2LDEx0SJ9PkNCQhAcHGyBRERE1FT8lLQyly9fNhWTACCKIu6//35cvHhRwlS26cEHH4TBYMCuXbukjmIzlEqlxZrGJyQkQKlUWuRaRETUNCworczhw4fNvhZFEQUFBbj//vslSmS7li5dCgBYt26dxElsg1arRXx8vEWvGR8fX+1CHiIiallyqQNQw1QWlHK5HHq9HgDg7++P2bNnw2g0QiaTSRnPpnh5ecHd3R0HDhyQOopNiI2NrXO+5JUrV3DgwAFkZ2dDr9ejffv2GDp0aI1TOkRRRGxsLBYsWNAckYmIqJ5YUFoZT09P9O/fH1OmTMGXX36JXr16IS0tTepYNmv06NH4/fffUVhYCDc3N6njWC21Wo2MjIxaX3P69Gn8/PPPGDhwIGbOnAknJydoNJpae4GKooiMjAyo1Wq2FCIikhAX5VixwYMH48KFC9DpdFJHsVmbNm3Co48+ilWrVuHZZ5+VOo7Vio+PR1JSUo2tgQoKCvDpp59i6NChCA8Pb9C1BUFAUFAQwsLCLBGViIgagXMordjUqVNRXl6OkydPSh3FZs2ZMwcymQxbtmyROopVS0tLq7XPpFKpRHl5OSZMmNDga4uiiPT09KbEIyKiJmJBacUWL14MAPjqq68kTmK75HI5evbsyaK9CcrKyupcOHPlyhW4urri5s2b+OKLL/D666/jgw8+QExMDEpLS+u8h0aj4Ug9EZGEWFBasSFDhsDJyQl79uyROopNmzp1KoqLi3HhwgWpo1il+qzCLigoQHl5ObZu3YpBgwZh4cKFGD9+PE6dOoVNmzbVOrpZ6e4tSomIqOWwoLRyvXv35uO+ZvbMM88AAKKioiROYp0quxHUxmg0Qq/XY+LEiZg4cSJ8fX0xfvx4TJ06FVlZWXUu6KnvfYiIqHmwoLRyU6ZMgU6nw9mzZ6WOYrNGjBgBZ2dni/dQtBdyed3NJNq0aQOgogXWnSq/vnHjhkXuQ0REzYMFpZXjPMqWMWjQIFy6dKlB+05TBQ8Pjzpf06VLl1rP16e/an3uQ0REzYMFpZULDAyEo6Mjdu/eLXUUmzZz5kxTE21qGCcnJygUilpf079/fwCo0lO18usePXrU+n4PDw84OTk1ISURETUFC0ob4Ovry728m1nlNowbNmyQOIl1CggIqHWU0d/fH3369MH+/fuxf/9+XLp0CQcOHMCePXvQp08f9OrVq8b3ymSyKo/KiYioZbGxuQ1YunQp1qxZg/Pnz6Nv375Sx7FZHTt2hIODA3Jzc6WOYnXUanWdi5rKy8uxb98+nDlzBoWFhWjXrh0GDx6MyZMn1zk/ctu2bfDz84OjoyNKS0tRWlqKkpISDB06FF988YUlvxUiIqoGC0obcOTIEYwbNw7//Oc/8f7770sdx2aFh4cjLi4OSqUSR44cgV6v5+45DRAdHY3MzEyLzkOVyWS4ceMGvvzyy2rPjxkzBkeOHLHY/YiIqHosKG2Eo6MjBg0ahBMnTkgdxeYUFRVh586d+
OKLL7Br1y7T8Q4dOiAvL0+6YFZGq9UiKirKou195HI5li5dipkzZ+LAgQNVzv/666+4//77LXY/Imuj0+mg0Wig1+shl8s535iaDfts2AgfHx+cP39e6hg2afny5Vi/fn2Vx67Dhg2TJpCVUigUCAsLQ0xMjMWuGRYWho4dOyImJgbDhw/HlStXzEZAly5dips3b+Lxxx+32D3vxA9rao3UajWSk5ORlpZW7cYCCoUCAQEBCAoKgqenpwQJyRZxhNJGPPHEE1i/fj0yMjLg6+srdRybcuLECUyYMAElJSWmHVscHR3x3HPP4YMPPpA4nXUpKirCE088gX79+jX5WqGhoZg4caLp63PnziEoKMi0VeO4ceOQlJSE8vJyuLu74+9//zteeuklCML/1iLqdDr8/PPPmDlzZr0LQX5YU2ul1WoRGxuLjIwMyGSyWneYqjzv5+eH8PDwOjsxENWFq7xtxMKFCwEA69atkziJ7Rk+fDh+/fVXODg4mI6Vl5djxIgREqayLjqdDl9//TU8PDywdetWDBs2DHK53Ky4qw9BECCXyxEREWFWTALAwIEDER0dDQDw8vJCQkICCgsL8fe//x06nQ6vvPIK3Nzc8Oyzz5qKzrVr12Lu3Ln1mgur1WoRHR2NqKgoJCUl1bilpFarRVJSEqKiohAdHV2vrSeJmkqpVCIqKgqZmZkAUOd2pZXnMzMzERUVBaVS2dwRycZxhNJGiKIIJycnDB06FCkpKVLHsUmbN2/G/PnzTV+npaWxXU0dcnJysGbNGnz66ae4efMmAGD06NE4evSo2WiKIAi1LtapPF+f0ZQNGzbA29sbU6dONR0TRREfffQR3nvvPdy6dQtyuRyzZs3CwYMHcf36dQDA+vXra3w0rlQqER8fD1EUG7SoSBAECIKAsLAwBAYG1vt9RA2RmJiIhISEJl8nJCQEwcHBFkhE9ogFpQ3x8/NDTk4OioqKpI5isz766CP87W9/g0wmg8FgqNcOLvaooKAAf/nLX/Ddd99VKcJ++uknzJw50/R15SPk9PR0aDSaKtfy8PCAv7+/xR4hb9q0CS+99BKysrLMjsvlchw+fBgjR440O84Pa2rNlEqlReclR0RE8JcfahQWlDZk0aJF+Oabb3DlyhX07NlT6jg2q0+fPrh8+TJ0Oh3Ky8u5KKMaSqUSI0eOrHY0Lzc3t8bCsKUWuRiNRvj6+uLKlStmxzt27IjU1FR07tzZ9H3ww5paq7o6J5SVlWHv3r04d+4cSkpK0KlTJ0yYMAGDBw+u8ZpyuRyRkZGcU0kNxoLShuzevRvTpk3Dv/71L6xcuVLqODYrLi4On3/+OaZOnYrbt29XOc9FGRV+//13PPjgg6b5igDQu3dvpKenS5iqQnx8PGbMmFHtubZt2yIlJQWdO3eu8cP68uXL+Prrr6t9/5IlS+Dt7V3tOX5YkyXV1dv1m2++wfXr1zF16lR07NgRZ86cgVKpxKxZszBkyJBq3yMIAnx8fLBgwYLmjE42iG2DbEhoaCgEQcBvv/3GgrIZ3Dnnb8yYMdUWk5WvS0pKwvHjx+16BeWAAQNgMBgAwLSgKTQ0VMpIJhqNBu3atYNcLoeTkxOcnJzg4OCAmzdvori4GP369cMnn3xS53zJKVOmwMfHx+xY5ehmdSr3g+eHNTWVWq1GRkZGjecvXryIjIwMPPTQQ6YRSV9fX+Tl5WHXrl0YNGhQtYviRFFERkYG1Gq1Xf9CTA3HVd42RBAEeHt7IzU1VeooNocrKBumtLQUgYGBKC8vx+bNmzF69GgYDIZWM4dw/vz5KCgogEajgUqlwtWrV3H58mXcvn0bRUVF+O6775CXl1dnQenh4QFvb2+z/5ydnWt8/Z0f1kRNkZycXOsc7vPnz8PJyQkDBgwwOz58+HDcvn0b2dnZNb5XEAQkJydbLCvZBxaUNmb8+PEoKioyrVylpktMTERMTAz0en2Dtw0URRF6vR4xMTFITExspoStz9ixY3Hz5k289957eOSRR7B3715s2rQJDz/8sNTR6uTi4oIOHTo024IrfliTJaSlpdX6i21ubi46depk1u4MALp06WI6XxNRFFvF1BSyLiwobcyjjz4KoKJ1CjWdUqm0yApfAEhISLCLkcp58+bh5MmTmDNnDl544QUAgLOzM+bNm2c1C5bq+rCu9Ntvv+H111/HO++8g+jo6CqLfKrDD2tqqrKysjr7mxYXF8PV1bXK8cpjJSUltb5fo9FAp9M1PiTZHRaUNubee++FIAiIjY2VOorV02q1iI+Pt+g14+PjbbrR9QcffIDvvvsOQ4YMwffffy91nEapz4e1i4sLRo8ejfDwcDz22GMICwtDfn4+Nm7cWK9ikR/W1BT1/TekqaPs1bXxIqoJF+XYGEEQ0L17d5w9e1bqKFYvNjYWoiiirKwM+/fvh0qlgkqlQnFxMSZNmoSQkJAq77l+/Tp27dqF7OxsCIIAX19f3HPPPfDw8ABg24sy4uPj8eKLL6Jjx444duyY1HEarT4f1l27dkXXrl1NX/fq1Qv9+vXDF198gV27dtWr4b1Go4GXl1eTspJ9qqlN0J3atGmD4uLiKscrRyarG71szH2IKnGE0gaNGzcOhYWFtc6RodpVrqAURRHFxcVISUmBwWCodQ9qtVqNjRs3wmAwYPbs2XjggQdw69YtbNiwwdRs3lYXZaSlpeGBBx6Ao6MjlEolXFxcpI7UaI39EHV1dUWfPn2Qk5OD8vLyZrsPkVxe91hQ586dcfPmTVOnhUo5OTmm85a4D1ElFpQ26JFHHgHAeZRNcecKSnd3d7z44otYvHgxpkyZUuN7EhISIJfLMW/ePPTp0wcDBgzA/PnzUVRUhMOHD5teZ2uLMgoLCzFq1Cjo9Xrs2LHD6pvqN+VDtCFtfflhTY1V+cSjNv3794dOp8Mff/xhdvzUqVNo164devToYZH7EFViQWmDIiIiIJPJLLrDh725c1GGTCarcy6SwWDAxYsX0b9/f7PROXd3d/j6+pr9o25LizJEUURQUBDy8vKwevXqaqcBWJvGfoiWlJTg4sWL8PLygqOjY7Pdh8jJyanO3rYBAQHw8/NDbGwsUlJScPnyZWzfvh3p6emYNm1atT0o78Rdv6ih+CuyDRIEAV27dsWZM2ekjmKV6rMo425arRZ6vd7UkuNOXbp0waVLl1BeXm4qNCoXZVj7P9gzZ87EhQsXsGTJEvzlL3+ROo5FVH5Y1/Yz8MMPP6BDhw7o1q0b2rRpA41Gg8OHD6OoqAgPPvhgnffghzU1VUBAAJKSkmodFZ8zZw727t2LhIQE09aLdzY6r4kgCPWaB0x0JxaUNmrs2LH48ccfodFoOBLSQI1ZhV05+b22Nh2lpaVmI1fWvijjtddew/bt2zF69GisW7dO6jgWVdeHdZcuXXDu3DkkJydDp9PB1dUVPXv2xKxZs9C9e/dar80Pa7KEoKAgHD9+vNbXODs7IywsDGFhYQ26duWTB6KGYEFpo+bOnYsff/wRGzduxF//+lep41iVpiyWaEibDmtelPHDDz/gzTff
RNeuXXHw4EGp41hcXR/WEydOxMSJExt1bX5YkyV4enrCz8+v1r28G6NyL29uu0gNxTmUNurBBx+ETCbD9u3bpY5idRqzWKJNmzYAUGubjrtXPlvrooyzZ89i7ty5cHV1hVKptNrvozaVH9Z1zTNrKEEQ4Ofnxw9rsojw8PBm+RkNDw+36DXJPrCgtFFyuRxdunTByZMnpY5idRozRUChUEAul1fbqiknJwceHh5VFmpY41SEvLw8jB07FkajEfv377fqR/Z14Yc1tXYKhaLBj7PrEhYWVueCH6LqsKC0YaNHj0Z+fj7y8vKkjmJV6rOC8m4ODg7o27cv/vjjD5SVlZmO5+XlITMzE/379zd7vTUuyhBFEcOGDUNhYSG++uorjBw5UupIzYof1mQNAgMDLdZdITQ0FIGBgRa5FtkfFpQ2bM6cOQCA6OhoiZNYn4CAALP5kGlpaTh37hwuXrwIoKKJ+blz53Du3DnTFnqTJ09GeXk5Nm/ejLS0NPzxxx/YvHkz2rRpg3HjxpmuZS2LMnJycsx2XJo2bRquXLmC5557DosWLZIwWcvhhzVZg+DgYEREREAulzd4VF0QBMjlckRERDR6XjARAMiMDenES1ZFp9PB2dkZoaGh2LNnj9RxrIparUZUVJTp648//hj5+fnVvnb58uWmUae6tl6slJycjKFDhwKo6GFZ+d/kyZMRERHRTN9Vw8ybNw9btmzBp59+irS0NHzyyScICQnB3r17pY7W4pRKJeLj4yGKYoMWQAiCAEEQEBYWxmKSmp1Wq0VsbCwyMjIgk8lqbSkkCAJEUYSfnx/Cw8M5ck5NxoLSxnXp0gXl5eXQaDRSR7E60dHRFl9BCQCXL1/G119/DeB/BYdMJkN5eTkeeugh/PDDDxa9X2MYjUZ06dLFbIvInj174vLlyxafV2gt7vywrvwwrgk/rElKarUaP/30E86dO4eOHTtWOe/h4QF/f38EBQVxgRhZDAtKGxceHo64uDjcvn0bbm5uUsexKlqtFlFRURZt7yOXyzF79mwEBgaa9ve+08GDBzF+/HiL3a+xMjIy0Lt3b7NjkyZNwq+//ooOHTpIlKp1UKvVSE5ORnp6erW/qN26dQsDBgzAQw89xA9rkkRZWRm6d++OW7du4ccff8S4ceOg1+shl8utcv42WQfb6/dBZmbPno24uDh8++23WLp0qdRxrErlogxLbmEZFhaGPn364Pfff0dwcLDZKJdMJkN0dDRGjBhRpcVQSztw4ECVY/v370dwcDBOnjzZoH6btsbT09O0WEen00Gj0Zg+rL/66it8+umnaNOmDe677z6Jk5I9MhqNWLp0KW7dugUA+OOPPzBr1iyJU5E9sM9nV3akcmHOTz/9JHES6xQYGGix9j53LsoYP348PvzwQ7PzHTp0wJo1a9CuXTssWrQIhYWFFrnvnXQ6HVQqFbKzs6FSqUwLiu62b98+s68FQYCLiwvuvfdei2eyZk5OTvDy8kKPHj3g5eVl2mWpuLgYISEhZlMGiFrC6tWrsXHjRtPXnD9PLYWPvO1A586dIYoi9u7di8TERAQGBpqtOqbqlZWVYdmyZVi7di3++te/wsPDw6KLMoxGIx566CH8/PPPGDlyJI4dO4bvv/8e//jHP3Dt2jU4ODhg1qxZ+O9//wt3d3fT+95880107twZTz/9dL0yVD6iTUtLq3ZbSYVCgYCAALP5VC4uLqb2R926dcNzzz2HJ554gnMB6zB16lTTB7ggCBg8eDASExPRvn17iZORPdi9ezfuvfdes3+jXFxckJ+fz8fc1OxYUNqwixcv4rfffsM777xjNlIyf/58fPvttxIma92Kioqwdu1avPXWW6bHRiUlJSgpKbH4ooyCggI89thj+Mc//oGxY8eajv/6669Yvnw5rly5YmqGvXbtWmg0GgwYMAAA8Pvvv2PatGk1ZmjIis/K835+figtLcVTTz0FDw8PrF27Fvfff79N7obTHDp16mT6mQEq+pOOHTsWu3btknwaA9m227dvo0ePHigoKKhy7tChQxxEoGbHgtJGGY1GeHh4IC8vz6yYkMlk+M9//oPnn39e4oStj1arxeeff45///vfKCgoMP2ZDR8+HEql0vS6uhZlWHIF5a5du/DnP/8ZaWlpkMlkppXXRqMR7du3x5kzZ9CjR48q72tKmxudTof8/HysWrXKrudKNlROTk6VnYMqf7E4cOAAJkyYIFEysgclJSV4/vnn8fvvvyMzMxNAxSJAvV6Pt99+GytWrJA2INk8DjvYKJlMhjfffBPLli0zG5kyGo38TbUGISEhOHXqlNkxQRAwdepUs2O1Lcqw9ArKadOm4eLFizh48CAef/xxpKWlmc7dvn0bDz30EA4ePGi2rWNiYiISEhIadT9RFCGXy9GxY0ccOHAAwcHBTf4e7MWZM2fMvpbJZFi5ciXCw8MxfPhwiVKRvXB1dcWXX34JAKYnIs888wyOHj2KgIAAKaORneCiHBv2l7/8BcuWLTMbZXJ0dOSHWw1WrFgBZ2dnsz8vURQRFBRU43vuXpTRXPOUJkyYgODgYDg4OJiOGQwGHD9+HI8//rjpmFKpbHQxebeEhASzkVmq3YABAxAZGYmvv/4aS5cuhdFoRGhoKP/3Ri1Kp9MhLy8Po0aNwjvvvIO9e/di9uzZUsciO8BH3jbOYDAgIiIC8fHxAICRI0fi+PHjEqdqvdauXYunnnrK7Fh6enqVnowtTaPRoEuXLjAYDJDL5TAajTAYDKbR53vuuQeffvoptm3bVm3fzJ9//rnK6OudlixZAm9v7yrH5XI5IiMjuRinga5evYpevXphzpw5+P7776WOQ3bkhx9+wOzZs/HJJ59g+fLlUschO8KC0g7cvn0bffr0gUqlwowZMxAXFyd1pFbp5s2b8Pb2hl6vR3BwMPbu3Yt27dohPz9f8rmEpaWleOONN5Cfnw8HBwfTf7dv30ZCQgJu3LiBpUuXon379tXOmdRoNNU2Uv/uu+/g4OCA559/vtodcARBgI+PDxYsWNAs35ctUygUcHR0RG5urtRRyI7Mnz8fmzdvRk5ODjp37ix1HLIjnENpB9q1a4e9e/diwIABcHd3b/Z5f9ZIFEWMHj0apaWl+OGHHzBz5kx88sknMBgMkheTQEXrj3feeafG81euXMHGjRtrXIDj4eFRpZ9mZmYmiouLERwcXON2iqIoIiMjA2q1mru+NNCECRMQGxuLW7duVbv9HVFzOHr0KFxdXVlMUotjQWknOnXqhKeeegqdOnXCu+++W+V8db0I7cn8+fORkZGBZ555Bg899BAA4K9//avEqeovNTW1ztZAd6ucH1nXHD9BEJCcnGxaiET1s3TpUsTGxuLTTz/FypUrpY5DdiIrKwv9+vWTOgbZIRaUNu7OXoTdu3evseDQarVISkrC8ePH6+ydaGvWrVuH77//HkOGDEFUVJTUcRolLS2tQcVkaWkpUlNT4efnV+ffsyiKSE9Pb2pEuxMWFga5XI4ffviBBSW1iAsXLqC8vBwTJ06UOgrZIa7ytmFKpRJ
RUVGmnmR1FRyV5zMzMxEVFWUXK3zPnTuHpUuXol27djh06JDUcRqlrKys2h1wanPmzBno9fp6r0DWaDQ1btNI1RMEAf3798f58+cb1AuUqLEqN6x45JFHJE5C9ogFpY1KTExETEwM9Hp9gz/MRFGEXq9HTEwMEhMTmymh9EpLSzFhwgTTtpRubm5SR2qUhhaTAHDixAm4urqif//+9X5PdU3cqXYPP/wwDAYDYmJipI5CdmDPnj0QBIG9hkkSLChtEHsR1s/kyZORl5eHjz76qNZek61ddW2CaqNSqXD9+nUMGTKkQVsqNvQ+BPz5z38GAKxZs0biJGQPUlNT4eXlVeMiO6LmxJ86G6PVak09Jy0lPj6+UaNgrdkLL7yAY8eOITw8HM8995zUcZqkoftsnzhxAgAQGBjYrPehisVuXbp0weHDh6WOQjaupKQE+fn5bKRPkuEnhI2JjY2FKIooKyvD/v37oVKpoFKpUFxcjEmTJiEkJMTs9TU1vO7YsSOWLVsGoOIReGxsrM30ItyxYwc++OADdO/eHb/++qvUcZrs7nZAtdHr9Th9+jS6d++OLl26NNt96H+mTJmCzZs3IzMzEz4+PlLHIRv1008/AQDCw8MlTkL2igWlDVGr1cjIyAAAFBcXIyUlBV5eXujXr1+tj63lcjkWLVpkduzOvaFtqRehSqXCgw8+CCcnJxw/ftwmHg05OTlBoVDUaxT5/PnzKCkpafDoJHuVNt5f/vIXbN68GZ988gk++eQTqeOQjfrll18AAHPnzpU2CNkt6/80JZPk5GRTE253d3e8+OKLWLx4MaZMmVLr+2QyGby9vc3+8/LyMntNZS9CayaKIkaNGoWysjL8+OOP6Natm9SRLCYgIKBeDdiVSiUcHR0xaNCgel9bEAT4+/s3JZ5dGzt2LFxcXLgwh5pVUlIS2rZtC3d3d6mjkJ3iCKUNubMXoaV3d7GFXoR/+tOfkJWVheeff97mHgsFBQXVa4/2hQsXNvjaoiha9aKl1mDYsGE4duyYaXcqIku7du0ahgwZInUMsmMcobQRjelFWEmv1+PDDz/E66+/jv/85z+Ii4tDcXFxlddZcy/Czz//HD///DNGjBiBjz76SOo4Fufp6Qk/Pz+LP8IXBAF+fn5WP9VBao8++iiMRiO+/vprqaOQDTp58iT0ej0mTZokdRSyYywobURji0kvLy/cc889mDVrFh599FEMGzYMJ0+exFdffYWysrIqr7fGXoQnT57Es88+iw4dOuDAgQNSx2k24eHhzVJQ2tporhSWLFkCmUzGgpKaxebNmwGwoTlJi89ebERjewSOHTvW7OvevXuja9eu2Lp1K5RKZZXz1taLsHJ1O1DR7N3V1VXiRM1HoVAgLCzMonP1wsLC7GYLzubk4uICb29vpKSkSB2FbFBCQgIcHBwwcuRIqaOQHeMIpY2w5Lysfv36wdHREdnZ2c16n5YwceJEFBQU4LPPPrOL+UWBgYFVWkM1VmhoaINXg1PNZsyYgeLiYpw8eVLqKGRjLly4YFOLDMk6saC0Ec3RI7C6hT2tuRehwWDAlStXTF8vX74cSqUSDz30EJ555hkJk7Ws4OBgREREQC6XN/gRuCAIkMvliIiIwMSJE5spoX2qbKC/atUqaYOQTSksLMTt27e5cI4kx4LSRlT2IrSE1NRUlJeXo0ePHmbHW3svwrVr18LX1xdvvvkmfvnlF6xevRq9evXC1q1bpY7W4gIDAxEZGWlqpF1XYVn5y4OPjw8iIyM5MtkM+vbtCzc3N+zcuVPqKGRDtmzZAgCIiIiQOAnZO+t6fkm1CggIQFJSkql1UFpaGnQ6nWlltlqtxrlz50yvLS4uxo8//ohBgwaZRh6vXLmCo0ePwtPT06yokMlkrb4X4aFDhwAAr732GmQyGZydnW2meXljKBQKLFiwAGq1GsnJyUhPT692UdWtW7eQn5+Pf//731zN3cxGjx6NPXv2oLCwEG5ublLHIRtQOWd69uzZEiche8eC0obc3YswNjYW+fn5pq9TU1ORmpoKoOJxsIuLC9q2bYsjR46gsLAQRqMRHTp0wOjRozFx4kSz0Uij0Yinn34abdq0gZubG4qLi1FcXIySkhK0bdsWSqVS8oa6R44cMRXTRqMR7du3x/Xr19G5c2dJc0nN09MTYWFhAACdTgeNRmPqh+js7Gz6ZWLYsGF4/vnnpYxq85544gns2bMHX375Jf7+979LHYdsQEpKCtq3b89fUEhyMmPlJzDZhOjoaGRmZkIURYtdUyaTobS0FO+++2615z09PZGVlQVnZ2eL3bOhbt++jQ4dOuDOH2eZTAZHR0ecOHECAwYMkCxba5aUlIRRo0aZvl6/fj0ef/xxCRPZNlEU4ejoiGHDhnHFN1mEXC7HiBEjcOzYMamjkJ2zz2eBNqw5ehE6ODjgH//4Bz7//PNqzy9durRZi0mdTgeVSoXs7GyoVKpqm6ufOHHCrJh0cHCA0WjE5MmTq2wjSf9z9uxZs6+feOIJbNu2TaI0tk8QBAQEBODMmTNSRyEbcOzYMRgMBoSGhkodhYiPvG1Nc/YijIyMxJkzZ7BmzRqz4u3NN9/EwYMH8cUXX6Bv374WuWflvL+0tLRqm7YrFAoEBAQgKCgInp6e+Pnnn83Oz5w5Ey+//DKGDRtmkTy26syZM3B0dER5eTmAiqkC8+bNQ4cOHXDPPfdInM42zZw5E++99x727NmDKVOmSB2HrNh3330HAJg/f77ESYg4QmmTmrMX4apVqzBu3Dg4ODjAwcEBM2fOxKBBg5CQkIB+/fph0KBB2L17t9k1zp07h8mTJ+PSpUt13k+r1SI6OhpRUVFISkqqcQcgrVaLpKQkREVF4euvvzaNqs2cORMXLlzAtm3bWEzWw8mTJ03FJFAxsqvX6xEbGythKtu2fPlyAEBUVJTEScja7d+/H3K5HIMGDZI6ChHnUNoypVKJ+Ph4iKLYoDmVgiBAEASEhYVV2z4mNzcXw4YNQ05ODi5dugQfHx9cuHABkZGRSEhIgNFoRNeuXfGvf/0LTz/9NObOnYstW7agb9++SEpKQrt27SyaFwDKy8vh6+uLJUuWNOh99s7Lyws5OTmmr2fMmIG33noLw4YNq7YPKVlGx44dIZPJcPPmTamjkBVzc3ODp6cnLl++LHUUIhaUtk6r1SI2NhYZGRkQBKHWQq3yvJ+fH8LDw2vta5mWlobU1FQ88MADVe63fPlybNmyBTqdDm3btkVRURGAitGv8PBw/PTTT1XmeSYmJiIhIaEJ32mFkJAQBAcHN/k69mL9+vUAgHvuuQc+Pj4ICgri5P4WMHPmTPzyyy+4ceMG5/hSo2g0GnTs2BFz5szB999/L3UcIhaU9qKuXoQeHh7w9/c3zUlsKr1ej9dffx0ffPBBlUU0//rXv7By5UrT10ql0qJzPiMiItiYuxH69OmDK1euoKysTOooNm/37t2YNm0aVqxYgbffflvqOGSFoqKi8Oc//xmbN2/GI488InUcIhaU9ujuXoTNtQOOVqtFt27dUFpaWuXcql
Wr8Oyzz0Kr1SIqKgp6vd5i95XL5YiMjLTYzkH24oUXXsAHH3yAPXv2cNVoC3ByckLv3r3xxx9/SB2FrNB9992H3377DSUlJXBxcZE6DhELSmo+n332GZYtWwYHBwfIZDIYDAaz1eFPPPEExo0bh+zsbJSUlGD//v1QqVRQqVQoLi7GpEmTql1cZDAYcOzYMZw8eRIajQYODg7w9PTEPffcg549e0IQBPj4+GDBggUt+e1aPZVKha5du2LmzJn46aefpI5j8wIDA3H69GnodDq73c2JGq9bt24oKSmpceEiUUtj2yBqNpMnT8bf/vY3uLq6wtXVFW3atEGbNm1w+/ZtHD9+HCdPnjTtF15cXIyUlBR4eXmhX79+UCqV1V5TFEV8//33uHr1KsaPHw9vb2+Ul5fj+vXrptXKoigiIyMDarWaWwk2gJeXFzp27Ij9+/dLHcUuzJ07FydOnMAPP/yAhx9+WOo4ZEVEUUROTg7GjBkjdRQiE45QkmTi4+NNe49X/hjKZDIUFRXhww8/rHaE8siRI9i5cycef/xxeHt713htQRAQFBRk2nKQ6mf27Nn44YcfkJWVZSr2qXkUFhaiXbt2mDZtGnbu3Cl1HLIi+/btQ0hISJX56ERS4nMWkkxaWppZIVmfNjXHjh1Dr169ai0mgYrf4NPT0y2S055U9kj8+OOPJU5i+9zc3NC1a1ccPXpU6ihkZbZu3QqADc2pdWFBSZIoKytr8Nyf/Px85OXloXPnzti9ezc+/PBDvP766/j8889x8uTJKq/XaDTVbtNINZswYQKcnZ3x66+/Sh3FLkybNg23b99GWlqa1FHIihw4cABOTk4ICAiQOgqRCQtKkkRjJpIXFBQAAE6dOoULFy5gxowZmD9/Pjw9PfHLL78gJSWlynuqa5FEtQsMDERGRgaL8Rbw3HPPAQA++eQTSXOQdbl06RJ69uwpdQwiMywoSRKNaRNU+Xhcr9dj/vz5GDhwIPz9/TF79mx07dq12sUklmxHZC8WLVoEo9GIjRs3Sh3F5g0fPhyurq6Ii4uTOgpZCZVKhZKSEowePVrqKERmWFCSJOTyhjcYaNOmDQCgU6dOcHd3Nx2XyWTo3bs3CgoKUFhY2OT72LvFixdDJpOxoGwhI0aMwNWrVzkiTPWyefNmAMBDDz0kcRIicywoSRIeHh4Nfo9CoYCjo2Otr7l7YU9j7mPvnJyc4OvrixMnTkgdxS5UjghXboNJVJsdO3YAqNgRjKg1YUFJknBycmrwTjYODg7o27cv1Gq12RxMo9GI9PR0KBQKtG3b1nS8uXYAsgcPPPAASktLcejQIamj2LyFCxdCJpPhm2++wYEDB/Cvf/2r2kVmREDFHPKOHTvy6Qu1OvyJJMkEBASY+lACFW2EdDqd6dGfWq3GuXPnTK91cnJCaGgo0tPT8e2332Ly5MlwdnaGUqmESqXC7NmzTdcWBAH+/v4t/03ZiOeffx4ff/wxVq1ahfHjx0sdx2bdvHkT8fHxcHV1xdGjRxEcHAygYqrGsGHDpA1HrcahQ4dQWlqKoKAgqNVq088JUWvCxuYkGbVajaioKNPXH3/8MfLz86t97fLly00jmjk5Odi9ezeuXLkCURTh5eWFiRMnom/fvmbviYyM5E45TeDh4QEHBweo1Wqpo9ik27dvo2vXrigqKoJMJjPblvSnn37CzJkzJUxHrUnv3r2RkZFh+nr06NH485//jHvuuQddunSRMBnR/7CgJElFR0cjMzMToiha7Jrcy9syZs6ciV9++QU3btyAl5eX1HFsjtFoxGOPPYbo6Gjc/c/wpUuX4OfnJ1Eyam3mzp2Lbdu2mf6ddHBwgMFgwMSJE5GYmChxOqIKnENJkgoPD4cgWPbHUBAEhIeHW/Sa9mjZsmUAgFWrVkmcxDbJZDKsX7++yv8GXF1d4ePjI10wanUmT55s9kuHwWCATCbDyy+/LGEqInMsKElSCoXC4vtth4WFNXjBD1UVGhoKJycn/PTTT1JHsVlyuRxbt27FhAkTTMcGDBhg8V+yyLrdXVACwIcffoh7771XokREVfFfLZJcYGAgQkJCLHKt0NBQBAYGWuRaBAwdOhTp6elsEN+MXFxcEBsbC19fXwAVjzOJ7tS3b1+z3rvz5s3DX//6V+kCEVWDBSW1CsHBwYiIiIBcLm/w6IwgCJDL5YiIiMDEiRObKaF9WrBgAURRxLfffit1FJvWrl07HD16FIIgmFpd6XQ6qFQqZGdnQ6VSsfG5HZPJZOjRoweAigU669atq9Jzl0hqXJRDrYpWq0VsbCwyMjIgCEKti3Uqz/v5+SE8PJyPuZtBaWkp2rRpgwkTJnDyfwt49tlnUVBQgMDAwGr3u1coFAgICEBQUBA7GNiZSZMmITExEVevXoW3t7fUcYiqYEFJrZJarUZycjLS09Oh0WiqnPfw8IC/vz8/WFuAj48PcnNzUVxcLHUUm3XnL1J3txC6W+V5/iJl23Q6HTQaDfR6PeRyOZYuXYry8nLu+06tFgtKavXu/oeVO+C0rGXLluGzzz7D8ePHMXLkSKnj2BylUon4+HiIotig9lmCIEAQBISFhXHesI2o/EU6LS2t2hHqtm3bYuDAgfxFmlolFpREVKvLly/Dz88P8+bNw6ZNm6SOY1MSExORkJDQ5OuEhIRw9xQrxhFqsgUsKImoTu7u7nB2dkZOTo7UUWyGUqlETEyMxa4XERHBkUorxBFqshVc5U1EdZo4cSJyc3Nx8+ZNqaPYBK1Wi/j4eIteMz4+vtrHpNR6JSYmIiYmBnq9vsG7hYmiCL1ej5iYGC6Yo1aBBSUR1SkyMhIAsHr1aomT2IbY2Nh6FxApKSlYuXIl3n777VpfJ4oiYmNjLRGPWoBSqbTIdAcASEhIgFKptMi1iBpLLnUAImr9pk+fDkdHR2zbtg1vvPGG1HGsmlqtRkZGRr1eW1BQgJ07d6Jdu3YoLS2t9bWiKCIjIwNqtZoLNlq5ukaoy8rKsH//fqhUKqhUKhQXF2PSpEm1bgARHx8PX19fzqkkyXCEkojqJJPJMHjwYFy8eLHBj+bIXHJycr2bUsfGxqJXr17w8/Or1+sFQUBycnJT4lELqGuEuri4GCkpKTAYDOjXr1+9rskRapIaC0oiqpd58+ZBFEV89913UkexamlpabWu4q106tQpZGZm4r777qv3tUVRRHp6elPiUTOrHKGuraB0d3fHiy++iMWLF2PKlCn1uu6dI9REUmBBSUT1snTpUgDA+vXrJU5ivcrKyuq1cKawsBA7duzA1KlT0aFDhwbdQ6PRcJvGVqw+I9QymaxRWytyhJqkxIKSiOqlbdu28Pb2xrFjx6SOYrXquwo7Li4OnTp1anQj+ep2l6LWob4j1I3BEWqSEgtKIqq3GTNmoLi4GCdPnpQ6ilXS6/V1viY1NRUXL15EREREo0ap6nsfann1HaFuCo5Qk1RYUBJRvT3//PMAgI8//ljiJNZJLq+9sUZZWRni4uIwatQotGvXDiUlJSgpKYHBYAAAlJSU1KtYqOs+JI2W6hPKEWqSAv/VI
aJ669u3L9q1a4edO3dKHcUqeXh41Hq+uLgYRUVFOHLkCI4cOVLl/Pvvv4++ffvikUceadJ9SBotNXLMEWqSAgtKImqQ8ePHY8eOHcjLy4O7u7vUcayKk5MTFApFjSNVbm5uWLRoUZXjBw8exJUrVzB//ny0adOm1nt4eHjAycnJInnJslpq5Jgj1CQF/tQRUYMsXboUO3bswOrVq/Haa69JHcfqBAQEICkpqdqFGY6OjvD19a1y/OTJk5DJZNWeu5u3tzeuX7+O27dv4/bt2ygoKMDt27fh6+uLIUOGWOR7oMZpqZFjjlCTFFhQElGDREREQC6XY9u2bSwoGyEoKAjHjx9vtus/+eST1e65PnjwYJw+fbrZ7kt1q2uE+k5paWnQ6XSmObNqtRrnzp0DUPFLSU2j0ByhJqmwoCSiBhEEAQMGDMC5c+cgiiIEgWv7GsLT0xN+fn7IzMys965DM2fOxMyZM2t9jSAIMBqN1RaTMpkMc+fObVTe+tDpdNBoNNDr9ZDL5SxqalHbCPWdYmNjkZ+fb/o6NTUVqampAIDly5dX++crCAL8/f0tG5ionlhQElGDzZ07FytWrMArr7wCjUaDixcvIi4uDq6urlJHswrh4eGIioqy6DaWgiAgMjISPj4+VeZhGo1GGAwGi/4CoFarkZycjLS0tGpH3BQKBQICAhAUFMS9xe9Q3xHqyo4KDSGKIoKCghoTi6jJZMbm6rBKRDanuLgYmzZtwo8//ojff/8dQMXol9FoRH5+Ptq3by9xQuuhVCoRExNjsetFREQgMDAQAPDCCy/gww8/hNFoNO26IooiXFxc8Mgjj+Cjjz4yW1CVkpKCDz74AGvWrKlzoZVWq0VsbCwyMjJMf/c1qTzv5+eH8PBwKBQKS3yrVi86OrpBI9T1IQgCfHx8sGDBAotdk6gh+KyKiOpt3bp1eOqpp8zaBhmNRnTt2pXFZAP5+PhYrEF8aGioqZgEgHfffde0B7jRaMTJkyfx/vvvo3379tiwYQM8PDwwadIk05zKFStWYOvWrZg9e3atLWeUSiWioqKQmZlpunZtKs9nZmYiKioKSqWyKd+mzQgPD4coihbdMUcQBISHh1vsekQNxRFKIqq3wsJChISE4MSJE6Zm2wAQFhaG3377TcJk1qOsrAzR0dF45plnIJPJEBsbi6SkJIii2KARK0EQIAgCwsLCzIrJSrdv38b48ePh7e2NuLg40/Hff/8d//jHP3DmzBkAQI8ePZCdnQ2gYkRx2bJlWLVqVZXrJSYmIiEhoaHfbhUhISEIDg5u8nWsjSiKSE5ORnx8PP7973+jT58+uP/++y12/TtHqImkwIKSiBpEo9FgwoQJuHDhAkRRhEwmwwsvvIB3331X6mit2q1bt7BmzRp88sknUKvVAIDp06cjPj7e7DGyIAi1FpaV5+vzGNlgMMBgMFS7gOPKlStYtmxZtY/d16xZg6eeesr0dXM+nrdlBoMBW7ZsQVxcHH777Tfk5eWZznl6euKHH36wSJEeGhqKiRMnNvk6RE3BgpKIGkylUmHs2LGmR5+bNm3CvHnzpA3VShUWFuKf//wnvvrqK5SXl5sVizt37sS0adNMX1cudElPT692+zwPDw/4+/tbbKGLVqtF165dUVZWZnZcJpNh8+bNmDt3LrRaLaKioiy6+4pcLkdkZKTNz6ncsWMHwsLCqv0lYdu2bfjTn/4EpVKJ+Pj4Bo9Qi6IIJyenGkeoiVoaC0oiapQrV66gT58+0Ol0OHbsGEaNGiV1pFYpMTERkyZNqnJcJpMhLy+vxrmnLdGKZ9WqVXjuuefg4OBgKnrunMqwbNkyBAYGIisrq9pi58aNG9i7dy9ycnJQXFwMuVyOTp06YeTIkRg6dGiN97WXBSQGgwEPP/wwfv75Z7P5kh06dEBOTg6cnZ0BoFEj1JcuXUJycjIOHDiAjh07Nvv3QlQXtg0iokbp1asX1qxZg8WLF0Oj0bAXYQ2Cg4OxefNmLFy40GyUr1+/frUuZHJycoKXl1ezZhs2bBjmzp0LNzc3tGvXDu3bt0e7du2Ql5eH06dP4+DBg7UWK6WlpWjfvj0GDRqE9u3bQ6fT4cyZM/j555+Rl5dXbSENVIyuZWRkQK1W23RLIQcHB7z88sv45ZdfTAWlXC7HY489ZiomgYoWSwsWLKj3CPWQIUPQo0cPABU/R9999x2mTp3aMt8UUQ04QklEjZabm4vnn38e/fr1q3ZUhb0IK9y8eRM9evRAWVkZHBwcAABPPPEEvvzyS4mT1S4+Pr5eTbjvtnbtWty+fRt//etfa3yNIAgICgpCWFhYU2O2WomJiZg6dSr0ej169uyJq1evwmg04vTp0xg8eHCt763tFzRRFE0/R5X+9re/4e233zYrVIlaEtsGEVGDabVaREdH44svvkDfvn1rfESn1WqRlJSEqKgoREdH12vLOVsjiiICAwNRVlaGNWvWIDAwEAaDAePGjZM6Wp3S0tIa1dqmTZs2dTZQF0UR6enpjY3W6v3www8ICQkBAOzbtw9JSUnw9fXFyJEj6ywmgf+NUPfo0QNeXl5mo/2CIFQpKD/66COMHDkSubm5lv1GiOqJj7yJqEHuXEQANLwXob0tIrj33nuRlZWFv//973jqqaewcOFCbNmyBXPmzJE6Wq3Kysrq/QtAZU/F0tJSnDt3DpcuXcKMGTPqfF/lVAlbmxrx2WefYdmyZXB1dcXx48cxaNAgAMCZM2dQXl5ukXs4OjqazXcVBAEXLlxAVlYWOnfubJF7EDUEC0oiqrem9CKsXMUaExODwsJCu+hFuGLFCuzevRuTJ0/Ghx9+CABwcXGpsjVia9SQ0eS4uDikpKQAqJg3GBYWVu8tADUaTbPPFW1Jr776Kt566y24u7vjzJkzprmOQMXIraU4OjqitLTU9PWDDz6IVatWoXv37ha7B1FDsKAkonpRKpUW6ZkHAAkJCXBzc7Ppkcqff/4Z7777Lrp3745du3ZJHafBGtImaOLEiQgMDERRUREuXryI3377DTqdDuPHj7fofVq7J554AuvXr0e3bt1w7ty5OrexbApXV1fcvn0b9913H+Li4nDx4kUWkyQpFpREVCetVov4+Pgaz9+4cQP79u3DtWvXUFpaig4dOmDw4MEYN25cjY8z4+Pj4evra5O9CNPS0vDwww/DxcUFKSkpkMut75/ahmR2d3c3FU99+vQBAOzZswfDhg1D27ZtLXaf1iw8PBxxcXHo378/Tp482eyP8b///nsoFAoMGzYMM2bMQHx8PPbt24fJkyc3632JasJFOURUp9jY2BoX3uTm5mL9+vXIy8vD9OnTMW/ePAwaNAj79+/Hjz/+WOM1RVFEbGxsc0WWTHFxMUaNGgWDwYBdu3ahS5cuUkdqFA8Pj0a/t3v37hBFsV6PzZtyn9ZAFEWMHDkScXFxGD9+PM6ePdsic0JDQkIwbNgwAMA333wDQRCwePHiZr8vUU1YUBJRrdRqNTIyMmosKM+cOQO9Xo85c+Zg0KBB8PPzQ0hICAIDA3HhwgWUlJRU+747exHa
kjFjxiAvLw+rVq3ChAkTpI7TaE5OTo0ePb58+TJkMlmd77f2XqWlpaXo06cPkpOTMWvWLBw8eLDO1e3NoVOnTpg7dy4yMzPxww8/tPj9iQAWlERUh+TkZMhkshrPV7Yvubv/nYuLC2QyWZX2JncSBAHJycmWCdoKPProozhz5gzmz5+PZcuWSR2nyQICAmr9u9++fTt+//13nD17FpmZmUhNTcW2bdtw+vRpjB07ttbH3TKZDP7+/s0Ru0XcvHkTvXr1wqVLlxAZGVnraHxLWLt2LRwdHfHnP/9Z0hxkv2xj8goRNZu6ehEOHToUR48eRVxcHKZOnYq2bdsiMzMTycnJGDlyZK0jULbUi/DTTz/Fpk2bMGjQIHz77bdSx7GIoKAgHD9+vMbz3t7eOHHiBE6dOoXS0lI4OTmhS5cumDlzZq1bLwIV7aRWrlyJH374AQ4ODjAajab2Q1OnTm1Ve8MXFBSgsLAQ3bp1A1AxAjts2DAUFBTgzTffxCuvvCJxwooV5M888wxWr16NqKgoREZGSh2J7Ax3yiGiGpWVleG9996r83VqtRpbtmzBzZs3TcdGjx6N6dOn1zrCVemll16y6kefhw4dwsSJE9GhQwdcu3bNou1hpBYdHY3MzMxa95duKJlMhszMTGzYsMH0tSAIkMlk0Ov1mD17NrZu3Wqx+zXVrFmzsHPnThw6dAgGgwHjx49HWVkZ1q5diyVLlkgdz0Sv16N9+/ZwdHSEVquV5PE72S/+tBFRjeqzqEKr1eK7776Dq6srHn74YTz22GOYNm0aTp48ie3bt9frPtXtW2wtVCoVpk6dCgcHBxw7dsymikmgYvWypQsTBwcHvPHGG6YG3EajEQaDwdRC6J///KdF79cUOTk52L59O4qKijBx4kSMGjUK5eXl2L59e6sqJoGKFfMrVqxAQUEB3njjDanjkJ1hQUlENapPj8Ddu3ejrKwMCxYswIABA+Dj44Px48dj+vTpOHHiBDIzMy1yn9ZIr9cjKCgIpaWl2Lp1q6llji1RKBQW3287LCwMPXr0wK5du6odmf7ggw+Ql5dn0XtW0ul0UKlUyM7Ohkqlgk6nq/X10dHRpikft2/fhsFgwC+//ILw8PBmyddUK1asQIcOHfD+++9b7f+uyDpxDiUR1ag+PQJVKhU8PT2rFAaV881yc3Ph4+PT5Pu0RtOmTcO1a9fw0ksvYebMmVLHaTaBgYEoLCy0SGP70NBQU0P7IUOG4IsvvjAb6evWrRu2bduGH3/8EQ888ADWrVtXpbWQSqVCmzZt0L59+3rdU61WIzk5GWlpadWOuisUCgQEBCAoKAienp6m40ajEf/973/NHvcLgoCXXnrJNMWhtREEAR988AGefvppLF++HJ9//rnUkchOcISSiGpUnx6B7dq1Q25uLsrKysyOZ2dnA0C9PvStsRfhP//5T+zbtw9Tp07FO++8I3WcZhccHIyIiAjI5fIGPwIXBAFyuRwRERGYOHGi2bnFixdjwYIFAICpU6fi2rVr2LFjB3x9ffHzzz/D09MT999/P3JzcwFUjDCOHDkSY8aMQXFxca331Wq1iI6ORlRUFJKSkmqcwqHVapGUlISoqChER0ebXnfs2DGkpaWZvVYmk+Hs2bPYvHlzg/4MWtJTTz0FLy8v/Pe//0VhYaHUcchOcFEOEdVq9erVtc6lPH/+PL7//nv06NEDY8aMQZs2bZCdnY2DBw+iQ4cOePrpp2sdgfTw8LC6Fjtbt27FnDlz4O3tjczMTLta/KDVahEbG4uMjAwIglDrYp3K835+fggPD6+xL2VRURGee+45LF++HIMGDTId37t3L5555hlcvHgRMpkM06dPx5QpU/D3v/8dgiBg0aJF+Oqrr6q9plKpRHx8vGkP+foSBAGCICAsLAxz5841Kyg7d+6M++67DzNmzMD999/fqheS/fzzz5g1axbmzJmD77//Xuo4ZAdYUBJRreLj45GUlFRr66DLly/j4MGDyMnJMW292KdPH0ycOLHORSqjRo2y+Bw9S1u/fj3OnDmD9957D5cvX8bgwYPh5OSEK1eumD0itSeVj5HT09OrXVTl4eEBf3//Ko+RGyMxMRFPP/00zp8/X+Xchg0b8Nhjj1V5vSUez+/ZswdKpRIrVqxAeHg4Bg8eXK+uBa1F7969kZmZiRs3bpgWQBE1FxaURFQrtVqNqKioZrv+l19+CScnJ/Ts2dM0miSKIpycnLB+/fpWsdClf//+OH/+PIYPH4709HQUFhbi8OHDGDNmjNTRWgWdTgeNRgO9Xg+5XN5sO+C8/PLLVaYXODk5ISUlxTSyqVQqERMTY7F7RkREmOZ8WpsDBw4gODgY99xzD37//Xep45CNs86Z8ETUYjw9PeHn52fxXoSCIMDV1RU5OTkwGo24evVqlde0hlWq+fn5uHDhAgDgxIkTAIAXXniBxeQdnJyc4OXl1az30Ov12LRpU5XjlXMqU1NT4e7ujvj4eIveNz4+Hr6+vo3ehlJKEydOxJAhQ7Bz505cunQJvXv3ljoS2TCOUBJRnbRaLaKioixa4MnlckRGRuLUqVMIDQ01e6Quk8kQERGBX3/91WL3a6ydO3fi3nvvNTvm5OSEr7/+GnPnzpUolf3JysqCn59fjT+Dzs7OeOONN1BWVlbtLz4ZGRk4ffo0srKyUFBQABcXF3Tr1g2TJk0ydSSojiAI8PHxMS0csjZnz57F4MGDMXr0aBw9elTqOGTD7GcmORE1WnP1IlQoFJg8eTLeeusts3NGoxG//fYbnnnmGZSWllr0vpXq24/w8OHDVebN6XQ6/POf/6x1XilZlre3N27duoWcnByo1WrcvHkTt27dglarxaVLl/Dcc8+hpKSkxlH05ORk5OXlYcyYMZg/fz6mT5+OoqIirFu3DhkZGTXeVxRFZGRkQK1WN9e31qwGDRqECRMm4NixY0hJSZE6DtkwjlASUb1ZarFDaGioWfsYURRx3333YdeuXQCAgQMHQqVSITc3F46Ojnj00Ufx2WefVVngo9frodfr4eLiUq/7NqYfYWBgoOlRNwC0bdsWS5cuxbPPPouePXs2+Hun5lHX4rHCwkK4ubmZHSsrK8Pq1avRuXNnLFq0qMZrC4KAoKCgVr94rCZXr16Fj48P+vfvj3Pnzkkdh2wURyiJqN6aqxehIAj49ttv0aVLFxgMBqxbtw45OTn49ttv0alTJ2zYsAEdOnTAo48+atZX79FHH0Xfvn3r3CKysf0Is7KyTMVkly5d8PHHH+PGjRv497//zWKylUlLS6t1xPjuYhKoeEzu6emJgoKCWq8tiiLS09ObnFEqPXv2xH333YfU1FTTL21ElsYRSiJqsOboRQhUzPc6fPgwnnrqKbPjP/74I55//nlkZWXBwcEBs2bNwnPPPYfx48cDAO677z5s37692iK3Kf0I9Xo94uLiMG3aNLz55ptWu6OPrSsrK8N7773X4PeVlpbi448/hq+vb73mw7700kutuvdkbTQaDTp37ozu3bvjypUrUschG8SCkogarSV7EQJAbGwsli1
bZtofXCaTmUal3n//ffzzn/80e31TH9EbjUbIZDKEhIQgODi40deh5qVSqbBmzZoGv+/HH3/EuXPn8MQTT9S6MKfS008/3eyr2ZvTokWL8M0332Dz5s145JFHpI5DNoYFJRFZREv1IgQqGo0/8cQTZsdkMhn27dtnKvzYj9B+ZGdnY/369Q16z969e5GYmIiwsDCMHj26Xu9ZsmQJevTo0ZiIrUJpaSnat2+PDh06WO0iI2q9OIeSiCyishdhjx494OXl1ayPBvft21fl8bbRaMTUqVNx+vRpaLXaZulHWNdcTZJGQ6ci7Nu3D4mJiQgNDa13MdmY+7Q2Li4uWLZsGW7evIlVq1ZJHYdsDEcoicjqtGvXzmxxzp0cHR2xcuVKGAyGGudMZmdnIyEhAVlZWTAajejevTtCQ0NrXWhj7f0IbZlOp8O7775br9fu27cP+/btw+TJkzF58uQG3cea51BWEkUR7dq1gyAIyM/Pt6t96Kl58SeJiKzO2bNncfz4cSQnJ0OpVOLkyZM4ffo0YmNjERkZifLy8hqLyWvXrmHDhg0oLy/HzJkzMWvWLOj1enz99dfIysqq8Z7W3o/Qljk5OdVrJ5v9+/ebpkU0tJhszikcLUkQBLz66qsoLCzEq6++KnUcsiEcoSQim1JXP8Lo6GioVCosX77cVCCUlZVh1apV6NixI5YsWVLjta29H6Etq+vv/fDhw9i5cyf8/f0xadKkKue9vb1rvLat/b2LoohOnTqhuLgYBQUFNlEok/Sse0IIEdFd6upHmJWVhYCAALMPUWdnZ/Tq1Qt//PEHbt++jXbt2lX7XmvvR2jLgoKCcPz48RrPV+7Hnp6eXu3f4cqVK2t8ryiKCAoKanLG1kIQBPznP//B448/jmeffRZffvklTpw4AYPBYFPfJ7UsFpREZDPKysrqXDhjMBiqXVzh4OAAAMjJyamxoAQq+vnpdDqO6rQynp6e8PPzQ2ZmZrXTHRYvXtyo61bOnbVE26vWZPHixXj55Zexbt06XL9+HTExMRgwYAB30qFG4xxKIrIZ9VmF7enpiezsbLOiw2Aw4Nq1awCAkpKSOq9RXc9Nkl54eLjFF5kIgoDw8HCLXrM1yMrKwsCBA2EwGBAbGwsAKC4uljgVWTMWlERkM/R6fZ2vGTVqFG7duoXffvsNBQUFyM/PR2xsLPLy8gBU9LO0xH2o5SkUCovPcwwLC6vXgh9rkpCQgN69e5ua/ldOEdHpdFLGIivHR95EZDPq0ycwMDAQxcXFSExMRHJyMgCgR48eGDduHA4dOlTr4+6G3IekERgYiMLCwibtkFQpNDTUJpvZ9+zZE15eXqZR+UosKKkp+K8iEdkMDw+Per1uwoQJGDNmDG7dugVnZ2e4u7sjJiYGjo6O9dqCr773IWkEBwfDzc2t0Xu4C4KAsLAwmywmAaB37944e/Ys/vznP+Pbb781HS8tLZUwFVk7PvImIptR336EQMUoY5cuXeDu7o68vDycPXsWI0aMgKOjY63vs5V+hLYuMDAQkZGR8PHxAYA651ZWnvfx8UFkZKTNFpOV2rdvj+joaHz33Xdo06YNAKCoqKjK63Q6HVQqFbKzs6FSqTiKSTViH0oisil19SPMycnBH3/8gW7dusHBwQE5OTk4ePAg3N3dsWjRIjg7O9d4bVvrR2gv1Go1kpOTkZ6eXu2CKg8PD/j7+yMoKMjmVnPXx5UrVzB06FDk5+cjKysLzs7OSE5ORlpaWrUL3RQKBQICAuz2z4uqx4KSiGyKWq1GVFRUjedv3ryJmJgY5ObmQqfToUOHDhg0aBAmTJhQr5HHyMhIfohaMZ1OB41GA71ej+DgYBgMBly5ckXqWJLLy8tDYGAg5s+fD7lcDplMVms/18rzfn5+CA8Pt7mFS9RwLCiJyOZER0fX2I+wsbiXt20pKiqCm5sbAODnn3/Ggw8+KG0giSmVSsTFxcFoNNZaSN7NHuacUv1wDiUR2Rz2I6S6/P7776b//7HHHsP169clTCOtxMRExMTEQBTFBhWTQMUuQnq9HjExMUhMTGymhGQNWFASkc1hP0Kqy08//WTqOVpYWIj58+dbdETbWiiVSou0WAIq+lsqlUqLXIusDwtKIrJJgYGBCAkJsci1bLUfob3S6XT49ddfTaNxBoMB+/btw8cffyxxspal1WoRHx9v0WvGx8fXa8cqsj2cQ0lENk2pVLIfIZnZuXMn7r333irH5XI5rl69iq5du0qQquVFR0dj7969OHnyJLKyslBQUAAXFxd069YNkyZNMuvJevToUZw5c8a0l33btm3h7e2NSZMmoXPnzqbXca6x/WJjcyKyaYGBgfD19UVsbCwyMjIgCEKthWXleR8fH65etVF3P+J1dHTE5MmTMWLECHTo0EGiVC1LrVYjIyMDx48fR3FxMcaMGQNPT08UFRXhyJEjWLduHR599FH4+fkBqNjjPiAgAF26dIGrqyu0Wi0OHjyItWvX4umnn0anTp0AVMypzMjIgFqtlqwbwp0r+eVyOXvHthCOUBKR3WA/QgKAq1ev4siRI+jbty/uueceAEBubq7EqVpWZb/W27dvm1a7VyorK8Pq1avRuXNnLFq0qMZrqNVqfP755wgODkZoaKjpuBT9Wiv/t83emdLhCCUR2Q1PT0/ThxxHMexXz5490bNnTwAVO+OcOHFC4kQtLy0tDUajsUoxCQDOzs7w9PREQUFBrdeo3GHn7o4KoigiPT3dcmFrodVqTU8fauudqdVqkZSUhOPHj7N3ZjPhohwisktOTk7w8vJCjx494OXlxWLSTg0cOBB6vd6uFpKUlZXV+v2Wlpbixo0b1Y7kVbYJUqvV2L59O9q2bYvhw4dXeV3lXMvmpFQqERUVhczMTACos+VR5fnMzExERUVxRbqFcYSSiIjs1pgxY7Bx40bs3bsXDz30kNRxWkRdxXNcXBzKy8sRHBxc5dzbb78Ng8EAAOjYsSMee+yxGuedajQaeHl5NT1wNRITExvd7qhygV5MTAwKCwur/T6p4ThCSUREdqty7t/BgwclTtJy9Hp9jef27t2LM2fO4N577zVb5V1pyZIlWLJkCWbNmgUnJyds3LixxvmnlfcxGAw4ePAg/va3v8HPzw/vvvtuk/Kzd2brxBFKIiKyWwEBAZDJZDh16pTUUVqMXF79R/++ffuQmJiI0NBQjB49utrXVBaZ3t7e6Nu3L1avXo09e/bgkUceqfLaXbt24fDhw/jpp5+g0Wggl8uh1+tx69ataq99/fp1/OUvf8GLL76IUaNGVfuayt6ZGRkZOH36dK3tjkRRxNGjR3Hp0iXk5uaipKQE7u7u6Nu3LyZMmABXV1cAFQuUfH19OaeyiThCSUREdq1du3bIyMiQOkaL8fDwqHJs37592LdvHyZPnlzvR8DOzs7o1KlTtQWi0WjE0qVLsW7dOlNHhcoRSzc3t2pbd+3cuRM///wzxo0bh3fffdf0aP1OsbGxEEURycnJyMvLw5gxYzB//nxMnz4dRUVFWLdunenvsry8HPv27YO7uzumT5+O+f
PnIzAwECkpKfjqq69QXl4OoKLwjI2Nrdf3TDXjCCUREdm1bt264cqVK1LHaDFOTk5QKBSmuZT79+/Hvn37EBwcjMmTJ9f7OkVFRcjJyTGtmL/7Hu7u7tU+Dn/99dfxxhtvoG3btujatSsCAgIQGBiItLQ00yjmihUr8Ntvv2Hz5s3w9vYG8L/emQAwY8aMKivU/f39sXr1ahw4cAB+fn5wdHTEc889Z1qNDgC+vr7o0KEDtm3bhtTUVAwdOrRV9M60BRyhJCIiu9anTx+UlJTUOrfQ1lQ+6j98+DASEhLg7++PgIAAZGVlmf0HVKz6/u9//4sjR47g4sWLyMjIQFJSEjZs2ACDwYBJkyaZXVsQBAwfPhzXrl3DG2+8AUEQ4ODgYDq/bNkyTJ06FZ6enrh27Rp+++03vPXWW9iyZYvZ38HBgwfRu3dvvPDCCyguLkZycrJp//X6tDsSBMGsmKzUvXt3ADBriyQIApKTkxv7x0ngCCUREdm5ESNGYPv27Th+/DjGjRsndZwWERQUhOPHj+PChQsAgPT09Gp7R65cuRJyuRxeXl5ISUlBQUEB9Ho93Nzc4OPjg4cffths60Wg4hFyUFAQ5HI5Xn31VUydOhVz5sxBVlYWPDw8sHr16iqvT0lJwb333ltlBXp5eTk++OADfPDBB1i5cmWt31NluyNfX99aX3f58mUAMBuNbMnembaKBSUREdm1yse8CQkJdlNQenp6ws/PD0uWLKlzj3u5XI7777+/Xtet3Mv7zmJt7NixOHv2LJ5//nm0a9eu2vcMGTIE+fn5AAAHBwcYDAb06NED9957L/r06YNOnTqZRkxrUlu7o0oFBQXYvXs3unXrhj59+pidq+ydyZ60jcOCkoiI7NqYMWMAACkpKRInaVnh4eGIioqqs6BsCEEQEB4eXuV4+/btsX79+hrfV1ZWBmdnZ/j6+mL27NmYOXMmhgwZYnrErVKpsGbNmhrfX9nuKCwsrNp2RwBQXFyMTZs2AQD+9Kc/VdnhB2je3pm2jgUlERHZNScnJ7i4uJge/9oLhUKBsLAwxMTEWOyaYWFhjWq/0759e9y+fdtsruWdapvfWp92RyUlJYiOjkZBQQEWLVpU7Ur3uu5DteOiHCIisnuenp64fv261DFaXGBgIEJCQixyrdDQUAQGBjb6/TUVk0DtvTPrandUUlKCb775BlqtFgsXLqx1BLKm+1DdWFASEZHd8/X1NVv1a0+Cg4MREREBuVxe7WPg2giCALlcjoiICEycOLGZElbfO7M+7Y7uLCYXLFiArl27Nvg+VD8sxYmIyO4NHToUiYmJuHLlCnr16iV1nBYXGBgIX19fxMbG1qvJuyAIEEURPj4+CA8Pb/ZdZu7unVldu6M7eXt7o7y8HNHR0bhx4wamT58OURTNXte2bVuzAtLDw4MLcpqABSUREdm98ePH49NPP8WuXbvwxBNPSB1HEgqFAgsWLEBOTg6efvppBAYGwmg0Vnmdh4cH/P39ERQU1KKNwAMCApCUlASj0VivdkeFhYWmaQw7duyo8pqhQ4di5syZACoKZH9//2ZMb/tkxup+WoiIiOzIzZs34enpiSVLlmDdunVSx5HUm2++iddeew1ffvklFi9eDI1GA71eD7lcLukonlqtRlRUVLNdPzIykjvlNAFHKImIyO516tQJDg4OOHv2rNRRJJWamorXX38dQMXWik5OTq2mjU5l78zMzEyLtzq6u3cmNRwX5RAREQFwd3e3qz2973bz5k2EhYXBYDAAAE6ePCltoGqEh4c3eOFQXWrqnUkNw4KSiIgIFQs5NBqN1DEkodPp8OCDDyI7O9t0LCEhQcJE1avsnWlJje2dSeZYUBIREQEYMGAAdDodiouLpY7SooxGI55++mkcOXLE7FFydnY2rl27JmGy6rWm3pn0PywoiYiIAIwaNQpARbNse6LVavHtt99CFEXTVoeVDh06JFGq2llD70x7w4KSiIgIwJQpUwBUNMy2Jx4eHjh//jzWrl2Ldu3amZ07ceKERKnqFhgYiMjISPj4+ABAnYVl5dxQHx8fREZGcmTSwtg2iIiICIAoipDL5Zg2bRp+//13qeNIwtnZGf7+/ti5cyeOHj2KkSNHomfPnlLHqpNarUZycjLS09OrnQfr5uaGPXv2IC0tDSkpKWjfvr0EKW0bC0oiIqL/165dO3Tu3BmXLl2SOkqLS01NxcCBAxEZGYnPP/9c6jiNptPpqvTO/OOPPzBs2DAAwJAhQ7B79262CbIwPvImIiL6f15eXsjJyZE6hiS+/vprAMDChQslTtI0lb0ze/ToAS8vLzg5OZktLjp79izGjh1bZbtGahoWlERERP/P398fRUVFFm2cbS12794NBwcHjB49WuooFndnQSmKIjIzMzF69GhcvHhRwlS2hQUlERHR/6tcqHH69GmJk7S88+fPW8V8yca4du0a5PL/bQ5oMBhw48YNPPnkkxKmsi0sKImIiP5fcHAwAGDPnj0SJ2lZKpUKxcXFGDdunNRRmkV2djYMBoOpLZKjoyOeeuqpZt0b3N6woCQiIvp/lX0Jk5KSJE7SsirnTz788MMSJ2keZWVlAICpU6fC0dERnp6eWLNmDQYOHChxMtvBVd5ERER3cHZ2Rr9+/XDq1Cmpo7SY4OBgHDx4EHq93uJ7ZbcG+fn5KC0tRZcuXTBz5kz88ssvuHjxIgICAqSOZjNYUBIREd2hW7duKCsrw61bt6SO0mIUCgVcXFxw48YNqaM0u3PnzmHQoEGYPXs2tm7dKnUcm2F7v4YQERE1gY+PD/Lz86WO0WIKCwuRl5eHoKAgqaO0iIEDB8LLywtxcXFSR7EpLCiJiIjuMGjQIBgMBqhUKqmjtIgtW7YAAGbNmiVxkpazcOFCFBcX49dff5U6is1gQUlERHSHypXO9rLS++effwYAzJkzR+IkLefVV1+FTCbD22+/LXUUm8GCkoiI6A5TpkwBABw+fFjiJC0jOTkZCoUCbdq0kTpKi3Fzc8PgwYORkpKC0tJSqePYBBaUREREd/D29oYgCHbR3Fyv1yM3NxdDhgyROkqL++c//wlRFPHBBx9IHcUmsKAkIiK6S4cOHXD58mWpYzS7uLg4GI1G3HfffVJHaXGPPPIInJ2dsW7dOqmj2AQWlERERHfp3r07bt68KXWMZlfZNmfRokUSJ2l5giBg2rRpyMrKsotfHpobC0oiIqK79O3bF2VlZdDpdFJHaVaHDx9G27Zt0blzZ6mjSKJyUc7LL78scRLrx4KSiIjoLpU9GQ8ePChxkuaVlZWFfv36SR1DMkOGDEHnzp2xfft2qaNYPRaUREREdwkNDQUA7N+/X+Ikzefw4cMwGAyYNm2a1FEk9eijj6KoqAi//fab1FGsGgtKIiKiu4wYMQIAkJKSInGS5vPtt98CAB577DFpg0jstddeg0wmw5tvvil1FKvGvbyJiIiq0bZtW/To0QMXLlyQOkqzGDhwINLT01FWViZ1FMkNHjwYqampKCkpgZOTk9RxrBJHKImIiKrRuXNn3LhxQ+oYzebSp
Uvw8/OTOkar8Le//Q2iKOI///mP1FGsFgtKIiKiavTu3RuFhYXQarU4cuQI0tPTpY5kMWlpaSgrK8PkyZOljtIqLFy4EE5OTlizZo3UUayWXOoARERErcm2bduwZ88e/PHHHzAajfDw8AAAjBkzBkeOHJE4nWVs3LgRADB//nxpg7QSgiBgypQpiI+Px9WrV9GzZ0+pI1kdjlASERHd4R//+AfWrFmD69evm47JZDJMmDBBwlSWtXPnTjg4OGDcuHFSR2k1KhflvPLKKxInsU5clENERHSHHTt2ICwsrMrxw4cPY+zYsRIksjw3Nzd06tQJmZmZUkdpVTw9PVFWVoaCggKpo1gdjlASERHdYfr06Xj22WchCP/7iPT09MTo0aMlTGU5arUaRUVFNlMcW9K8efNw+/Zt7Nq1S+ooVocFJRER0V3ef/999O3b1/T1Qw89ZFZgWrNvvvkGAPDwww9LnKT1WblyJQDg9ddflzaIFbKN/3UQERFZkIuLC7Zu3Wr6etasWRKmsazY2FjIZDJERERIHaXVUSgU6N+/P44ePQq9Xi91HKvCgpKIiKgagwYNMm1LOGbMGInTWM7p06fh6ekJuZyNXqrz3HPPwWAw4OOPP5Y6ilXhohwiIqIaHDp0CKGhoUhISEDPnj0hl8vh4eFhtbuplJaWwtXVFWFhYdy7ugaiKMLFxQU9evRARkaG1HGsBn89ISIiuotarUZycjLS0tKwYsWKKos0FAoFAgICEBQUBE9PT4lSNtyWLVsAAA8++KC0QVoxQRAQEhKCnTt3Ijs7Gz169JA6klXgCCUREdH/02q1iI2NRUZGBmQyGWr7iKw87+fnh/DwcCgUihZM2jgPPPAAtm/fjtu3b8PNzU3qOK3W8ePHMXr0aDz22GPYsGGD1HGsAgtKIiIiAEqlEvHx8RBFEaIo1vt9giBAEASEhYUhMDCwGRM2Xbdu3VBcXIy8vDypo7R6HTt2hF6vR35+vtRRrAIX5RARkd1LTExETEwM9Hp9g4pJoGLOnV6vR0xMDBITE5spYdOJogiVSoXBgwdLHcUqzJ07FwUFBdi7d6/UUawCC0oiIrJrSqUSCQkJFrlWQkIClEqlRa5laTt27IDRaMSMGTOkjmIVKntRsidl/XBRDhER2S2tVov4+HgAQFlZGfbv3w+VSgWVSoXi4mJMmjQJISEhZu+pbH5dnY4dO0Iul8PX17fVzan8/vvvAQCLFi2SOIl16NSpE/r06YPDhw9Dr9ezzVIdOEJJRER2KzY21vSIu7i4GCkpKTAYDOjXr1+N71myZEmV/6ZPnw4A6N+/P0RRRGxsbIvkb4hDhw6hTZs26Natm9RRrMby5cuh1+vx2WefSR2l1WNBSUREdkmtViMjI8NUULq7u+PFF1/E4sWLMWXKlBrf5+3tXeU/lUoFABg+fDhEUURGRgbUanWLfB/1dfXqVfTp00fqGFZl6dKlkMvlLCjrgQUlERHZpeTkZMhkMtPXMpnM7Ov6Kisrw7lz5+Dj44OOHTsCqFj5nZycbLGsTZWUlAS9Xo+pU6dKHcWqCIKA4OBgXLp0CQcPHsSKFSvwyCOPNHjhlj3ghAAiIrJLaWlptfaZrK+zZ8+ivLzcrGWQKIpIT09v8rUtJTo6GgDnTzZUXl4eAgMDsXfvXkycOBFARZEZHR0NQeCY3J1YUBIRkd0pKyuDVqu1yLWUSiVcXFzQv39/s+MajQY6na5VbNOYkJAAR0dHDBo0SOooViMuLg4zZ86EXq83O1658IrMsbwmIiK7Y6liMjc3F9euXcPgwYPh6OhY5bxGo7HIfRrjwoULSElJgV6vR3p6Onx9fSXLYo169uwJFxeXKiOR3bt3lyhR68aCkoiI7M7do06NVdlzsqYdcix1n8ZYuHAhgoKC0K5dO5SWlsLNzQ0JCQkoKSmRLJM1GTx4MA4dOgSFQgEHBwfT8V69ekmYqvViQUlERHbHEo8s9Xo9Tp8+ja5du6Jr167Ndp/GGjx4MARBQGlpKQDg5MmTCA0NrdJXk2o2ePBgHD161KzVEkcoq8eCkoiI7I6Hh0eTr3HhwgUUFxfXun+3Je7TWBMnTjRbjVz5/8+bN0+qSFapd+/eOHr0KDp37gwAyMrKkjhR68RZpUREZHecnJygUCiqzKVMS0uDTqeDTqcDUNGr8ty5cwCAgIAAswU2J06cgFwur3FvbA8PD0kX5EyYMMHsa5lMhieffBLLli2TKJH16tatG86cOYPu3bubpgzodDpoNBrTLjpS/31LTWa0RM8EIiIiKxMfH4+kpCSz1kEff/wx8vPzq3398uXLTdsp5ufn45NPPsGQIUMwc+bMKq8VRRF6vR6BgYFwdnZGWVmZ6T+dToewsDD07Nmzeb6x/2c0GtGpUydoNBrIZDIEBwdj165d1S4eovp56623kJqaijFjxlS7sEuhUCAgIABBQUHw9PSUIKF0WFASEZFdUqvViIqKarbrf/bZZ7h582a159544w28+uqrzXbvSkOHDsXp06fh6emJ8+fPS/oI3ppptVrExsYiIyMDMpms1v6llef9/PwQHh7e6vZ0by6cQ0lERHbJ09MTfn5+Fm9QLQgC/Pz88NRTT9V4fsGCBRa9Z03atGkDANizZw+LyUZSKpWIiopCZmYmANTZDL/yfGZmJqKiokydAGwdC0oiIrJb4eHhzVJQhoeH4+2338by5curbOfYpk0bnD171qL3rKTT6aBSqZCdnQ2VSgVPT09MmzatxnmeVLvExETExMRAr9c3eLvFymkPMTExSExMbKaErQcfeRMRkV1TKpWIiYmx2PUiIiJMK7/Ly8sRGhqKI0eOwGAwAPjfI1EvLy+88cYbePLJJ83e/80338DNzQ2zZs2q1/3UajWSk5ORlpZW7bw+V1dXDB482C7n9TVFc/5c2CIWlEREZPcSExORkJDQ5OuEhoaa9nyulJubi6FDh0KlUuGee+7Btm3bsHz5cmzevBk6nQ4dOnTA888/j1dffRU3b96Et7c3DAYDEhISqlzrTpzX13y0Wi2ioqJw8eJFnD59GllZWSgoKICLiwu6deuGSZMmmfWmvHLlCk6ePAmVSoXc3FwYDAazRVxARU/SyMhIm/2zZ0FJRESEihGp+Ph4iKLYoMebgiBAEASEhYXVOAKVlJSEhx56CFu3bsWYMWMAVDRGf+WVV/DZZ5+hqKgILi4uGDBgAE6cOAGZTAaFQoFTp05V20i7ObMSEB0djczMTHz//fcoLi7GwIED4enpiaKiIhw5cgTXr1/Ho48+Cj8/PwDAvn37cOLECXTt2hWlpaXIzMysUlAKggAfH58Wmz/b0lhQEhER/b87R/0EQai1WKs8X99RP6PRWGU+JVAx1+7TTz/Fm2++iVu3bpldf8SIEThw4ACcnZ1Nxy01mhoSEoLg4OAmX8fW3Ln6v7CwEG5ubmbny8rKsHr1anTu3BmLFi0CUPF3WDkX99ChQ9i1a1eVgrJSZGSkTU49YGNzIiKi/6dQKLBgwQLTvMT09HRoNJoqr/Pw8IC/v3+D5iVWV0wCFYXj
8uXLodfr8fe//910XBRFJCUlYeHChdiyZQuAipFJSxSTAJCQkAA3NzeOVN4lOTnZNEXg7mISAJydneHp6YmCggLTsfou7BIEAcnJyQgLC7NY3taCBSUREdFdPD09TR/6LbEjiiiK+PDDD6s9t3XrVmRnZ2PdunWIj49HWVkZ9u/fD5VKBZVKheLiYkyaNKnKHt1GoxFKpRLJycm4desWHBwc0LlzZ4wfPx59+vQBUNHc3dfX12bn9TVGWlparfNRS0tLcePGDfj6+jb42qIoIj09vSnxWi0WlERERLVwcnKCl5dXs95DJpNh8uTJuHXrFlxcXODi4gJnZ2eUlpbi+PHjOHPmDKKiotC5c2cUFxcjJSUFXl5e6NevX419DhMSEpCYmIigoCBMnToVer0ex44dw+bNm/Hwww9jwIABEEURsbGxNjuvr6HKysqqXSl/p7i4OJSXlzd6uoBGo4FOp7O5bRpZUBIREUlMJpPh+++/r/H8lStXsHHjRoiiCHd3d7z44ouQyWQoKiqqsaA8ceIEevbsifDwcNMxPz8//Pvf/8apU6dMBWVGRgbUarVNzutrqLqKyb179+LMmTMICwszW+XdUBqNptl/SWlpbGxORETUyqWmpprmYMpkshrnY97JwcHBbDEPADg6OkIul0Mu/994UuW8PqpYeV+Tffv2ITExEaGhoRg9enSz3cdasaAkIiJq5eqa11ed0aNHIz09HUqlEiUlJbh9+zZ27NiBsrIys4LIluf1NdSdhfad9u3bh3379mHy5MkWWRlf032sme19R0RERDakPvP6qjN27Fg4OjoiLi4O27dvB1Cxa84jjzyCnj17mr3WVuf1NVR1+53v378f+/btQ3BwMCZPntxs97F2LCiJiIhascYUk0DFHMr4+HiMGjUKAQEBMBgMOHXqFL7//nvMmTMH/v7+Zq+3xXl9DeXk5ASFQmH6Mz98+DASEhLg7++PgIAAZGVlmb3e29sbAFBUVITMzEwAFTsjAUB6ejratGmDtm3bwsfHx/Se5ugS0BqwoCQiImrFGjPfrqSkBHFxcQgMDMS9995rOh4QEIANGzYgNjYWzz33XJPvY4sCAgKQlJQEo9GICxcuAKgoDqubFrBy5UoAFUXktm3bzM7FxcUBAHr16oXFixcDqJivenchbytYUBIREbVijZlvd/PmTej1+mq3bezWrRuuXLmCsrIys0U7tjivrzGCgoJw/PhxADAVgnXx9fU1FZe1EUURQUFBTYnXanFRDhERUSvWmPl27dq1AwBkZ2ebHTcajcjOzoaLi0uVx662OK+vMTw9PeHn51fv3W/qSxAE+Pn52Wx7Jv46QkRE1IrdPa8PqFj1rdPpoNPpAFTsP33u3DkAFY9s3d3d0b9/f6SkpMDBwcE0h/LkyZPIyspCSEiIWeshW53X11jh4eGIioqqdS/3hhIEwawnqK2RGRvah4CIiIhaVHx8vGleHwB8/PHHyM/Pr/a1y5cvh0KhQHl5OY4fP47Tp09Dq9XCwcEBHTt2xKhRozB48GBTQSkIAoKCgmxyf+mmUCqViImJsdj1IiIibHrfdBaURERErZxarUZUVFSzXT8yMtJmH8U2RWJiIhISEpp8ndDQUEycONECiVovzqEkIiJq5TivTxrBwcGIiIiAXC5v8J+9IAiQy+WIiIiw+WIS4AglERGRVdBqtYiKirJoex+5XI7IyEgoFAqLXdMWabVaxMbGIiMjAzKZrNZdiwRBgCiK8PPzQ3h4uN382bKgJCIishKc1ycttVqNl156CW3btq12VbyHhwf8/f0RFBRkd6O+XOVNRERkJQIDA1FYWGixeX0sJhsmKSkJ69evB1DRPD4vLw96vR5yudzuV8pzhJKIiMjKKJVKxMfHQxTFBrW2EQQBgiAgLCyMxWQDnThxAmPHjkVZWRkAIDU1Ff3795c4VevBEUoiIiIrExgYCF9fX9O8vsp5ezUxGAxwcHCAj4+PXc3rs5Ts7GxMnz7d1PcTAI4dO8aC8g4coSQiIrJiarUaycnJSE9Ph0ajqXJeoVBgx44dOHPmDJRKpd3N7WuqgoICjB07FhcvXjQtiHJwcMATTzyBL7/8UuJ0rQcLSiIiIhuh0+mg0WjM5vUVFBSYisjevXtj//791e7xTdV79dVX8dZbb1U5PmDAANPuRMSCkoiIyKYplUqMGDECQMUcyu7du2Pfvn3w8/OTOJl1uH79OtavX4+4uDgcO3bMdFwQBBQUFKBt27YSpms9OIeSiIjIhl29etX0/4uiiOvXr2Ps2LHYt28f5wDWQ7du3fDqq6/C0dERx44dw5tvvgmdTofs7Gy4uLhIHa/VYEFJRERkw7KysswW7RgMBuTm5uLhhx/GmTNnJE5nPbZv3w5BELBixQqL71hkC/gnQkREZMMqRyhlMhmAike1s2bNata9wW3RmTNn0L17dxaTNeAIJRERkQ0rKiqCKIoYN24cUlJS0LZtW/z4449Sx7Iq169fR2FhIe6//36po7RaLLOJiIhs2HvvvYesrCwcOnQIDz30EDQaDU6fPi11LKuydu1aAMCiRYskTtJ6cZU3ERGRnbh8+TL8/PzwwAMP4JdffpE6jtUYOXIklEolysvL+ci7BiwoiYiI7Ej37t2h1WpRXFwsdRSr0bZtW3Tu3BmXL1+WOkqrxTKbiIjIjixYsAAlJSX49ddfpY5iFa5cuYLi4mJMnjxZ6iitGgtKIiIiO7JixQrIZDK8++67UkexCv/9738BAI899pi0QVo5PvImIiKyMwMHDsSFCxdQWloKuZwNX2ozfPhwnDlzBuXl5abWS1QVRyiJiIjszLPPPguDwYBVq1ZJHaXVO3/+PHx9fVlM1oEFJRERkZ158sknIZfL8eWXX0odpVWrHMUNDQ2VOkqrx4KSiIjIzgiCgPHjxyM9PR03b96UOk6rtW7dOgDAkiVLJE7S+rGgJCIiskOvvvoqAOD111+XOEnr9fvvv8PR0RGjRo2SOkqrx0U5REREdqpdu3ZwdXVFbm6u1FFaJRcXF/j4+OD8+fNSR2n1OEJJRERkp8LDw6FWq3H27Fmpo7Q6Z8+eRVlZGaZOnSp1FKvAgpKIiMhOvfHGGwD+9/ib/qdy/+4nnnhC4iTWgY+8iYiI7Fi3bt2Qn5+PoqIiqaO0Kv3790dGRgbKysqkjmIVOEJJRERkxx599FEUFxcjJiZG6iityqVLlxAQECB1DKvBgpKIiMiOvfLKK9yK8S7JyckoLy/HvffeK3UUq8GCkoiIyI61b98e/fr1w/Hjx6HX66WO0yqsX78eQEUDeKofFpRERER2btmyZTAYDPjss8+kjtIq7NmzB87OzujXr5/UUawGF+UQERHZOVEU4ezsDF9fX1y8eFHqOJJzdHTEwIEDcfLkSamjWA2OUBIREdk5QRAwduxYpKWlQaPRSB1HUgcPHoRer8f06dOljmJVWFASERGRqRdlZW9Ke/XVV18BAJ566imJk1gXPvImIiIiABVbMbZp0wY5OTlSR5FM7969cePGDRQXF0sdxapwhJKIiIgAAGF
hYcjNzUVqaqrUUSQhiiKuXLmCAQMGSB3F6rCgJCIiIgDAm2++CQB47bXXJE4ijYSEBBgMBtx3331SR7E6fORNREREJl27dkVBQYFdbsW4cOFCREdH4+rVq/D29pY6jlXhCCURERGZzJs3D8XFxYiPj5c6SotLTExE27ZtWUw2AkcoiYiIyCQvLw8KhQLjxo3DoUOHpI7TYkRRhKOjI0aOHImjR49KHcfqcISSiIiITNzd3dGvXz8cO3bMrrZijI+PhyiKiIiIkDqKVWJBSURERGb+/Oc/w2Aw4IsvvpA6SouJjo4GACxZskTiJNaJj7yJiIjIjF6vh6urK3r37o3z589LHadF9OjRAwUFBSgoKJA6ilXiCCURERGZkcvlGDNmDC5cuIC8vDyp4zQ7vV6P69evY8iQIVJHsVosKImIiKiKV155BYB9bMX466+/wmg0YubMmVJHsVp85E1ERETVcnNzg5ubG27cuIFDhw5BEASMGzdO6lgW99BDD+Gnn37CrVu34OHhIXUcq8SCkoiIiKoVFhaGHTt2oHPnzsjNzUWfPn1w4cIFqWNZXLdu3VBcXGwXj/ebCx95ExERkZk9e/ZgxIgR2LFjBwAgNzcXAODl5SVlrGah0+mgUqkwfPhwqaNYNbnUAYiIiKh12bNnD5RKpdkxuVwOHx8faQJZWHl5Od5//3307t0bt27dgtFoxKxZs6SOZdVYUBIREZGZN998EyqVChs2bDAdMxqN6Nmzp4SpLCcnJwevvvqq2bFTp05h27ZtiIiIgIuLi0TJrBcfeRMREZEZBwcHrFu3DsuWLTMdMxgMNlNQdu/evcrim40bN+Lhhx/Gp59+KlEq68aCkoiIiKoQBAGrVq3CihUrTMe6du0qYSLLkclkGDduHATBvAzy8fHB4sWLJUpl3VhQEhERUbVkMhnefvttPPjggwCAmzdvAvjfQpbs7GyoVCrodDoJUzbOne2PZDIZ3NzcsHPnTnTq1EnCVNaLbYOIiIioVqIoYvjw4ZgxYwa6du0KrVZb5TUKhQIBAQEICgqCp6enBCkbZu/evZgyZQqAigVHCQkJmDBhgsSprBcLSiIiIqqRVqtFbGwsMjIyIJPJUFvZUHnez88P4eHhUCgULZi0YW7fvo327dsDAL777jvMnTtX4kTWjQUlERERVUupVCI+Ph6iKEIUxXq/TxAECIKAsLAwBAYGNmPC+tPpdNBoNNDr9ZDL5XBzc0P79u0RFBSE48ePSx3P6rGgJCIioioSExORkJDQ5OuEhIQgODjYAokaTq1WIzk5GWlpadU+ptdoNAgMDMSMGTOs4jF9a8aCkoiIiMwolUrExMRY7HoREREtOlJpq4/pWzMWlERERGSi1WoRFRUFvV5vsWvK5XJERka2SLFmS4/prQkLSiIiIjKJjo5GZmYmSkpKsH//fqhUKqhUKhQXF2PSpEkICQkxe73RaMSxY8eQlJSEvLw8uLq6ol+/fpgyZQpcXV0BVBRrPj4+WLBgQbNmt4XH9NaKfSiJiIgIQMWcw4yMDIiiiOLiYqSkpMBgMKBfv341vmfnzp34/fff0a9fP8ybNw8TJkzAmTNn8M0338BgMACoaDuUkZEBtVrdbNmVSqVFikkASEhIqLKXOdWOe3kTERERACA5Odk0p9Dd3R0vvvgiZDIZioqKqi2wCgoKcPToUYwcORLTpk0DAPTu3Rtt27bFjz/+iJMnT2LEiBEAKkYpk5OTERYWZvHcWq0W8fHxFr1mfHw8fH19OaeynlhQEhEREQAgLS3NtIBFJpPV+frs7GwYjUYEBASYHe/Tpw8AIDU11VRQiqKI9PR0CyeuEBsbaxoFPX36NLKyslBQUAAXFxd069YNkyZNQrdu3czec/36dezatQvZ2dkQBAG+vr645557THt8i6KI2NjYZn9Mbyv4yJuIiIhQVlZWbWud2lQ+0pbLzcenKvfIzsnJMTuu0WjMtmk0Go04cuQI5s+fj6FDh9a4heMvv/yC3Nzcas/d+Zg+OTkZeXl5GDNmDObPn4/p06ejqKgI69atQ0ZGhtl7Nm7cCIPBgNmzZ+OBBx7ArVu3sGHDBhQVFQFomcf0toQFJRERETW4mARg6t149epVs+NZWVkAgJKSkirv0Wg0piJvyJAhGDduHL777jucPn3aVKDeKTMzEzNnzkTv3r2xevXqKqvPKx/TA8CMGTPw2GOPYeTIkfDx8cHAgQOxYMECuLq64sCBA6b3JCQkQC6XY968eejTpw8GDBiA+fPno6ioCIcPHza9rvIxPdWNBSURERE1qk2Ql5cXevXqhcOHD+PcuXMoKSnB1atXERsbC5lMVu1j8/DwcLRv3x5PPvkkzp49C6BipNLR0RGXLl1CYWGh2esrRwgLCwuxfPlyDB48GPv27TOdv/MxvZubW5X7OTs7w9PTEwUFBQAqRlUvXryI/v37w8XFxfQ6d3d3+Pr64o8//jAda87H9LaGcyiJiIioymPr+po9ezZ++eUXbNu2DQDg4OCAMWPGICMjA6WlpVVef/369Wr7Q5aXl2Pw4MGmrx0cHODo6Fgl1/nz5xESEoKAgAC8/PLLdY6slpaW4saNG/D19QVQMRKr1+vRpUuXKq/t0qULLl26hPLycjg6OgL432N6JyenOv4k7BsLSiIiIjItRmkoNzc3PProoygsLERhYSHc3d0h/7/27t+lrS6O4/jH20sES4dkUdBJEgp1KRKsCEqIOAgJXVt1kS6igzrpUujSf8HBRUXwHzClkyZ1KKG2WUQdIqI4qKT1B4IEI/EZ5N4aYvx18jxPTd8vCOTm3pwcMn043/PDtrW8vKwXL14UPZ9OpzU5Oan3798rm826Ze7a2lq9e/dO+/v7+vnzp379+qWjoyPt7u4WjVo67YyPj2tgYODG/n369Em5XM7dV/L09FSS3D0yr3I+y2azbqCULkNlXV3dHf+RvxOBEgAAyOPxyOv1PmgupXQZLJ2SczKZVC6XU0tLS8EzPp9PT58+1ejoqHp6ejQ2NqaZmRlJl9sNffz4sajdiYkJDQ0NSbocRT0/P1dbW5vevn2rly9famFhoWSfFhcXtbKyou7u7qJV3ndZxe4o56lBlYpACQAAJEmBQEDLy8vunMR0Oq2zszN39XUmk9Hq6qr7rMfj0Y8fPyRJXq9X2WxWGxsbSqVS6uzsLAhxlmXJ7/e717W1tZqentbAwIBGRkbU3t5+bZ9OTk4kXY4e9vf3a3BwUE1NTZKkvb29koEykUhoaWlJ4XBYr169cj+vqamR9Huk8ipnEdHVuZXSw6cD/E34hwAAgCQpGAzq27dv7nUsFtPx8bF7vba2prW1NUnS8PCwPB6PLi4ulEwmdXx8rKqqKtXV1enNmzdFp+vk83kFg8Gi32xtbVUymSzZp97eXtXX1+v169d69uxZwb1SZfpEIqFEIqFQKFR0hKLX65Vt29duQ7S/vy+fz1dQ7r7pd/AbgRIAAEi63AaosbFRW1tbyufzGh0dvfU7wWDw2qB4lXOWt7PN0H00NDSor6/v2nvXlem/fPmiRCKhjo
4OhUKhou88efJEz58/1/r6urq6ulRdXS1JOjo60tbWllpbWwue9/l8LMi5A7YNAgAArkgk4m5MXi6WZSkSiZS1TUcgEHDnQ379+lXxeFx+v1+BQEA7OzsFL0coFFIul9Pc3JzS6bTW19c1NzenmpoatbW1FfT7apkepVVdOBMlAAAAJKVSKc3Pz5etvWg0qubm5rK1d1Umk9HExIQkaWpqStvb2yWf/fDhg/v+tqMXHYODgw8aWf3bECgBAECRpaUlxeNx43bC4XDJBTflMjs765bpy8Up03OW991Q8gYAAEU6OjoUjUZl2/a9S+CWZcm2bUWj0X89TEqPr0xfiRihBAAAJR0eHioWi2lzc1OWZd04Cujcb2xsVCQSkdfr/c/6+ZjK9JWIQAkAAG6VyWT0/ft3bWxs6ODgoOi+z+eT3+9XMBj83+YcPqYyfaUhUAIAgHs5OzvTwcGBzs/PZdv2H7W1TiqV0ufPn5XP5+81p9KyLFmWpe7ubkYmH4BACQAAKspjKdNXEgIlAACoSI+hTF8pCJQAAKDi/cll+kpAoAQAAIAR9qEEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABj5B9C6Xh9TlurRAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "idx = 0 # Feel free to change this to visualize different graphs.\n", + "\n", + "# Visualize the graphs\n", + "display(test_data[idx].mol)\n", + "visualize_dgl_graph(gs[idx])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "db4b56f6-2aa4-4dcf-bf14-c7a64d88592b", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'n': tensor([[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.1201],\n", + " [0.0000, 0.0000, 0.0000, ..., 0.0000, 1.0000, 0.1401],\n", + " [0.0000, 0.0000, 0.0000, ..., 0.0000, 1.0000, 0.1201],\n", + " ...,\n", + " [0.0000, 0.0000, 0.0000, ..., 0.0000, 1.0000, 0.1201],\n", + " [0.0000, 0.0000, 0.0000, ..., 0.0000, 1.0000, 0.1201],\n", + " [0.0000, 0.0000, 0.0000, ..., 0.0000, 1.0000, 0.1201]])}\n", + "{'e': tensor([[0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " 
[0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.]],\n", + " dtype=torch.float64)}\n" + ] + } + ], + "source": [ + "# Examine the features\n", + "print(gs[idx].ndata)\n", + "print(gs[idx].edata)" + ] + }, + { + "cell_type": "markdown", + "id": "9958ec7f-fb10-4f42-a6f4-9b3806fcadcc", + "metadata": {}, + "source": [ + "# Use Chemprop featurizer with PyTorch Geometric" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "17f56f95-3df2-466e-b8c1-f9cea1eeb927", + "metadata": {}, + "outputs": [], + "source": [ + "# Install with https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html\n", + "import torch_geometric\n", + "from torch_geometric.data import Data\n", + "import networkx as nx" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "2cb5d0c8-88c2-43dd-8f49-84df9b80624d", + "metadata": {}, + "outputs": [], + "source": [ + "def convert_molgraph_to_pyg_graph(mg):\n", + " \"\"\"\n", + " Takes a Chemprop molgraph from featurizer and converts it to a PyTorch Geometric graph.\n", + " \"\"\"\n", + " # Instantiate a graph from the edges\n", + " data = Data(edge_index=torch.from_numpy(mg.edge_index), x=mg.V, edge_attr=mg.E)\n", + " return data\n", + "\n", + "\n", + "def visualize_pyg_graph(g):\n", + " \"\"\"\n", + " Visualize a PyTorch Geometric graph object.\n", + " \"\"\"\n", + " nx_G = torch_geometric.utils.to_networkx(g, to_undirected=False)\n", + " pos = nx.kamada_kawai_layout(nx_G)\n", + " nx.draw(nx_G, pos, with_labels=True, node_color=[[0.5, 0.5, 0.5]])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "83c1c816-7a5a-4ca0-9d06-59b796d5cee1", + "metadata": {}, + "outputs": [], + "source": [ + "# Convert the molgraphs to PyG graphs\n", + "pygs = [convert_molgraph_to_pyg_graph(x) for x in molgraphs]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "419558f2-434b-43c1-ad12-0fcda9e02bf0", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO2deVzU1f7/X7OwiAgaKIJSgAqIKRrmhlcTx6VE0wzKEk3L0ZZr1vfaqLcyc2m0foZtNqa5hKlganrJBUwJJRdAUGRRcEEEVARZZBtmzu+PQx8nBATmM/OZmc7z0R90ZuZzXjPCa87nnPciIoSAwWAwGG1FLLQABoPBMG+YjTIYDIZeMBtlMBgMvWA2ymAwGHrBbJTBYDD0gtkog8Fg6AWzUQaDwdALZqMMBoOhF8xGGQyGeVBUVHTixIm5c+cOHjx4+fLllZWVQiuqR8SymBgMhqlRV1eXm5t75cqVixcvpqen0x8KCgp0n+Pv75+SkiKUQl2YjTIYDIEpKSnJysrKyMjIysrKysrKzMzMyclRq9UNnubo6Ojt7S2VSmtqapKTkwGsX79+3rx5Qkj+G8xGGQyGUcnPz+cWmPSHK1euPPw0V1fXPn36eHl5+fn50R88PT1FIhF99KOPPlqxYoWtrW1cXNygQYOM+w4awmyUwWAYitLS0uzsbGqU1DQzMjIe3tO0sbHp0aOHrmn6+vq2b9+++Yu/88473377raura2Jiopubm8HexKNhNspgMPjh4WXm1atXH3aYTp06cQtM+oOHh4dY3OrjbrVaPXbs2OPHjw8dOvTYsWM2NjY8vY9Ww2yUwWC0HbVaPXfu3J07d9bW1mo0mgaP2tnZeXt7+/j4+Pr6+vr6+vj4eHt7P3KZ2XLu3r379NNPX716dd68eevXr+frsq2F2SiDwWg777///pdffkl/5muZ2QwpKSnvvfdeVFSUs7MzHTl37tzw4cMrKytVKpVcLudxrpbDbJTBYLSdbt265efnBwYGRkVFubq6Gnq64cOHnzx5cty4cdHR0RKJhA5GRESEhYVZWVkdPXr0X//6l6E1PAwLv2cwGG0kLy/v1q1bVlZWW7ZsMYKHAtixY0eXLl0OHz783//+lxucPn36ggUL1Gp1aGhoXl6eEWQ0gNloiyCECPLPw2CYMps2bdJoNC+++GLPnj2NM6O7u/uePXusra3XrFmzc+dObvyLL74YN25cYWFhSEhITU2NccRwMBt9NLGxsR06dHB3d7ezs1uwYMHdu3eFVsRgCI9Wq928eTOA119/3ZjzBgYGfvHFF4SQ2bNnJyUl0UGJRPLzzz97eXmdOnVKgB1SwmiW6Ohoa2tr3U/Mzs4uLCwsLi5Oq9UKrY7BEIzDhw8D8PT01Gg0dKSkpGTatGmHDx82wuxvvPEGgCeeeOL27dvcYEpKCg0D+Pbbb42ggYPZaJNotVqlUkm3sXv06JGZmbls2TKZTMblUfTq1Wvp0qXXr18XWimDIQChoaEAli9fzo188803AMaMGWOE2aurqwcPHgxg9OjRarWaG9+9e7dIJLKysjp+/LgRZFCYjTZOeXn5iy++CEAkEikUCu77lhBy48YNpVLp4eFBzVQsFstkssjIyNraWgEFMxjGpKioyMbGRiwW6y4jBgwYAGDnzp3G0ZCfn0+Tl95//33d8YULFwJwcXHJzc01jhJmo42QnZ3dt29fAB06dNi7d2+jz9FoNDExMSEhIdwtf9euXefPn3/hwgUjq2UwjA+NFX3uuee4kdTUVACPPfZYVVWV0WQkJCTQ5KUff/yRG9RoNM8++yyAAQMG3L9/3wgyeLbR4rrinOqcck05NzLq0qivbn/F7ywG5fjx4507dwbg7e2dnp7+yOcXFxerVKp+/fpxm6cBAQEqlaqiosIIahkMQfD39wfwyy+/cCNvv/02gHfffdfISmjykq2t7ZkzZ7jB4uJiGjwwffp0I2jgzUb3luztl94PSUASJMmS8ZfHZ1dnE0JczrssvrmYr1kMjUqlkkqlACZMmFBSUqL70LVr15p/bWJiolwut7e3p2bq4OAgl8vj4+MNqZfBEIBTp04BcHZ2rq6upiNVVVWdOnUCkJKSYnw99Gj+8ccfv3XrFjeYnp7u4OAAIDw83NAC+LHRbXe3iZJEskuy2LLYnOqc6HvRARkBk7InEfOx0aqqqpkzZza6GUoIUalU1tbWu3bteuR1ysrKtm7dKpPJuMWpn5+fUqm8c+eOwbQzGEZlzpw5AP7zn/9wIxEREQCefvppQfTU1taOGDECQGBgYE1NDTe+Z88ekUgklUp///13gwrgwUYrNZVOqU6DMwfXah+csZTVlVVqKomZ2OiNGzcGDhwIwN7eXvc+hRBSXV3NhcV9/PHHLb9mRkaGQqHo0qULfa2NjU1ISMj+/fvr6ur4ls9gGI+Kigq6yrt48SI3OGrUKADff/+9UKoKCwu7d+8O4N///rfu+JIlS+iObU5OjuFm58FGD5YeRBK2393e6KOmb6NxcXEuLi4AevbsmZaWpvvQzZs3hwwZQndetm7d2oaL19TU7N+/PyQkhO4VAOjevbtCoaAFxBgMs+PHH3+k6z5u5MqVKyKRqF27dg32wYzMn3/+SY+bNm7cyA1qNJoJEyYA8Pf3N9xxBQ82uvbWWiQh+X5yo49yNlqnNcVVmEqlsrKyAvDss88WFxfrPnTy5EmaJuzu7n727Fk9J8rLy1MqlV5eXg3CpHTvQRgM0ycwMLDByThd8c2cOVM4UfVs3bqVLnpOnz7NDZaWlvbu3RvA1KlTDZQyw4ONLi9YjiTcqL3R6KPURu+o73RJ7SK/Lk+6n6T/jLxQXV09a9as5jdDAYwYMUJ331pPaJhUWFhYu3btqJ926tRJLpenpqbyNQWDYTiysrJEIpG9vX1ZWRkdqaurc3d3B/DHH38Iq41CAwZcXV1v3rzJDWZmZjo6OgL4/PPPDTEpDzYafiscSTh7v/H1GrXRzUWb6SE+kjAwY+D6O+vv1d3Tf+o2k5eXR/u32NvbR0VF6T6kVqsVCgX1OLlcbqCg+pKSEpVK1b9//wZhUuXl5Y9+MYMhEDSyfc6cOdzIgQMHaHSgieRG19bWjhw5EsDQoUO5QAJCyL59+8RisVgs/u2333iflAcbPVp2FEnYUrSl0Ue5m/qLVRcVeQrnVGdqprbnbEOuhMSUxWiJsT/9+Pj4rl270hTP8+fP6z50+/btZ555hp4I6d62GI5Tp07NmTOH7tkDaN++va+vb9v2YRkMg6JWq+kfzqlTp7jByZMnA1i9erWAwhpQVFTk6ekJYN68ebrjS5cupfd/ly9f5ndGHmy0Vlvret7VP92/StNI9kKDI6ZqbXVkcaTskkyUJKJ+2iut19L8pddrjJSZzt2tP/PMM7pFDQghSUlJjz/+OIBu3brp7q0YgaqqqsjISC5MSiQSbd/e+JEdgyEUe/bsAdCnTx9upLCw0MrKSiqV5ufnCyjsYZKTk+3s7ACoVCpuUKvVTp06FUDv3r1LS0t5nI6fuNG9JXslyZLBmYP3luxNq0qLLYt9/8b7Pxb9SJo+qb9Re0NZqPS44EHNVJwkll2SRRZH6kZN8Ut1dTWtCkPv1nXLGRBCIiIi6H7l8OHDCwoKDKThkRw8eN
DJyYne4wulgcFoFHrkvXbtWm5k9erVACZPniygqqb46aefAFhZWelu2paVlfXp0wfAlClTeNyF4C2L6WjZ0eFZwyXJEiTBJtlmUOagfff2EUI8Lnh8nN9kuKWGaGLKYkKuhFgnW1M/7Xq+6/wb8y9U8pyZ3kzoUoPNUMGPzj/66CMatC+sDAZDl7y8PIlEYm1trXsPR0/ADxw4IKCwZliwYAGArl273rjx4AA8KyurY8eOAFatWsXXRDzn1Ndoa+6q77Zhu7OwtvDzws99L/pSMxUliUZmjdx5ZGdlZaX+qpoJXbpz505QUBDdDNUNNxOQixcv0iBWoYUwGA9YsWIFgJCQEG7kjz/+oCbV4MbOdKirqxs3bhyAIUOG6B43HTp0SCKRiMXi//3vf7xMZHIVnhLvJ8qvy+3P2buluEmsJPpnpjcTunTu3Dla787Nze3PP//UW3vbqa2t5X4Xa2pqpFKpRCIxZqUcBqMZtFotrfRx6NAhbpAmTy9ZskRAYY/k7t27NFh7xowZuuPLly+ngTq676jNmJyNUkrrSiMSI2hZVkr//v2//vrrBhHyzdN86NLPP/9MN6GHDRsm7Ab5jBkzrKysDh48yI306tULQIOUKgZDKI4ePUoT8LhU5vLycnt7e5FIxPupN+80WhJfq9XSE11ra+u8vDw9pzBRG+VoNDM9JibmkdvDzYQu1dXVmdRm6Jtvvom/16GZOHEigN27dwuoisHgeOWVVwAsXbqUG/n+++8BjBo1SjhRraDRkviFhYU0Rfu7777T8/qmbqOU1mamNxO6VFRURL+FpFKpUqk0hvpHER4eDuDNN9/kRv7zn/8AWLFihYCqGAxKSUmJnZ2dWCzWrRX59NNPA4iIiBBQWKtYuHBhgzjxqqoqsVgMYP369Xpe3DxslINmptPYWjTRwKOZ0KXU1FT62s6dOx87dszY6pvg4MGDAIKCgriRH374AUBYWJiAqhgMytdffw1g7Nix3MiFCxcAODo6Gqe2PC/U1dU1qJyyd+9eAB07dtQ/8snMbJTSVGZ6cnJyM3frO3fupFskTz31lEn1obt69So95uJG4uPjAQwaNEhAVQwGhXZY0i22++677wJ4++23BVSlP5MmTQJPWfZmaaMcd+7cWbt2LY2n5bCxsdm0aZPu0+hmKO3o+eqrr/ISRMUjGo2GHnZxmRV37twB4ODgIKwwRmtZvHhxQkKC0Cr4JDExEYCTkxMXMFRTU+Ps7AwgObnxom5mQUFBAc2/4iXXxrxtlINr4CGVSmNiYnQfKi0tpSc2prMZ+jC0s41uMxmay2RqOXaMZqCbMxKJ5L333jOju93moeefCxYs4EZ27twJoF+/fgKq0p/PPvsMwAsvvMDL1SzERilPPfUUAN0I0NzcXBrv1qVLF2P2rW4ttOX3tm3buBFa1dHQzQ8YPFJbW6tUKmmQsqenZ2xsrNCK9KWysvLhDktjxowB8M033wgoTH98fX0BREdH83I1MSwI+tFkZmZyI66uru7u7v379z99+jQtn2WaUOVZWVnciI+PT4MRholjZWWlUCjOnj07cODAq1evjhkzZu7cuWVlZULraju7d+8uKSkZPHgwvVsCcO3ataNHj9ra2tIQKDMlLi4uMzOzW7duNMdJfyzKRh+2HqlUGhUVlZCQQLOVTJaHlTMbNVP69ev3559/0mXphg0bfH19f/31V6FFtZFNmzYB4HqRAdi8eTOtk0RXqWYKfV+zZs2SSCT8XJGXNa2JQHdtpkyZIrSQVpOUlATgySef5Ebo39748eMFVMXQh7S0NC4NLyQkxOxaw+bk5IhEovbt23MnnxqNhoZjm/Ve07179+zs7EQiUXZ2Nl/XtKjV6MM39eaCj48PzavTaDTcCMzzvTAoffr0SUhIUKlUtMPCk08+uXv3bqFFtQJaqSc0NJSrKX7kyJHc3FxPT0+aH2im/Pzzz5WVlUFBQT169ODtonz5sSlQWVkpFoutrKwM1PnDoND2sNw3pFqttra2FovFFnPm+48lJyeHVhEDEBwcrNsjyGS5du1a586dAZw4cYIbzM/PX758+YYNGwQUpj8BAQHUTHm8pkXZKCGE7oFmZWUJLaTV0BRV3aNDurhm3e4sAK1Wq1KpOnToAKBjx466JdlNiurqai7r2tXV1d7e/tKlS0KL4pPz58/TfwJ+g8ct6qYe5nwy8/COhPnuUTAaIBKJ5HJ5ZmbmpEmT7t279913F4ODceOG0LJ0SE5Onj9/vpub26RJk6KioiQSiUQiqaioeOGFFyoqKoRWxxtcmjWXAMkLlmaj5ms9D38BmO97YTSKm5vbr7/+unnzZq12dXQ0+vbFxo0gREhJJSUlGzZsCAgICAgIoIUo/fz8lErljRs3MjIy+vTpk5aWRms7CKmSJ2pqanbs2AFg9uzZPF+ax5WtKfDdd98BeP3114UW0mqOHDkCYOTIkdzI5s2bAbzyyivCiWIYhMJCMnUqAQhA/vUvYvz7ZlqVIiQkhCYL4K+qFElJSbpPu3TpEu23YRnFxn7++WcYpsuZpdno77//DiAwMFBoIa0mNzcXgIuLCzeSkJBgoH91hikQGUk6dyYAsbMjSiX5qyCyYcnMzFy6dCmNW4JOjbSmqu4ePnyY9tsw2YZLLWf06NHgo7row1iajd68eROAk5OT0EJajVartbe3B8BV+C8pKQHQvn17HlsYMkyK4mIil9cvS4cNIxkZhpro3j2iUpExY0K529DevXuvWbOmJUUbVq5cCaBDhw4XL140lD7Dc+XKFbFY3K5du1Z10GghlmajhBAa5lZUVCS0kFbzcE0AWvZft68hw/KIjibu7gQgtrZk6VLCY7SeRkPi44lcTtq3JwAZMWKng4NDWFhYS/pHcGi1WlrzwcfH5969e7yJMy4ffvghDFbD1wJtlNbl1o13MxemTZsGYMuWLdzIiBEjADSoWcWwPO7dI3I5EYkIQPz9yd+3KNvC1atk6VLi4VG/1BWLyejRZNeu0rYF+pSXl/ft2xfApEmTNBqNvuKMDpd/ZaD6RJZ2Ug9zPuBmmfX/WBwdoVLh+HH06oXUVAwejEWLUFMDADk5SEqq/5kjIwNXrjRynepqREVh4kT07Illy3DtGrp3h0KBy5cRG4vQUIe2BfrY29vv37/f2dl5//79n376aRuuICyHDh3Kzc318vKi6xLesUAbNV/refgLwHzfC6MNjBiB5GS88w60WqxejaefRmoqPvgAAwdi5cq/PTMsDEuW/G0kKQnvvovu3REaiv/9D1ZWCAnB/v24dg1KJby89NXm4eGxY8cOqVT66aefmldWK/6qRTJnzhxau513mI2aEA/n0ZvvyvphwsPDHRwcrKysRo4ceaXRpRQDsLfH118jPh6+vsjIAC2xYGODNWvQ6G9BYSHWrUP//hg4EF99hbt3ERCA8HDcvInISEycCL5qGAGQyWSrVq0ihMyaNevixYu8XdfAFBUVRUdHS6XSGTNmGGoOQ+wUCAvtt+Xt7S20kFZDawJYW1ur1Wo6kp2dDcDd3V1YYXqi0Wg+/vhj3
YWAWCweM2bMzp07udYUjAZUVpJDhwgh5IUXyIgRJCCAjBpFuGOhgADy0kskPp5IpfW7n66u5IMPDHjWz0F38Hv16tWgQ5zJ8vnnnwOYNGmS4aawQButrq6WSCRSqVTwBvRt4IknngDAJTLX1dXZ2tqKRKLy8nJhhbWZsrKyKVOmAJBIJMuXL//++++nTp1KewsC6NixI+1FKLRM0+WFF8gzz5CTJ4lIRLj2CNRGa2pIt24kOJhERvJ5vt88lZWVtLrH2LFj64wT7Koffn5+AH799VfDTWGBNkoIoSWw0tPThRbSamg57v3793MjtGGfmRrNpUuX6C/xY489duTIEW783r17KpWKNkqhBAQEhIeH3717V0C1pgm1UULItGmkc2dCPyFqo4QQQVbzXP2n//73vwJM3xpOnDgBwMXFxaBV3yxwbxRse9Q0+O233wYNGpSent6vX7+zZ8/SHj4UR0dHuVx+4sSJtLQ0hULh7OyclJS0YMGCbt26hYaG0i5GAio3Tb74AjU1WLTob4M2NgIoeeKJJ/bs2WNlZbVq1arIyEgBFLQYrtC9lZWV4WaxTBs1X+tpqkCJeX0lEEJWr149ceLEe/fuhYaGJiQkeDVxVNynTx+lUpmXlxcZGSmTyWpqaqKiosaMGePr6/vJJ5/cMKkiSELj5oZly7BpExIThZYCDB8+fM2aNYSQ119/nZ5GmCAVFRU0qGDmzJmGnclwC10BUalUAF577TWhhbSao0ePAhg+fDg3sm3bNgAv0Vs4c6C8vHzq1KkARCKRQqFoVSZrbm6uUqmkG8QAJBIJzfg2xzrcfMHd1BNC1Gri709GjnxwUy8stFSSh4eHabZIoWXxRowYYeiJLNNGjx8/DmDo0KFCC2k1tCaAs7MzN3L69GkA/fv3F1BVy7l8+fKTTz4JwMHBoc2b+lz9Ie5GzNXVVaFQXL58mV+1ZoGujRJSf9ZkY2MSNlpVVUWTBmUymQkeNw0ZMgTA1q1bDT2RZdpoYWEhgI4dOwotpC30798/ODiYiwQqLS0F0K5dO9NPwjt48CBtGOnj45PBR+hNQUFBeHg4TUOkBAQEqFSqiooK/S9uLjSwUULI7NkEMAkbJYTcvHnT1dUVwAcffCCIgOrq6vPnz0dFRZWVlemOZ2RkAHB0dDRCGx7LtFFCCP17vnXrltBCeID+ml67dk1oIU2i1WqVSiVtVxscHMx7AYvExES5XE4rYOGvE6r4+Hh+ZzFNPvmE/N///W3kzh0yfjwxnRKgJ0+epHVL+W1w1CjFxcWJiYlbt25VKBQhISF+fn5ck+QGvw/vvfcegHnz5hlaErFgG6Xr+bi4OKGF8AC9TZbL5aa5A1VRQcLC1F5eE8Ri8aeffmq4sn6lpaVbt26lTasotFS7aX4s/yi++eYbes+UmJjI1zVramouXrz4yy+/rFq1aubMmYMHD6Y1pBtgZWXl7e39/PPPnz59Wve1NCTr7NmzfOlpBou1UXo2Z+5dDNVqtUKhAODk5ATA2to6ODg4MjKSS3MSnJwc0q8fAUjfvlVGq+ybnp6uUCjo3wkAGxubkJCQVhV/M0eyskinTsTXV2gdTfDGG28AeOKJJ27fvt2GlzdYZgYEBNg0FszVsWPHgICAkJCQpUuXRkZGJiYmNlqzioZh9e3bV++31SIs1kZXrVoF4P8a3A6ZFXfu3KGNeW1sbN5+++0JEyZw9y+PP/740qVLBb/NP36cdOlCANKrFzF+Sd/q6urIyMjg4GDuY3F3d1coFIJ/LAYiN5cApFs3oXU0QW1t7fDhwwEEBQU1/zWvVqtzcnJiYmLCw8PlcrlMJqPbVg/j6uoqk8nkcnl4eHhMTExOTk4LxQwbNgzAV199xcc7ezQWa6N79uwBMGHCBKGFtJHk5GQa9+Pm5nbq1Ck6mJ+fr1Qqe/bsSX/JxGJxYGCgSqUSpJe9SkWsrAhAnnuOCJtdnZeXp1QqPT09uY/lySefXLhwoSAfi+EoKSEAcXQUWkfTFBQUdOvWDcB777338KNRUVGTJ0/28fFpNBLe0dFx0KBBM2bMWLly5e7du9PS0tqczH3o0CEAIpGosLBQvzfUUizWRtPT0wH06NFDaCFtYfv27XZ2dgACAwMbbfNAj1yEykyvqiKvvUYAIhIRhYKYSAQBDZPS7Z1rLlFiLUStJgCRSIgpb10kJCTQm/FNmzY1eEipVDazzGzzhszNmzdjYmJUKtX8+fNlMhk9W6bbC/q+mRZjsTZaU1MjlUolEklVVZXQWlpBXV0d3QwFIJfLm/9CppnptE4ExQiZ6TdukKefJgCxtye7dxtunrZz48aNSZMmicViANnZ2ULL4RNbWwIQE19kb9myBYCtre2ZM2d0xzMyMiIjI1NSUtr8J1lWVpaYmLh9+/YPP/wwJCTE39/f1ta20d0AT0/PtLQ0Pt5Ni7BYGyWE9OrVC0CDTzMiIuLAgQMmGCpMCCkqKqLH0NbW1q06HEtKSnrrrbe4c8wuXVxmz1YbIhzojz+IiwsBSM+e5MIF/q/PI3R37PfffxdaCJ/QTqKmH8U3d+5culWtT8RhcXFxfHy8SqVSKBTBwcFeXl70q7EBnTp1CgwMlMvlSqVy//79WVlZxv/rtmQbnThxIoDdOksmtVrt5uYGk8yKSUlJobt7nTt3blvHGHrkIpPJRoyQ0xqU3t5k6VKSm8uPQm4zdPx4YoDuijxD8xTXr18vtBA+8fQkADH9FXZtbS1t1zFs2LCWbHHW1NTk5OTs379fqVTK5fLAwMAOHTo87JjW1tZeXl7BwcEKhUKlUsXHx5eWlhrh7TwSS7bRhQsXAlihE6ZcWVm5evVqWv6DbkKPGjUqIiKibX2+eGTHjh10M/Spp566fv26nle7fLlu8WLi5lZf0NfKikyZQg4caHsn9Orq+swZuhlqkkv5hqxevRrAggULhBbCJzS2LCVFaB0toLCwsHv37gDeeeedBg+1fJkZEBAQFhamVCojIyPT0tJM8yaSWLaN0qrXw4YNe/gh08mKoZuhtDL89OnTeTR0jYbExJCQkPolJC2QrlCQRpfg6ekkJoY0+GpPTiZJSSQvjwweXN/+lysbbPr8+uuvAMaPHy+0ED4ZNowAxFyyt5KSkuhx39y5c1esWPHqq68OHDiwqWWmn5/fCy+8sHjx4i1btpw+fdpcSutTLNlGv/vuOwA2NjZNbdAInhVz9+7dsWPHApBKpUql0kCzFBSQ8HDSt2+9mQIkIICoVEQ3MX3OHAIQufxvLxw7lowZQw4dIhIJ8fQkqakGEmgQaJlET09PoYXwybhxBCAHDwqto8V8++23YrG4QTvSBsvMxMREc+8lY8k2mpOTI5VK6c379OnTm+n2LkhWzPnz52kVTmdnZ+OchCQmErmc2NvXm6mjI5HLCU3emzOH2NoSsZicPPng+dRGCSE7d5KiIiMI5BO1Wm1tbS0Wiy0penTqVAKQyEihdbSYefPmAfDy8vrggw82bdp08uRJi2xwYMk2SghZv369g4MD9zXYp0+fL7/8sqnFpjGzYn799VcqbMCAAUbO
url3j6xfXx+0RP/bv5/MmUP69SPjx5O+fR909eFs1EyhFa9TzWsV3Sw0XPfHH4XW0TKOHTsmEolsbGyMGXskCBZuoxS62HRxceE2YprPTH84K4bH4sG0GBLdDH3llVcEXCulpxOFgvTsSe7fr7fR9HRiZUXWrKl/grnb6OTJkwHs2rVLaCG88c47BCDr1gmtowXcv3+fptutMJ1SVAbjH2GjlLq6OloMmN7pA+jWrZtCoWgqUffhrJiuXbvOnz///PnzbdZQWlr6/PPPG3oztFXQfQtqo4SQ994j7dsTuj42dxtdtGgRgGXLlgkthDcWLyYAWblSaB0tYMGCBSeh5tAAABDNSURBVAD8/f3/CZ0L/kE2ykEz02n3UG6xuXXr1qZOyYuLi1Uqlb+/P7c5QIsHt7bpcVZWVu/evQE4OTnRrm2mA2ejpaXEzY08/zwh5m+jmzdvpkt+oYXwxsqVBCCLFwut41GcOnWKNjnnsW6eKfNPtFEOGvZEAzbxV2b6uXPnmn8+F7Hh4OAQFhbWzMmVLgcOHHB0dKTfz1euXOHvTfADZ6OEkB076o+Dzd1GExIS6Hee0EJ4Y906ApCHAjFNi+rqatoV3PTbL/PFP9pGKTQz/amnntJdbDaTmV5eXr5x48ahQ4dyz+/bt294eHhxE5k9dDOUBhi//PLLpnlwrGujhBCZjPTpQ4KCzNtGS0pKALRv395i6pBu2bK9QweHmTNnCi2kOZYsWQLA19fXvMpZ6AOz0QfQnum0QDIAW1vb5sOeMjMzFQpFly5d6PMbjd4vKyubMmUKAIlEYiKboY3SwEYzM4m1NbGyMm8bJYTQf50bN24ILYQfaDXiqVOnCi2kSVJSUqysrMRi8YkTJ4TWYjyYjTaEy0ynh+kAvL29lUplU7ULa2pqdu/e/cYbbzzstpcuXfLz8wPw2GOPHTlyxPDa204DGyWELFlCALO3UZrZ3cKNF9Pn4MGDAMaNGye0kMZRq9W03tj7778vtBajwmy0SZrqmd7CBh7R0dG05FK/fv1aXrVbKNaubZjCdP8+ef55IlC3R95YsGDJgAGjf/yxLaVeTJD4+Hg0kd9sCnz66ac0c6y1p6/mDrPRR9CGnum6m6GhoaHm2A1YoyF8N/cUhv/3/8zgTKblpKSk0C9moYU0QkZGhq2trUgkspi1f8thNtpSWtgzvby8fOrUqTQDVaFQmOPhxuHDpF07MnGi0Dr44H//s4StCY7s7GyYZKEAjUYTGBgI4M033xRaiwAwG201jVaH+vPPPwkhsbGxNAHRwcFh3759QittIxcu1NcqtQCyswlA3N2F1sETt27dAtC5c2ehhTRk7dq6kSOVPXv2MpECoEZGRAgBo/WUlpbu2LHjxx9/PHv2LB2xtbWlFWr9/Pz27dtHa++bIzU1aN8eIhHu34e1tdBq9EOjgb09ampQVoa/vvjMmMrKyvbt29va2lZVVQmt5QHZ2fD3R2UlDh5Ujx/fSLs6i6eRaqmMluDo6Dhv3rwzZ87QhH2JREKLfXXt2jU2NtZ8PRSAjQ08PFBXh5wcoaXojUSCHj1ACC5fFloKH9jZ2Uml0urq6rq6OqG11EMI5s1DZSVmzsQ/00PBbFR/evfurVQqi4uLP/74423btt28ebOppttmBO0PkJUltA4+8PUFgMxMoXXwBG0HW1FRIbSQetavx9Gj6NoVa9cKLUU4mI3yg4ODw7Jly8LCwhpth2B2WJL10PdiGV8JAOimfHl5ue7g4cOHCwoKjC8mNxeLFgHAt9/isceMP7+pYAl/8wzesaTVKH0vlvGVAICWdNBdjVZXV0+ZMsXNza1Hjx5z586NiooqKyszjpi5c1FejtBQvPCCcSY0UZiNMhrBkqzHkr4S0NhqtKioaPTo0fb29leuXNmwYUNoaGiXLl1kMtnq1auTk5O1Wq2BlGzZgkOH4OSEr74y0AxmAzupZzTCrVvo2hUdO6KkRGgpelNWBkdHtGuHigpYwI5LUFDQsWPHjh49GhQUpDuu0WhSUlJiY2NjY2Pj4uLUajUdd3Z2HjVqlEwmGz9+/OOPP86XjMJC9OmD4mJERODVV/m6qrnCbJTROI89hpIS3LqFv0qvmDFubigowLVr+Cuz14yZNGnSgQMH9u3bR+t/N0pxcfHRo0djYmKOHDly/fp1btzPz2/y5NOBgfYjR6J9e71kTJ2KPXvw3HOIjtbrOpYBs1FG4wwdilOnEBeHESOElqI3o0bh+HEcPoyxY4WWoh8ajSYgICA1NfWpp56aPXv2mDFjvL29m3/JlStX6BI1JibG2vrxO3dSCYFUCn9/yGSQyfDMM/irHURL2bULL78MBwekpcHdve1vx3IQMvafYcLQ7mkqldA6+GDePLNpYdQMxcXF48aNA/CYzqF4165dQ0JCtm7dWvSo3q1qtTohIeejj8iQIUQiedDQ0NmZvPQS2biRXL/eIhlFRcTFhQDkhx94eFOWAbNRRuN89hkBiGUUPPvySwKQt94SWoceZGZm0jxjZ2fn3bt3R0ZGyuXybt26cX4qFosDAgIUCkVMTMwj276Xl5OYGKJQED+/B34KEC8vIpeTyMjmCtNMm0YAMmoUMcNyEYaC2SijcfbsIQCZMEFoHXxw8CABSFCQ0Drayv79+2kHmv79+1+9elX3oZycnPDwcJlMZmtry1mqnZ2dTCZTKpWJiYmPLI5z+TL57jsyZQpxdHzgp9bWpKSkkSdHRxOAtG9PTK8PjpCwvVFG42RkwM8PPXogO1toKXqTn49lyzBoEF5/XWgprYQQsmbNmiVLlmi12mnTpm3cuJFrHdaAysrK+Pj4I0eOxMTEXLhwgRvv3r379Onr/f2DR49G587NzaXRICUFsbGIjUVZGU6fbuQ55eVQKNC7N/79b73el4XBbJTROLW16NEjr2vXrPj4f9nammuFkooKZGXBxQXduz8YLC7G1avo29fUC6+Ul5fPmDFj3759Eolk5cqVCoWihS+8fft2XFxcbGzsb7/9lpeXN2zY1YQEDwBeXggOxsSJGD4cOovXRsjPR3o6vLzg5fVg8Pp1XL6M0aPxV18Ixl8IvBpmmDD0FPjChQtCC2k7J07UF8rTLce+fTsBiIn3Z9Jtx93mQsharTY1NXXdusqxY0m7dg/u2e3syLPPkrVrSVP/tlFRBCA9exLdrnRffEEAotG0TYslY/7hyAyD4ePjAyDL/BOACgvxySdCi2gN0dHRgwcPzsjI6Nev39mzZ2UyWduuIxKJ+vXrN39+u8OHUVaG+HgoFAgIQHU1Dh7E+++jb1+4uCA0FBs24ObNhi+/eROrVun7Xv4JMBtlNAk9Gs40/5xQuRzr1iE1VWgdLYAQsnr16kmTJt27d++ll15KSEjw9PTk5cpSKYYPh1KJxETk5yMiAjNnwtUVt28jKgpz58LdHQMG4IMPkJRU/5J338WaNZaTR2s4mI0ymsRiVqMzZ2LAAMjlMFh+OT9UVFS8+OKLixYtEolESqVyx44d7fVMNmoCFxe8+iq2bEF+PnJyoFIhJAQdOiAlBZ9/jmPH6p/
25pvo1Qtz54IdoDQPs1FGk1jMalQsxtq1OHsWP/wgtJSmyc7OHjJkyJ49exwcHPbu3atQKERGOcrx8oJcjshI3L6No0ehUGDixPqHpFKsXYu4OEREGEGIGcNslNEk1EazsrKI+a9Ghg/HzJlYvBi3bwstpTEOHTo0aNCgixcv+vj4nD59eiLnZEbExgZBQVAq62tiUcaMQUgI/vMfSyhSYziYjTKaxMnJycnJqaysrLCwUGgtPPDFFxCL8eGHD0YyMhqPjjQmdDM0ODi4pKRk4sSJZ86cod9epsO6daipwaefCq3DhGE2ymgOi7mvB+DkhOXLsWnTg7OmVaswZAh698bq1bhzRwBJFRUVoaGhixYt0mq1CoVi3759Dg4OAuhoFldXfPQRvv0Wly4JLcVUYTbKaA56ymSONtroadLcuQgIwNdf1/+vhwe6dEFmJhYtgrs7XnoJR44Y7xgqJydn2LBhu3fv7tChw549e5RKpcl2oHn3Xfj6YvNmoXWYKib6z8YwEcz0sP7OHchk2LWr4bhYjPXrUVtb/7/LlyM/HzExCAmBRoPISIwbh8cfx6JFuHrVsArj4uKGDh164cIFb2/v06dPT5482bDz6YdUiu++g8l0IzU5mI0ymoM7ZRJaSCs4cwYDBuDYMSxbBjs7BARANw09IACLFiEgoD4TVCKBTIbISFy/DqUSPXrg5k2sXo2ePTFmDLZtgyEawm/YsEEmk925c2fChAlnzpyh2UomhYsLZDLY2DwYGT4cS5ZAJmOZoI0hcBYVw7ShBurh4SG0kJYSEVGf9Th8OCkoaPXLNRoSH0/kcmJnV5832bEjkcvJuXP8yKuqqpoxYwYAkUikUCg0LLPSImA2ymgOtVptbW0tFovv378vtJZHoFYThaLe++RyUlOj19Xu3iXr1hF//wd56MOGaTdt2lZWVtbma+bm5g4cOBCAvb39L7/8opc+hinBbJTxCOh9fWpqqtBCmuPOHRIURABiY8NzVfa0NKJQECcnMnDgbQC2trYhISExMTGPrOPZgLi4uC5dugDo2bNnWloanxIZQsNslPEIJkyYAOCTTz4RWkiTnDtHPDwIQNzcyJ9/GmSK+/fJrl2nRowYwWUW9e7d+4svvrh161ZLXq5SqaysrAA8++yzxcXFBpHIEA5mo4xHEBwcTI3D29s7MjKytrZWaEV/4+ef6/cxhw0j+fkGn+7SpUtLly7lOhVLJBKZTBYZGalWqxt9fnV19axZs9hmqGXDbJTxCMrKynx00gNdXFwWLlyYmZkptC5SV8fnZmgrp66LiYkJCQmha0wAbm5uCoUiOztb92l5eXmDBg2im6FRUVHG08cwLsxGGS0iOztbpVL5+/tzfhoQEKBSqcp16yEbkaIiIpMRgEilRKkURAIhhBQUFCiVyp49e9LPRCwWBwYGqlSq+/fvf/PNN87OzgB69Ohx/vx5wSQyDA+zUUbrSExMlMvlHTp0oMbh4OAQFhbW5vLsbSM1VePpSQDStSs5ccKYMzeOVqs9duxYWFhYu3bt6MfC/RAUFHT37l2hBTIMC+vFxGgL1dXVBw4c2LBhw9GjR+mvUO/evWfOnDl79uzOzTdO05tdu3Z9+OEPJSVHnnhCvHcv/tqlNAnKysp27ty5bdu2hIQEQoi/v//p06dtdKPYGZYIs1GGXmRlZW3evHnz5s23b98GYG1t/fzzz4eFhT333HMSiYTfuTQazZIlSz7//HNCyMKF25YvDzNZgzpy5Iirq2vfvn2FFsIwBsxGGTyg0WiOHTu2YcOGvXv31tXVAejWrdv06dPnzp3LVw+MsrKy6dOnHzhwQCqVrlixouVtMhkMQ8NslMEn+fn5P/300w8//JCTkwNALBYHBQWFhYWFhIRw24VtICsra/LkyZmZmc7Ozrt27QoKCuJPMoOhL8xGGfyj1WoTEhJ++umniIiIyspKAJ06dQoJCXnrrbd0z/pbyIEDB6ZPn15WVta/f/+9e/d6eHjwr5jB0ANmowwDcu/evcjIyO+///7cuXN0JCAgQC6XT5s2jTvrbwZCyJo1a5YsWaLVaqdNm7Zx40Y73WJNDIZpwGyUYQySkpK2bdsWERFRXFwMwNbWduLEiXK5fPTo0U01bisvL58xY8a+ffskEsnKlSvZZijDZGE2yjAeD4dJ+fj4zJo1a9asWbRsB8elS5cmT56ckZHh5OS0c+dOmUwmkGQG49EwG2UIwOXLl7dv37558+bc3FwAEolk1KhRcrl8ypQpUqk0Ojr61VdfLS0t9ff337t3L19n/QyGgWA2yhAMtVr922+/bdy48eDBgxqNBoCVlZWrq+uNGzcIIS+//PKmTZvYZijD9GE2yhCegoKCbdu2rVu3rqCgAIBIJPrss88++OCDprZNGQyTgtkow1TQarVfffXV+fPnR4wY8dprrwkth8FoKcxGGQwGQy9YZ1AGg8HQC2ajDAaDoRfMRhkMBkMvmI0yGAyGXjAbZTAYDL1gNspgMBh6wWyUwWAw9ILZKIPBYOjF/wc5HGaMHRX65gAAAdN6VFh0cmRraXRQS0wgcmRraXQgMjAyNC4wOS4xAAB4nHu/b+09BiAQAGImBgiQAGJpIG5gZGNIANKMzOwOGkCamZkNQrNAxJmY2BkUQHwYFyEMVY4mDtfukAGWZ0RiQGQEwQYyYirAcAEWI3AZys3AyMDIlMDEDGQzsLAysLIxsLEzsHMwcHAysHMpcHFnMHHzJPDwZjDx8jHw8ifwC2QwMQlmMAkKJQgJZzAJiySIiGYwiYoliIlnMIkzJnCyMAhwJYgLJTixAM1nZQQqFGdjZWPn4GRh4+bh5RfgYhMWERUTFxLXYgR6hgEWrFfaDti/2dpmD+KETjNwaKzLsQOx/V1MHQ6aLtoPYqc+vmLv68B7AMRe2ddu58R7FSyurypoO73Cah+IPbFs3X6hiTvAep/yKh1Q9xUBq1maIHwg8RMzWK98c/p+3jVqYPbh3uoDjelie0HsJtnWA5+5doLdUHB34wGvun9g9n636weKjSFuC2dmOnhq2WkwW+7D+QOftO6B7TUwWHxA408l2K6PnzscLk68AmZX2C52eLn+PFgNy9pLDme6TcDsW5xPHBzz5oDNUfl+1GEvp6QDiD2jfZpDbQwXmH1Iq9WhvsgZrEYMAGdzeABl6urhAAACVnpUWHRNT0wgcmRraXQgMjAyNC4wOS4xAAB4nH1VW6obMQz9zyq8gTGSLMn2501yKaXcBNq0eyj0s/unRx6S8QXTmVh47GM9j5xTiuf79dvvv+n1yPV0Son+8+u9p1+FiE4fKSbp/P7l6y1dHm/n58rl/vP2+JFEk1ScwfsZ+/a4fzxXOF0SZyNlNkzIRONQpvEcJyXdkuRqnatjWwp14wWuQJ/kVkS6po2zeBW3BVCHYffCndImWaiS0QJosAyDrtUpNLqr+spFh0asFtPiaaPMTZhWPlYAoai49F6wX5y0yQLYYBquWRHqLVRa7YVWyB4qJWOzVo0ZObv3BZIpoJQ7adEBhXVZ5pKjOFvJsGkoyhbBIZ2rJL
EEVDNbbyZIrKohYytkVGizbNakMZC9NRRrhYwSbZ6r1NI8yKENaldIS5c/aWuZxdrgEXhCSx6x70q1N6QoYtJSuizDr7unoLwXGTmrDqauoFEpzVJ7rW0nCei0TFRUyjKIIabDPJmXVfgShfJsDTyhwacKsq7Cl6hTRfAARu5FhNdAGSq5ucJj0KWz8NJ2FElzJXgnALJz60uNOoDY1xrhwkV1XwDfb9dPfb/fBOf77XrcBPHK0fC4PlI5+pox9OhexrCjRxnDj05kjHr0G2O0o6sYox+twzHmBol95qkNNATLxHYNwWVitYZgndjLQ9jEUh0rPpExPsG0iXMasbaJWRqC+8QgDSEzUzSE8MQIDSEylV5DSJlqrCFEp2LquLVfC5FX5OMVggyH65QcqfuZw7+o9FzX+H7+O2B++gcuuTJBQIIwXQAAAS96VFh0U01JTEVTIHJka2l0IDIwMjQuMDkuMQAAeJwlUDmSwzAM+8qWyYzC4U1xXLrPJ9zu5AV5/IJeN7JACARwfuR6nG89z/fjsuvC5fd5XfY8T31+Lr3mU/n5PoSCXWQJcajbOpQqWvZiUuMOGWSbaq+XkGZprUMo0wSIknJxrAPs9MrhZLrXIGzhFmAxyVZhSGFsqd1Qt2TfCkgpTGVYUW08kBF+wgZjy5LeAJ0kegecukdi5ysoYusG0nsrvL+S4M960vj24WwSBQkIUvE/x3tzjribtcqtxN15L+xKNHL7gpmqCckpiUgzZTf3wWBfscBJq2v/B2eQgpAUk5HnSJg5kmIj9l1EoTg8K9jKcaWqaB8U2em6sLRFdXSL8R6AJJrGHSd6ZYKCZ67n9w9ramN6pq4WpwAAAABJRU5ErkJggg==", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAApQAAAHzCAYAAACe1o1DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACQq0lEQVR4nOzdeVhTZ9oG8DuHsCkqQVFcUEBwXxF3RUFtxUJb7VitVq21i2XG2s7W1i5j922mrbalddRqS7VVuwoU64biLhB3qoKIghqIJoCsITn5/uAjY2SHwCHJ/buuXt/HOck5N8qYh/e87/PKjEajEUREREREjSRIHYCIiIiIrBsLSiIiIiJqEhaURERERNQkLCiJiIiIqElYUBIRERFRk7CgJCIiIqImYUFJRERERE3CgpKIiIiImoQFJRERERE1CQtKIiIiImoSFpRERERE1CQsKImIiIioSVhQEhEREVGTsKAkIiIioiZhQUlERERETcKCkoiIiIiahAUlERERETUJC0oiIiIiahIWlERERETUJCwoiYiIiKhJWFASERERUZOwoCQiIiKiJmFBSURERERNwoKSiIiIiJqEBSURERERNQkLSiIiIiJqEhaURERERNQkLCiJiIiIqElYUBIRERFRk7CgJCIiIqImYUFJRERERE3CgpKIiIiImoQFJRERERE1CQtKIiIiImoSFpRERERE1CQsKImIiIioSVhQEhEREVGTyKUOQNZJp9NBo9FAr9dDLpfDw8MDTk5OUsciIiIiCbCgpHpTq9VITk5GWloatFptlfMKhQIBAQEICgqCp6enBAmJiIhICjKj0WiUOgS1blqtFrGxscjIyIBMJkNtPzKV5/38/BAeHg6FQtGCSYmIiEgKLCipVkqlEvHx8RBFEaIo1vt9giBAEASEhYUhMDCwGRMSERGR1FhQUo0SExORkJDQ5OuEhIQgODjYAomIiIioNeIqb6qWUqm0SDEJAAkJCVAqlRa5FhEREbU+XJRDVWi1WsTHxwMAysrKsH//fqhUKqhUKhQXF2PSpEkICQkxvV4URRw9ehSXLl1Cbm4uSkpK4O7ujr59+2LChAlwdXVFfHw8fH19OaeSiIjIBnGEkqqIjY01zZcsLi5GSkoKDAYD+vXrV+3ry8vLsW/fPri7u2P69OmYP38+AgMDkZKSgq+++grl5eUQRRGxsbEt+W0QERFRC+EIJZlRq9XIyMgwfe3u7o4XX3wRMpkMRUVF1T66dnR0xHPPPYc2bdqYjvn6+qJDhw7Ytm0bUlNTMXToUGRkZECtVrOlEBERkY3hCCWZSU5OhkwmM30tk8nMvq6OIAhmxWSl7t27AwAKCgpMr0tOTrZgWiIiImoNWFCSmbS0tFr7TDbE5cuXAcA0IimKItLT0y1ybSIiImo9WFCSSVlZWbU74DRGQUEBdu/ejW7duqFPnz6m4xqNBjqdziL3ICIiotaBBSWZWKqYLC4uxqZNmwAAf/rTnyAI5j9mGo3GIvchIiKi1oEFJZno9fomX6OkpATR0dEoKCjAggUL4OHh0Sz3ISIiotaDBSWZyOVNW/RfUlKCb775BlqtFgsXLoSXl1ez3IeIiIhaFxaUZFLdaGJ93VlMLliwAF27dm2W+xAREVHrw6EiMnFycoJCoagylzItLQ06nc60mEatVuPcuXMAgICAAMhkMkRHR+PGjRuYPn06RFFEVlaW6f1t27Y1FZEeHh5wcnJqoe+IiIiIWgILSjITEBCApKQks9ZBsbGxyM/PN32dmpqK1NRUAMDy5csBANevXwcA7Nixo8o1hw4dipkzZwIA/P39my07ERERSUNmtFTTQbIJarUaUVFRzXb9//73v+jQoQP69esHR0dHODk5wdHREe3bt8fLL7/MXXSIiIisEEcoyYynpyf8/PyQmZlp2s/bEgRBgLOzM65fv47r16/jjz/+AAA4ODhAFEUYjUY8/vjjLCiJiIisEBflUBXh4eFVekc2lSAIePLJJ7Flyxaz4waDATKZDGFhYRgyZIhF70lEREQtgwUlVaFQKBAWFmbRa4aFhUGhUODhhx/GkiVLzPYHF0URZ8+excGDBy16TyIiImoZnENJNUpMTERCQgKMRqNZAdhQoaGhmDhxounroqIiDBkyBFeuXAEAdO3aFdevX4coihgyZAg2bdqEQYMGNTn/nXQ6HTQaDfR6PeRyOVebExERWRALSqqWRqPBwoULcePGDYSHh0MulzdoTqUgCBAEAWFhYQgMDKxyXqlUYvTo0TAajTh//jzat2+PRx99FLt27QIAjBs3Dps3b0avXr1M7zlz5gxOnjyJBQsW1CuDWq1GcnIy0tLSqt1WUqFQICAgAEFBQZy7SURE1AQsKMlMUVERVq1ahXfeeQdFRUUAgOTkZKSmpiIjIwOCINRaWFae9/PzQ3h4OBQKRY2v3bJlCzQaDZ555hnTsYyMDMyfPx9Hjx6FTCbDvffei+joaHh4eGDQoEH4448/sH37dkRERNR4Xa1Wi9jYWGRkZEAmk6G2H/HK8/XJS0RERNVjQUkAKh4Jr1u3Dq+
99ho0Go2pCHNyckJpaSlkMplpxC89PR0ajabKNTw8PODv72+REb/KkcizZ89CEASMGjUKR48eBQC4u7vj3Llz6NatW5X3KZVKxMfHQxRFi46oEhERUc1YUBIAYOXKlXj99derHB8yZAhOnTpV5XhLzUnct28fHn/8cVy+fNl0zMHBAcHBwdi9e7fZavTKOZ9NFRISguDg4CZfh4iIyF5wlTcBAB5//HGMHDnSbPGNg4MDBg8eXO3rnZyc4OXlhR49esDLy6vZFrhMnjwZL7zwgtkxg8GAhIQEfPTRR6ZjSqXSIsUkACQkJECpVFrkWkRERPaABSUBAHr27InExERTYVg58tevXz8pY8FgMOC1116r9tw//vEPvP/++9BqtYiPj7fofePj46tdyENERERVcaccMvnTn/6EsrIyREZG4tdff8W1a9ckLygBYNq0acjKyoJOpzP9p1KpoNFo8OKLLyI/Px+urq4oKyvD/v37oVKpoFKpUFxcjEmTJiEkJMTseleuXMHJkyehUqmQm5sLg8GA5cuXmy3IEUURsbGx9V5RTkREZM9YUBIAYNOmTYiLi8PIkSPx+eef45133sG3336L8PBwSXM5ODjg22+/rfac0WjEli1bcOHCBYiiiOLiYqSkpMDLywv9+vWr8bH15cuXkZGRga5du8LZ2RmZmZlVXiOKIjIyMqBWq9lSiIiIqA5clEO4efMmunfvDgcHB+Tm5sLNzU3qSPUWHx+PpKQkGI1G08p0mUyGoqIifPjhh9WOUIqiaHqkf+jQIezatavKCCVQ8dg/KCjI4rsGERER2Rq7H6HkDipAcHAwdDod4uLirKqYBIC0tDSzQrI+6rtPuSiKSE9Pb3Q2IiIie2GXBSV3UPmfl156CX/88QfmzZuHGTNmSB2nQcrKypp94YxGo4FOp7O7XzKIiIgawq4KyvruoKLVapGUlITjx4/b9A4qSqUS77//Pry8vBAdHS11nAZrqVXYGo0GXl5eLXIvIiIia2Q3bYOUSiWioqJMCzDqmjpaeT4zMxNRUVE215dQr9dj2rRpkMlk2LdvX70fA7cmer3epu5DRERkrexihLIpO6hUbuEXExODwsJCm9lBZdasWdBoNHj33XfRt29fqeM0ilzeMj++LXUfIiIia2V9w1INxB1Uqvr+++8RExODESNG4MUXX5Q6TqN5eHjY1H2IiIislU0PvVTuoFLfhtd3MhqN2LBhA65evYqRI0fivvvuA1DRpsbX19dq51TevHkTixYtgqurK/bu3St1nCZxcnKCQqFo1rmU9rjqn4iIqKFseoQyNjbWrOG1wWCo984vx48fh0ajqXK8cgcVazVp0iTodDps3boV7du3lzpOkwUEBJi1C0pLS8O5c+dw8eJFABUr+s+dO4dz585Bp9MBAIqKikzHcnNzAQDp6ek4d+6cWZNzQRDg7+/fct8MERGRlbLZEUq1Wo2MjAwAgLu7O1588UVTw+u6HltrtVrs2bMHM2fOxJYtW8zOWfMOKi+//DJSU1PxyCOPSL4DjqUEBQXh+PHjpq9jY2ORn59v+jo1NRWpqakAgOXLl8PJyQm5ubnYtm2b2XXi4uIAAL169cLixYsBVPxdBwUFNfe3QEREZPVstqBMTk42tQaqb8PrSjExMfDz80P//v2rPS8IApKTk61qB5WTJ0/i3XffhZeXV41bGVojT09P+Pn5ITMzE6Io4vnnn6/zPb6+vli5cmWtrxEEAT4+Plb3SwMREZEUbPaR9507qDRESkoKrl27VmuTb2vbQUWv12Pq1KmQyWTYu3evVbYIqk14eLjFvydBEGxmFJeIiKi52VZl8f8au4NKQUEBdu7ciWnTptU5v7ByBxVr8Kc//Qm3bt3CW2+9VeOoqzVTKBQWHy0OCwuz2oVXRERELc0mC8rGrvqNjY2Fl5cXRowYUa/XV7dop7XZsmULfv31VwQGBuKll16SOk6zCQwMrHXVfkOEhoYiMDDQItciIiKyBzZZUDZmZ5Nz584hPT0d06ZNQ2lpKUpKSlBSUgIAMBgMKCkpgcFgaPJ9WtKtW7ewcOFCuLi4WKwXZ2sWHByMiIgIyOXyBj8CFwQBcrkcERERmDhxYjMlJCIisk02uSinMTub5ObmQhRFrFu3rso5pVIJpVKJOXPmmD0ybu07qEyePBk6nQ6//vqrTbQIqo/AwED4+vqa9mwXBAGiKNb4+srzPj4+NrtnOxERUXNr3RVRIzVmZ5Nhw4bBx8enyvGvv/4a/fr1w+jRo9G5c+cm36c5vfTSS1Cr1fjoo4/w73//G2fPnsWcOXNw//33Sx2tRSkUCixYsABqtRrJyclIT0+vMj3BaDSisLAQU6ZMQVBQEFdzExERNYFNFpTV7aCSlpYGnU5nWkhT2fAaqGiOrVAoahydateuHXx9fc2OtcYdVDZs2ICcnBzExsYiNzcXXbp0webNm6WOJRlPT0/TYh2dTgeNRgO9Xg+5XI7XX38dX375JXx9fa2q/RMREVFrZJMFJVBRJCYlJZlaB9Wn4XVDdOvWDSqVCqIomv3Xs2dPSdry3L59Gzk5OQBg+r+zZ8+GKIo21yaoMZycnODl5WX6unK6wvLlyzFq1CiMHDlSqmhERERWT2ZsTLNGK6BWqxEVFdVs1//ss89w8+bNKsdfffVVvPHGG81235qkpKRUu6vL+PHjsX//fjg4OLR4ptYsPDzctDuOQqHA8ePHuc0iERFRI9ns0FXlDirN0fA6Ly+v2mISqFgII4Xz58+bfV35fZeXl7f61ehSSEpKMv3/+fn5mDJlimlkl4iIiBrGZgtKoPl2UHnllVdwzz33VNnS0cvLq1lazuh0OqhUKmRnZ0OlUlXbUD0lJcXsa19fX/zwww84evQonJ2dLZ7JmuXk5CA3N9f0tSiKyMrKwj333IOysjIJkxEREVknm33kXUmpVCImJsZi14uIiEBgYCBu3bqFIUOGICcnx6w/pbOzM5544gl89NFHZvMyjx49iosXL2LhwoX1uk/lCuW0tLRqG7UrFAoEBASYVih7eHhAq9XC3d0d//nPf7Bw4cJW39ZIKr/99hvuu+++Ksc9PT1x5swZdOnSRYJURERE1svmC0oASExMtEhj79DQULMRyOPHj2P8+PHQ6/UYPnw4nnnmGbz88stQq9VwdHTEokWL8Omnn0IulyMgIACZmZn45Zdf8MADD9R4D61Wa+qhKJPJat2PvPJ8+/bt8cYbb2DQoEHYtWsXXFxcmvy92rLVq1dj+fLlcHBwgCAIcHBwQGJiIgIDAznXlIiIqBHsoqAEKkYq4+PjTaux60sQBAiCgLCwsGq34/v000/x7LPPIjY21jTq9c033+CFF16ASqWCXC7HmDFjcPDgQQCAm5sbTp06BT8/P4tlBCoe295///313jbSnmm1WiQnJ2P06NF4/PHH8eOPPyInJ6dKn1EiIiKqH7spKAHz0b/67qDi5+dX6w4qRqMRWVlZ6NmzZ5VzW7duxfPPP4/r16+bjjk4OGDgwIE4duyY2UiipUZRQ0JCEBwc3OTr2IsdO3
YgLCwM//rXv7By5Uqp4xAREVkluyooK9W2gwpQ0bTc39/fIjuobN26FXPmzKlyfNGiRdi4cSOA5pvnSXUTRRFOTk4YPHgwTpw4IXUcIiIiq2SXBeWd7t5BxdI74AwaNMi0I8/dpk+fjnXr1mHjxo0oKirC/v37oVKpoFKpUFxcjEmTJiEkJMTsPUePHsWZM2eg0Wig0+nQtm1beHt7Y9KkSaZHtnK5HJGRkdyXup769++PjIwMrvAmIiJqJJtuG1QflTuo9OjRA15eXhbfTnHkyJEYO3YsJkyYgODgYEyePBmBgYFo27Ytfv/9d7z77rsQRRHFxcVISUmBwWBAv379arxeSUkJAgICcP/992PBggUICQmBSqXC2rVrTb0xRVFEbGysRb8PWzZjxgzodDocOXJE6ihERERWye5HKKV06tQp/PLLLwBgWs0tk8lQVFSEDz/8sNoRyuqo1Wp8/vnnCA4ORmhoqOl4ZGRkkx/Z24OMjAz07t0bCxcuxNdffy11HCIiIqtj9yOUUrp+/bqpObpMJqvSKL2+2rRpAwBmTdwFQUBycnLTQ9oBPz8/uLm5Ye/evVJHISIiskosKCWUlpZWa5/J2oiiCL1eD7Vaje3bt6Nt27YYPny42fn09HRLRbV5w4cPx7Vr16rdhYiIiIhqx61UJFJWVlbtDjj19fbbb5t26OnYsSMee+wxdOjQwew1lQt3LD0v1BY98sgjOHDgADZt2oTFixdLHYeIiMiqcIRSIk0pJgFgyZIlWLJkCWbNmgUnJyds3LjRbH/qStW1RaKqFi1aBADYtGmTxEmIiIisDwtKiej1+ia9v1u3bvD29saQIUPw2GOPAQD27Nlj8fvYizZt2qBr165ISkqSOgoREZHVYUEpEbnccrMNnJ2d0alTJ9y6datZ72PrJk2ahIKCAmRnZ0sdhYiIyKqwoJSIh4eHxa5VVFSEnJycaq9pyfvYuqeffhoAEBUVJXESIiIi68LhK4k4OTlBoVCYzaVMS0uDTqczrTRWq9WmXXYCAgIgiiK++eYbDB48GB07doRcLsetW7dw7NgxGAwGTJo0yewelt71x9ZNnjwZcrkcMTExeOedd6SOQ0REZDVYUEooICAASUlJptZBsbGxyM/PN51PTU1FamoqAGD58uVo164dvLy8kJKSgoKCAuj1eri5ucHHxwcPP/ywaetFoKIPpb+/f8t+Qzagb9++uHDhgtQxiIiIrAp3ypGQWq1u1ser3Cmn4V566SW89957SEhIwOTJk6WOQ0REZBU4h1JCnp6e8PPzM9vhxhIEQYCfnx+LyUaIjIwEAKxdu1biJERERNaDBaXEwsPDm6WgDA8Pt+g17YW3tzfat2+PhIQEqaMQERFZDRaUElMoFAgLC7PoNcPCwqBQKCx6TXsSFBSEGzduoLS0VOooREREVoEFZSsQGBiIkJAQi1wrNDQUgYGBFrmWvZo3bx4AYOPGjdIGISIishJclNOKKJVKxMfHQxRFiKJY7/cJggBBEBAWFsZi0gJ0Oh1cXFwwceJE7N+/X+o4RERErR4LylZGq9UiNjYWGRkZEASh1sKy8ryfnx/Cw8P5mNuCvL29kZeXh9u3b0sdxa7odDpoNBro9XrI5XL2UiUishIsKFsptVqN5ORkpKenQ6PRmJ0zGo3Iy8vDPffcg5EjR3I1dzNYtGgRvvnmG2RkZMDX11fqODat8mc9LS3NrNF/JYVCgYCAAAQFBfFnnYiolWJBaQXuHrVZtGgRdu7ciYULF+Lrr7+WOp5NOnz4MMaPH4+///3v+PDDD6WOY5PuHI2XyWSo7Z+iyvMcjSciap1YUFqhadOmYffu3QAqFo4sWrRI4kS2ydnZGX5+fvjjjz+kjmJzOF+YiMi2sKC0MkajEZ6enrh16xYAQC6XY/fu3VX28aamGzZsGM6ePQudTmfxXqH2LDEx0SJ9PkNCQhAcHGyBRERE1FT8lLQyly9fNhWTACCKIu6//35cvHhRwlS26cEHH4TBYMCuXbukjmIzlEqlxZrGJyQkQKlUWuRaRETUNCworczhw4fNvhZFEQUFBbj//vslSmS7li5dCgBYt26dxElsg1arRXx8vEWvGR8fX+1CHiIiallyqQNQw1QWlHK5HHq9HgDg7++P2bNnw2g0QiaTSRnPpnh5ecHd3R0HDhyQOopNiI2NrXO+5JUrV3DgwAFkZ2dDr9ejffv2GDp0aI1TOkRRRGxsLBYsWNAckYmIqJ5YUFoZT09P9O/fH1OmTMGXX36JXr16IS0tTepYNmv06NH4/fffUVhYCDc3N6njWC21Wo2MjIxaX3P69Gn8/PPPGDhwIGbOnAknJydoNJpae4GKooiMjAyo1Wq2FCIikhAX5VixwYMH48KFC9DpdFJHsVmbNm3Co48+ilWrVuHZZ5+VOo7Vio+PR1JSUo2tgQoKCvDpp59i6NChCA8Pb9C1BUFAUFAQwsLCLBGViIgagXMordjUqVNRXl6OkydPSh3FZs2ZMwcymQxbtmyROopVS0tLq7XPpFKpRHl5OSZMmNDga4uiiPT09KbEIyKiJmJBacUWL14MAPjqq68kTmK75HI5evbsyaK9CcrKyupcOHPlyhW4urri5s2b+OKLL/D666/jgw8+QExMDEpLS+u8h0aj4Ug9EZGEWFBasSFDhsDJyQl79uyROopNmzp1KoqLi3HhwgWpo1il+qzCLigoQHl5ObZu3YpBgwZh4cKFGD9+PE6dOoVNmzbVOrpZ6e4tSomIqOWwoLRyvXv35uO+ZvbMM88AAKKioiROYp0quxHUxmg0Qq/XY+LEiZg4cSJ8fX0xfvx4TJ06FVlZWXUu6KnvfYiIqHmwoLRyU6ZMgU6nw9mzZ6WOYrNGjBgBZ2dni/dQtBdyed3NJNq0aQOgogXWnSq/vnHjhkXuQ0REzYMFpZXjPMqWMWjQIFy6dKlB+05TBQ8Pjzpf06VLl1rP16e/an3uQ0REzYMFpZULDAyEo6Mjdu/eLXUUmzZz5kxTE21qGCcnJygUilpf079/fwCo0lO18usePXrU+n4PDw84OTk1ISURETUFC0ob4Ovry728m1nlNowbNmyQOIl1CggIqHWU0d/fH3369MH+/fuxf/9+XLp0CQcOHMCePXvQp08f9OrVq8b3ymSyKo/KiYioZbGxuQ1YunQp1qxZg/Pnz6Nv375Sx7FZHTt2hIODA3Jzc6WOYnXUanWdi5rKy8uxb98+nDlzBoWFhWjXrh0GDx6MyZMn1zk/ctu2bfDz84OjoyNKS0tRWlqKkpISDB06FF988YUlvxUiIqoGC0obcOTIEYwbNw7//Oc/8f7770sdx2aFh4cjLi4OSqUSR44cgV6v5+45DRAdHY3MzEyLzkOVyWS4ceMGvvzyy2rPjxkzBkeOHLHY/YiIqHosKG2Eo6MjBg0ahBMnTkgdxeYUFRVh586d+OKLL7Br1y7T8Q4dOiAvL0+6YFZGq9UiKirKou195HI5li5dipkzZ+LAgQNVzv/666+4//77LXY/Imuj0+mg0Wig1+shl8s535iaDfts2
AgfHx+cP39e6hg2afny5Vi/fn2Vx67Dhg2TJpCVUigUCAsLQ0xMjMWuGRYWho4dOyImJgbDhw/HlStXzEZAly5dips3b+Lxxx+32D3vxA9rao3UajWSk5ORlpZW7cYCCoUCAQEBCAoKgqenpwQJyRZxhNJGPPHEE1i/fj0yMjLg6+srdRybcuLECUyYMAElJSWmHVscHR3x3HPP4YMPPpA4nXUpKirCE088gX79+jX5WqGhoZg4caLp63PnziEoKMi0VeO4ceOQlJSE8vJyuLu74+9//zteeuklCML/1iLqdDr8/PPPmDlzZr0LQX5YU2ul1WoRGxuLjIwMyGSyWneYqjzv5+eH8PDwOjsxENWFq7xtxMKFCwEA69atkziJ7Rk+fDh+/fVXODg4mI6Vl5djxIgREqayLjqdDl9//TU8PDywdetWDBs2DHK53Ky4qw9BECCXyxEREWFWTALAwIEDER0dDQDw8vJCQkICCgsL8fe//x06nQ6vvPIK3Nzc8Oyzz5qKzrVr12Lu3Ln1mgur1WoRHR2NqKgoJCUl1bilpFarRVJSEqKiohAdHV2vrSeJmkqpVCIqKgqZmZkAUOd2pZXnMzMzERUVBaVS2dwRycZxhNJGiKIIJycnDB06FCkpKVLHsUmbN2/G/PnzTV+npaWxXU0dcnJysGbNGnz66ae4efMmAGD06NE4evSo2WiKIAi1LtapPF+f0ZQNGzbA29sbU6dONR0TRREfffQR3nvvPdy6dQtyuRyzZs3CwYMHcf36dQDA+vXra3w0rlQqER8fD1EUG7SoSBAECIKAsLAwBAYG1vt9RA2RmJiIhISEJl8nJCQEwcHBFkhE9ogFpQ3x8/NDTk4OioqKpI5isz766CP87W9/g0wmg8FgqNcOLvaooKAAf/nLX/Ddd99VKcJ++uknzJw50/R15SPk9PR0aDSaKtfy8PCAv7+/xR4hb9q0CS+99BKysrLMjsvlchw+fBgjR440O84Pa2rNlEqlReclR0RE8JcfahQWlDZk0aJF+Oabb3DlyhX07NlT6jg2q0+fPrh8+TJ0Oh3Ky8u5KKMaSqUSI0eOrHY0Lzc3t8bCsKUWuRiNRvj6+uLKlStmxzt27IjU1FR07tzZ9H3ww5paq7o6J5SVlWHv3r04d+4cSkpK0KlTJ0yYMAGDBw+u8ZpyuRyRkZGcU0kNxoLShuzevRvTpk3Dv/71L6xcuVLqODYrLi4On3/+OaZOnYrbt29XOc9FGRV+//13PPjgg6b5igDQu3dvpKenS5iqQnx8PGbMmFHtubZt2yIlJQWdO3eu8cP68uXL+Prrr6t9/5IlS+Dt7V3tOX5YkyXV1dv1m2++wfXr1zF16lR07NgRZ86cgVKpxKxZszBkyJBq3yMIAnx8fLBgwYLmjE42iG2DbEhoaCgEQcBvv/3GgrIZ3Dnnb8yYMdUWk5WvS0pKwvHjx+16BeWAAQNgMBgAwLSgKTQ0VMpIJhqNBu3atYNcLoeTkxOcnJzg4OCAmzdvori4GP369cMnn3xS53zJKVOmwMfHx+xY5ehmdSr3g+eHNTWVWq1GRkZGjecvXryIjIwMPPTQQ6YRSV9fX+Tl5WHXrl0YNGhQtYviRFFERkYG1Gq1Xf9CTA3HVd42RBAEeHt7IzU1VeooNocrKBumtLQUgYGBKC8vx+bNmzF69GgYDIZWM4dw/vz5KCgogEajgUqlwtWrV3H58mXcvn0bRUVF+O6775CXl1dnQenh4QFvb2+z/5ydnWt8/Z0f1kRNkZycXOsc7vPnz8PJyQkDBgwwOz58+HDcvn0b2dnZNb5XEAQkJydbLCvZBxaUNmb8+PEoKioyrVylpktMTERMTAz0en2Dtw0URRF6vR4xMTFITExspoStz9ixY3Hz5k289957eOSRR7B3715s2rQJDz/8sNTR6uTi4oIOHTo024IrfliTJaSlpdX6i21ubi46depk1u4MALp06WI6XxNRFFvF1BSyLiwobcyjjz4KoKJ1CjWdUqm0yApfAEhISLCLkcp58+bh5MmTmDNnDl544QUAgLOzM+bNm2c1C5bq+rCu9Ntvv+H111/HO++8g+jo6CqLfKrDD2tqqrKysjr7mxYXF8PV1bXK8cpjJSUltb5fo9FAp9M1PiTZHRaUNubee++FIAiIjY2VOorV02q1iI+Pt+g14+PjbbrR9QcffIDvvvsOQ4YMwffffy91nEapz4e1i4sLRo8ejfDwcDz22GMICwtDfn4+Nm7cWK9ikR/W1BT1/TekqaPs1bXxIqoJF+XYGEEQ0L17d5w9e1bqKFYvNjYWoiiirKwM+/fvh0qlgkqlQnFxMSZNmoSQkJAq77l+/Tp27dqF7OxsCIIAX19f3HPPPfDw8ABg24sy4uPj8eKLL6Jjx444duyY1HEarT4f1l27dkXXrl1NX/fq1Qv9+vXDF198gV27dtWr4b1Go4GXl1eTspJ9qqlN0J3atGmD4uLiKscrRyarG71szH2IKnGE0gaNGzcOhYWFtc6RodpVrqAURRHFxcVISUmBwWCodQ9qtVqNjRs3wmAwYPbs2XjggQdw69YtbNiwwdRs3lYXZaSlpeGBBx6Ao6MjlEolXFxcpI7UaI39EHV1dUWfPn2Qk5OD8vLyZrsPkVxe91hQ586dcfPmTVOnhUo5OTmm85a4D1ElFpQ26JFHHgHAeZRNcecKSnd3d7z44otYvHgxpkyZUuN7EhISIJfLMW/ePPTp0wcDBgzA/PnzUVRUhMOHD5teZ2uLMgoLCzFq1Cjo9Xrs2LHD6pvqN+VDtCFtfflhTY1V+cSjNv3794dOp8Mff/xhdvzUqVNo164devToYZH7EFViQWmDIiIiIJPJLLrDh725c1GGTCarcy6SwWDAxYsX0b9/f7PROXd3d/j6+pr9o25LizJEUURQUBDy8vKwevXqaqcBWJvGfoiWlJTg4sWL8PLygqOjY7Pdh8jJyanO3rYBAQHw8/NDbGwsUlJScPnyZWzfvh3p6emYNm1atT0o78Rdv6ih+CuyDRIEAV27dsWZM2ekjmKV6rMo425arRZ6vd7UkuNOXbp0waVLl1BeXm4qNCoXZVj7P9gzZ87EhQsXsGTJEvzlL3+ROo5FVH5Y1/Yz8MMPP6BDhw7o1q0b2rRpA41Gg8OHD6OoqAgPPvhgnffghzU1VUBAAJKSkmodFZ8zZw727t2LhIQE09aLdzY6r4kgCPWaB0x0JxaUNmrs2LH48ccfodFoOBLSQI1ZhV05+b22Nh2lpaVmI1fWvijjtddew/bt2zF69GisW7dO6jgWVdeHdZcuXXDu3DkkJydDp9PB1dUVPXv2xKxZs9C9e/dar80Pa7KEoKAgHD9+vNbXODs7IywsDGFhYQ26duWTB6KGYEFpo+bOnYsff/wRGzduxF//+lep41iVpiyWaEibDmtelPHDDz/gzTffRNeuXXHw4EGp41hcXR/WEydOxMSJExt1bX5YkyV4enrCz8+v1r28G6NyL29uu0gNxTmUNurBBx+ETCbD9u3bpY5idRqzWKJNmzYAUGub
jrtXPlvrooyzZ89i7ty5cHV1hVKptNrvozaVH9Z1zTNrKEEQ4Ofnxw9rsojw8PBm+RkNDw+36DXJPrCgtFFyuRxdunTByZMnpY5idRozRUChUEAul1fbqiknJwceHh5VFmpY41SEvLw8jB07FkajEfv377fqR/Z14Yc1tXYKhaLBj7PrEhYWVueCH6LqsKC0YaNHj0Z+fj7y8vKkjmJV6rOC8m4ODg7o27cv/vjjD5SVlZmO5+XlITMzE/379zd7vTUuyhBFEcOGDUNhYSG++uorjBw5UupIzYof1mQNAgMDLdZdITQ0FIGBgRa5FtkfFpQ2bM6cOQCA6OhoiZNYn4CAALP5kGlpaTh37hwuXrwIoKKJ+blz53Du3DnTFnqTJ09GeXk5Nm/ejLS0NPzxxx/YvHkz2rRpg3HjxpmuZS2LMnJycsx2XJo2bRquXLmC5557DosWLZIwWcvhhzVZg+DgYEREREAulzd4VF0QBMjlckRERDR6XjARAMiMDenES1ZFp9PB2dkZoaGh2LNnj9RxrIparUZUVJTp648//hj5+fnVvnb58uWmUae6tl6slJycjKFDhwKo6GFZ+d/kyZMRERHRTN9Vw8ybNw9btmzBp59+irS0NHzyyScICQnB3r17pY7W4pRKJeLj4yGKYoMWQAiCAEEQEBYWxmKSmp1Wq0VsbCwyMjIgk8lqbSkkCAJEUYSfnx/Cw8M5ck5NxoLSxnXp0gXl5eXQaDRSR7E60dHRFl9BCQCXL1/G119/DeB/BYdMJkN5eTkeeugh/PDDDxa9X2MYjUZ06dLFbIvInj174vLlyxafV2gt7vywrvwwrgk/rElKarUaP/30E86dO4eOHTtWOe/h4QF/f38EBQVxgRhZDAtKGxceHo64uDjcvn0bbm5uUsexKlqtFlFRURZt7yOXyzF79mwEBgaa9ve+08GDBzF+/HiL3a+xMjIy0Lt3b7NjkyZNwq+//ooOHTpIlKp1UKvVSE5ORnp6erW/qN26dQsDBgzAQw89xA9rkkRZWRm6d++OW7du4ccff8S4ceOg1+shl8utcv42WQfb6/dBZmbPno24uDh8++23WLp0qdRxrErlogxLbmEZFhaGPn364Pfff0dwcLDZKJdMJkN0dDRGjBhRpcVQSztw4ECVY/v370dwcDBOnjzZoH6btsbT09O0WEen00Gj0Zg+rL/66it8+umnaNOmDe677z6Jk5I9MhqNWLp0KW7dugUA+OOPPzBr1iyJU5E9sM9nV3akcmHOTz/9JHES6xQYGGix9j53LsoYP348PvzwQ7PzHTp0wJo1a9CuXTssWrQIhYWFFrnvnXQ6HVQqFbKzs6FSqUwLiu62b98+s68FQYCLiwvuvfdei2eyZk5OTvDy8kKPHj3g5eVl2mWpuLgYISEhZlMGiFrC6tWrsXHjRtPXnD9PLYWPvO1A586dIYoi9u7di8TERAQGBpqtOqbqlZWVYdmyZVi7di3++te/wsPDw6KLMoxGIx566CH8/PPPGDlyJI4dO4bvv/8e//jHP3Dt2jU4ODhg1qxZ+O9//wt3d3fT+95880107twZTz/9dL0yVD6iTUtLq3ZbSYVCgYCAALP5VC4uLqb2R926dcNzzz2HJ554gnMB6zB16lTTB7ggCBg8eDASExPRvn17iZORPdi9ezfuvfdes3+jXFxckJ+fz8fc1OxYUNqwixcv4rfffsM777xjNlIyf/58fPvttxIma92Kioqwdu1avPXWW6bHRiUlJSgpKbH4ooyCggI89thj+Mc//oGxY8eajv/6669Yvnw5rly5YmqGvXbtWmg0GgwYMAAA8Pvvv2PatGk1ZmjIis/K835+figtLcVTTz0FDw8PrF27Fvfff79N7obTHDp16mT6mQEq+pOOHTsWu3btknwaA9m227dvo0ePHigoKKhy7tChQxxEoGbHgtJGGY1GeHh4IC8vz6yYkMlk+M9//oPnn39e4oStj1arxeeff45///vfKCgoMP2ZDR8+HEql0vS6uhZlWHIF5a5du/DnP/8ZaWlpkMlkppXXRqMR7du3x5kzZ9CjR48q72tKmxudTof8/HysWrXKrudKNlROTk6VnYMqf7E4cOAAJkyYIFEysgclJSV4/vnn8fvvvyMzMxNAxSJAvV6Pt99+GytWrJA2INk8DjvYKJlMhjfffBPLli0zG5kyGo38TbUGISEhOHXqlNkxQRAwdepUs2O1Lcqw9ArKadOm4eLFizh48CAef/xxpKWlmc7dvn0bDz30EA4ePGi2rWNiYiISEhIadT9RFCGXy9GxY0ccOHAAwcHBTf4e7MWZM2fMvpbJZFi5ciXCw8MxfPhwiVKRvXB1dcWXX34JAKYnIs888wyOHj2KgIAAKaORneCiHBv2l7/8BcuWLTMbZXJ0dOSHWw1WrFgBZ2dnsz8vURQRFBRU43vuXpTRXPOUJkyYgODgYDg4OJiOGQwGHD9+HI8//rjpmFKpbHQxebeEhASzkVmq3YABAxAZGYmvv/4aS5cuhdFoRGhoKP/3Ri1Kp9MhLy8Po0aNwjvvvIO9e/di9uzZUsciO8BH3jbOYDAgIiIC8fHxAICRI0fi+PHjEqdqvdauXYunnnrK7Fh6enqVnowtTaPRoEuXLjAYDJDL5TAajTAYDKbR53vuuQeffvoptm3bVm3fzJ9//rnK6OudlixZAm9v7yrH5XI5IiMjuRinga5evYpevXphzpw5+P7776WOQ3bkhx9+wOzZs/HJJ59g+fLlUschO8KC0g7cvn0bffr0gUqlwowZMxAXFyd1pFbp5s2b8Pb2hl6vR3BwMPbu3Yt27dohPz9f8rmEpaWleOONN5Cfnw8HBwfTf7dv30ZCQgJu3LiBpUuXon379tXOmdRoNNU2Uv/uu+/g4OCA559/vtodcARBgI+PDxYsWNAs35ctUygUcHR0RG5urtRRyI7Mnz8fmzdvRk5ODjp37ix1HLIjnENpB9q1a4e9e/diwIABcHd3b/Z5f9ZIFEWMHj0apaWl+OGHHzBz5kx88sknMBgMkheTQEXrj3feeafG81euXMHGjRtrXIDj4eFRpZ9mZmYmiouLERwcXON2iqIoIiMjA2q1mru+NNCECRMQGxuLW7duVbv9HVFzOHr0KFxdXVlMUotjQWknOnXqhKeeegqdOnXCu+++W+V8db0I7cn8+fORkZGBZ555Bg899BAA4K9//avEqeovNTW1ztZAd6ucH1nXHD9BEJCcnGxaiET1s3TpUsTGxuLTTz/FypUrpY5DdiIrKwv9+vWTOgbZIRaUNu7OXoTdu3evseDQarVISkrC8ePH6+ydaGvWrVuH77//HkOGDEFUVJTUcRolLS2tQcVkaWkpUlNT4efnV+ffsyiKSE9Pb2pEuxMWFga5XI4ffviBBSW1iAsXLqC8vBwTJ06UOgrZIa7ytmFKpRJRUVGmnmR1FRyV5zMzMxEVFWUXK3zPnTuHpUuXol27djh06JDUcRqlrKys2h1wanPmzBno9fp6r0DWaDQ1btNI1RMEAf3798f58+cb1Au
UqLEqN6x45JFHJE5C9ogFpY1KTExETEwM9Hp9gz/MRFGEXq9HTEwMEhMTmymh9EpLSzFhwgTTtpRubm5SR2qUhhaTAHDixAm4urqif//+9X5PdU3cqXYPP/wwDAYDYmJipI5CdmDPnj0QBIG9hkkSLChtEHsR1s/kyZORl5eHjz76qNZek61ddW2CaqNSqXD9+nUMGTKkQVsqNvQ+BPz5z38GAKxZs0biJGQPUlNT4eXlVeMiO6LmxJ86G6PVak09Jy0lPj6+UaNgrdkLL7yAY8eOITw8HM8995zUcZqkoftsnzhxAgAQGBjYrPehisVuXbp0weHDh6WOQjaupKQE+fn5bKRPkuEnhI2JjY2FKIooKyvD/v37oVKpoFKpUFxcjEmTJiEkJMTs9TU1vO7YsSOWLVsGoOIReGxsrM30ItyxYwc++OADdO/eHb/++qvUcZrs7nZAtdHr9Th9+jS6d++OLl26NNt96H+mTJmCzZs3IzMzEz4+PlLHIRv1008/AQDCw8MlTkL2igWlDVGr1cjIyAAAFBcXIyUlBV5eXujXr1+tj63lcjkWLVpkduzOvaFtqRehSqXCgw8+CCcnJxw/ftwmHg05OTlBoVDUaxT5/PnzKCkpafDoJHuVNt5f/vIXbN68GZ988gk++eQTqeOQjfrll18AAHPnzpU2CNkt6/80JZPk5GRTE253d3e8+OKLWLx4MaZMmVLr+2QyGby9vc3+8/LyMntNZS9CayaKIkaNGoWysjL8+OOP6Natm9SRLCYgIKBeDdiVSiUcHR0xaNCgel9bEAT4+/s3JZ5dGzt2LFxcXLgwh5pVUlIS2rZtC3d3d6mjkJ3iCKUNubMXoaV3d7GFXoR/+tOfkJWVheeff97mHgsFBQXVa4/2hQsXNvjaoiha9aKl1mDYsGE4duyYaXcqIku7du0ahgwZInUMsmMcobQRjelFWEmv1+PDDz/E66+/jv/85z+Ii4tDcXFxlddZcy/Czz//HD///DNGjBiBjz76SOo4Fufp6Qk/Pz+LP8IXBAF+fn5WP9VBao8++iiMRiO+/vprqaOQDTp58iT0ej0mTZokdRSyYywobURji0kvLy/cc889mDVrFh599FEMGzYMJ0+exFdffYWysrIqr7fGXoQnT57Es88+iw4dOuDAgQNSx2k24eHhzVJQ2tporhSWLFkCmUzGgpKaxebNmwGwoTlJi89ebERjewSOHTvW7OvevXuja9eu2Lp1K5RKZZXz1taLsHJ1O1DR7N3V1VXiRM1HoVAgLCzMonP1wsLC7GYLzubk4uICb29vpKSkSB2FbFBCQgIcHBwwcuRIqaOQHeMIpY2w5Lysfv36wdHREdnZ2c16n5YwceJEFBQU4LPPPrOL+UWBgYFVWkM1VmhoaINXg1PNZsyYgeLiYpw8eVLqKGRjLly4YFOLDMk6saC0Ec3RI7C6hT2tuRehwWDAlStXTF8vX74cSqUSDz30EJ555hkJk7Ws4OBgREREQC6XN/gRuCAIkMvliIiIwMSJE5spoX2qbKC/atUqaYOQTSksLMTt27e5cI4kx4LSRlT2IrSE1NRUlJeXo0ePHmbHW3svwrVr18LX1xdvvvkmfvnlF6xevRq9evXC1q1bpY7W4gIDAxEZGWlqpF1XYVn5y4OPjw8iIyM5MtkM+vbtCzc3N+zcuVPqKGRDtmzZAgCIiIiQOAnZO+t6fkm1CggIQFJSkql1UFpaGnQ6nWlltlqtxrlz50yvLS4uxo8//ohBgwaZRh6vXLmCo0ePwtPT06yokMlkrb4X4aFDhwAAr732GmQyGZydnW2meXljKBQKLFiwAGq1GsnJyUhPT692UdWtW7eQn5+Pf//731zN3cxGjx6NPXv2oLCwEG5ublLHIRtQOWd69uzZEiche8eC0obc3YswNjYW+fn5pq9TU1ORmpoKoOJxsIuLC9q2bYsjR46gsLAQRqMRHTp0wOjRozFx4kSz0Uij0Yinn34abdq0gZubG4qLi1FcXIySkhK0bdsWSqVS8oa6R44cMRXTRqMR7du3x/Xr19G5c2dJc0nN09MTYWFhAACdTgeNRmPqh+js7Gz6ZWLYsGF4/vnnpYxq85544gns2bMHX375Jf7+979LHYdsQEpKCtq3b89fUEhyMmPlJzDZhOjoaGRmZkIURYtdUyaTobS0FO+++2615z09PZGVlQVnZ2eL3bOhbt++jQ4dOuDOH2eZTAZHR0ecOHECAwYMkCxba5aUlIRRo0aZvl6/fj0ef/xxCRPZNlEU4ejoiGHDhnHFN1mEXC7HiBEjcOzYMamjkJ2zz2eBNqw5ehE6ODjgH//4Bz7//PNqzy9durRZi0mdTgeVSoXs7GyoVKpqm6ufOHHCrJh0cHCA0WjE5MmTq2wjSf9z9uxZs6+feOIJbNu2TaI0tk8QBAQEBODMmTNSRyEbcOzYMRgMBoSGhkodhYiPvG1Nc/YijIyMxJkzZ7BmzRqz4u3NN9/EwYMH8cUXX6Bv374WuWflvL+0tLRqm7YrFAoEBAQgKCgInp6e+Pnnn83Oz5w5Ey+//DKGDRtmkTy26syZM3B0dER5eTmAiqkC8+bNQ4cOHXDPPfdInM42zZw5E++99x727NmDKVOmSB2HrNh3330HAJg/f77ESYg4QmmTmrMX4apVqzBu3Dg4ODjAwcEBM2fOxKBBg5CQkIB+/fph0KBB2L17t9k1zp07h8mTJ+PSpUt13k+r1SI6OhpRUVFISkqqcQcgrVaLpKQkREVF4euvvzaNqs2cORMXLlzAtm3bWEzWw8mTJ03FJFAxsqvX6xEbGythKtu2fPlyAEBUVJTEScja7d+/H3K5HIMGDZI6ChHnUNoypVKJ+Ph4iKLYoDmVgiBAEASEhYVV2z4mNzcXw4YNQ05ODi5dugQfHx9cuHABkZGRSEhIgNFoRNeuXfGvf/0LTz/9NObOnYstW7agb9++SEpKQrt27SyaFwDKy8vh6+uLJUuWNOh99s7Lyws5OTmmr2fMmIG33noLw4YNq7YPKVlGx44dIZPJcPPmTamjkBVzc3ODp6cnLl++LHUUIhaUtk6r1SI2NhYZGRkQBKHWQq3yvJ+fH8LDw2vta5mWlobU1FQ88MADVe63fPlybNmyBTqdDm3btkVRURGAitGv8PBw/PTTT1XmeSYmJiIhIaEJ32mFkJAQBAcHN/k69mL9+vUAgHvuuQc+Pj4ICgri5P4WMHPmTPzyyy+4ceMG5/hSo2g0GnTs2BFz5szB999/L3UcIhaU9qKuXoQeHh7w9/c3zUlsKr1ej9dffx0ffPBBlUU0//rXv7By5UrT10ql0qJzPiMiItiYuxH69OmDK1euoKysTOooNm/37t2YNm0aVqxYgbffflvqOGSFoqKi8Oc//xmbN2/GI488InUcIhaU9ujuXoTNtQOOVqtFt27dUFpaWuXcqlWr8Oyzz0Kr1SIqKgp6vd5i95XL5YiMjLTYzkH24oUXXsAHH3yAPXv2cNVoC3ByckLv3r3xxx9/SB2FrNB9992H3377DSUlJXBxcZE6Dh
ELSmo+n332GZYtWwYHBwfIZDIYDAaz1eFPPPEExo0bh+zsbJSUlGD//v1QqVRQqVQoLi7GpEmTql1cZDAYcOzYMZw8eRIajQYODg7w9PTEPffcg549e0IQBPj4+GDBggUt+e1aPZVKha5du2LmzJn46aefpI5j8wIDA3H69GnodDq73c2JGq9bt24oKSmpceEiUUtj2yBqNpMnT8bf/vY3uLq6wtXVFW3atEGbNm1w+/ZtHD9+HCdPnjTtF15cXIyUlBR4eXmhX79+UCqV1V5TFEV8//33uHr1KsaPHw9vb2+Ul5fj+vXrptXKoigiIyMDarWaWwk2gJeXFzp27Ij9+/dLHcUuzJ07FydOnMAPP/yAhx9+WOo4ZEVEUUROTg7GjBkjdRQiE45QkmTi4+NNe49X/hjKZDIUFRXhww8/rHaE8siRI9i5cycef/xxeHt713htQRAQFBRk2nKQ6mf27Nn44YcfkJWVZSr2qXkUFhaiXbt2mDZtGnbu3Cl1HLIi+/btQ0hISJX56ERS4nMWkkxaWppZIVmfNjXHjh1Dr169ai0mgYrf4NPT0y2S055U9kj8+OOPJU5i+9zc3NC1a1ccPXpU6ihkZbZu3QqADc2pdWFBSZIoKytr8Nyf/Px85OXloXPnzti9ezc+/PBDvP766/j8889x8uTJKq/XaDTVbtNINZswYQKcnZ3x66+/Sh3FLkybNg23b99GWlqa1FHIihw4cABOTk4ICAiQOgqRCQtKkkRjJpIXFBQAAE6dOoULFy5gxowZmD9/Pjw9PfHLL78gJSWlynuqa5FEtQsMDERGRgaL8Rbw3HPPAQA++eQTSXOQdbl06RJ69uwpdQwiMywoSRKNaRNU+Xhcr9dj/vz5GDhwIPz9/TF79mx07dq12sUklmxHZC8WLVoEo9GIjRs3Sh3F5g0fPhyurq6Ii4uTOgpZCZVKhZKSEowePVrqKERmWFCSJOTyhjcYaNOmDQCgU6dOcHd3Nx2XyWTo3bs3CgoKUFhY2OT72LvFixdDJpOxoGwhI0aMwNWrVzkiTPWyefNmAMBDDz0kcRIicywoSRIeHh4Nfo9CoYCjo2Otr7l7YU9j7mPvnJyc4OvrixMnTkgdxS5UjghXboNJVJsdO3YAqNgRjKg1YUFJknBycmrwTjYODg7o27cv1Gq12RxMo9GI9PR0KBQKtG3b1nS8uXYAsgcPPPAASktLcejQIamj2LyFCxdCJpPhm2++wYEDB/Cvf/2r2kVmREDFHPKOHTvy6Qu1OvyJJMkEBASY+lACFW2EdDqd6dGfWq3GuXPnTK91cnJCaGgo0tPT8e2332Ly5MlwdnaGUqmESqXC7NmzTdcWBAH+/v4t/03ZiOeffx4ff/wxVq1ahfHjx0sdx2bdvHkT8fHxcHV1xdGjRxEcHAygYqrGsGHDpA1HrcahQ4dQWlqKoKAgqNVq088JUWvCxuYkGbVajaioKNPXH3/8MfLz86t97fLly00jmjk5Odi9ezeuXLkCURTh5eWFiRMnom/fvmbviYyM5E45TeDh4QEHBweo1Wqpo9ik27dvo2vXrigqKoJMJjPblvSnn37CzJkzJUxHrUnv3r2RkZFh+nr06NH485//jHvuuQddunSRMBnR/7CgJElFR0cjMzMToiha7Jrcy9syZs6ciV9++QU3btyAl5eX1HFsjtFoxGOPPYbo6Gjc/c/wpUuX4OfnJ1Eyam3mzp2Lbdu2mf6ddHBwgMFgwMSJE5GYmChxOqIKnENJkgoPD4cgWPbHUBAEhIeHW/Sa9mjZsmUAgFWrVkmcxDbJZDKsX7++yv8GXF1d4ePjI10wanUmT55s9kuHwWCATCbDyy+/LGEqInMsKElSCoXC4vtth4WFNXjBD1UVGhoKJycn/PTTT1JHsVlyuRxbt27FhAkTTMcGDBhg8V+yyLrdXVACwIcffoh7771XokREVfFfLZJcYGAgQkJCLHKt0NBQBAYGWuRaBAwdOhTp6elsEN+MXFxcEBsbC19fXwAVjzOJ7tS3b1+z3rvz5s3DX//6V+kCEVWDBSW1CsHBwYiIiIBcLm/w6IwgCJDL5YiIiMDEiRObKaF9WrBgAURRxLfffit1FJvWrl07HD16FIIgmFpd6XQ6qFQqZGdnQ6VSsfG5HZPJZOjRoweAigU669atq9Jzl0hqXJRDrYpWq0VsbCwyMjIgCEKti3Uqz/v5+SE8PJyPuZtBaWkp2rRpgwkTJnDyfwt49tlnUVBQgMDAwGr3u1coFAgICEBQUBA7GNiZSZMmITExEVevXoW3t7fUcYiqYEFJrZJarUZycjLS09Oh0WiqnPfw8IC/vz8/WFuAj48PcnNzUVxcLHUUm3XnL1J3txC6W+V5/iJl23Q6HTQaDfR6PeRyOZYuXYry8nLu+06tFgtKavXu/oeVO+C0rGXLluGzzz7D8ePHMXLkSKnj2BylUon4+HiIotig9lmCIEAQBISFhXHesI2o/EU6LS2t2hHqtm3bYuDAgfxFmlolFpREVKvLly/Dz88P8+bNw6ZNm6SOY1MSExORkJDQ5OuEhIRw9xQrxhFqsgUsKImoTu7u7nB2dkZOTo7UUWyGUqlETEyMxa4XERHBkUorxBFqshVc5U1EdZo4cSJyc3Nx8+ZNqaPYBK1Wi/j4eIteMz4+vtrHpNR6JSYmIiYmBnq9vsG7hYmiCL1ej5iYGC6Yo1aBBSUR1SkyMhIAsHr1aomT2IbY2Nh6FxApKSlYuXIl3n777VpfJ4oiYmNjLRGPWoBSqbTIdAcASEhIgFKptMi1iBpLLnUAImr9pk+fDkdHR2zbtg1vvPGG1HGsmlqtRkZGRr1eW1BQgJ07d6Jdu3YoLS2t9bWiKCIjIwNqtZoLNlq5ukaoy8rKsH//fqhUKqhUKhQXF2PSpEm1bgARHx8PX19fzqkkyXCEkojqJJPJMHjwYFy8eLHBj+bIXHJycr2bUsfGxqJXr17w8/Or1+sFQUBycnJT4lELqGuEuri4GCkpKTAYDOjXr1+9rskRapIaC0oiqpd58+ZBFEV89913UkexamlpabWu4q106tQpZGZm4r777qv3tUVRRHp6elPiUTOrHKGuraB0d3fHiy++iMWLF2PKlCn1uu6dI9REUmBBSUT1snTpUgDA+vXrJU5ivcrKyuq1cKawsBA7duzA1KlT0aFDhwbdQ6PRcJvGVqw+I9QymaxRWytyhJqkxIKSiOqlbdu28Pb2xrFjx6SOYrXquwo7Li4OnTp1anQj+ep2l6LWob4j1I3BEWqSEgtKIqq3GTNmoLi4GCdPnpQ6ilXS6/V1viY1NRUXL15EREREo0ap6nsfann1HaFuCo5Qk1RYUBJRvT3//PMAgI8//ljiJNZJLq+9sUZZWRni4uIwatQotGvXDiUlJSgpKYHBYAAAlJSU1KtYqOs+JI2W6hPKEWqSAv/VIaJ669u3L9q1a4edO3dKHcUqeXh41Hq+uLgYRUVFOHLkCI4cOVLl/Pvvv4++ffvikUceadJ9SBotNXLMEWqSAgtKImqQ8ePHY8eOHcjLy
4O7u7vUcayKk5MTFApFjSNVbm5uWLRoUZXjBw8exJUrVzB//ny0adOm1nt4eHjAycnJInnJslpq5Jgj1CQF/tQRUYMsXboUO3bswOrVq/Haa69JHcfqBAQEICkpqdqFGY6OjvD19a1y/OTJk5DJZNWeu5u3tzeuX7+O27dv4/bt2ygoKMDt27fh6+uLIUOGWOR7oMZpqZFjjlCTFFhQElGDREREQC6XY9u2bSwoGyEoKAjHjx9vtus/+eST1e65PnjwYJw+fbrZ7kt1q2uE+k5paWnQ6XSmObNqtRrnzp0DUPFLSU2j0ByhJqmwoCSiBhEEAQMGDMC5c+cgiiIEgWv7GsLT0xN+fn7IzMys965DM2fOxMyZM2t9jSAIMBqN1RaTMpkMc+fObVTe+tDpdNBoNNDr9ZDL5SxqalHbCPWdYmNjkZ+fb/o6NTUVqampAIDly5dX++crCAL8/f0tG5ionlhQElGDzZ07FytWrMArr7wCjUaDixcvIi4uDq6urlJHswrh4eGIioqy6DaWgiAgMjISPj4+VeZhGo1GGAwGi/4CoFarkZycjLS0tGpH3BQKBQICAhAUFMS9xe9Q3xHqyo4KDSGKIoKCghoTi6jJZMbm6rBKRDanuLgYmzZtwo8//ojff/8dQMXol9FoRH5+Ptq3by9xQuuhVCoRExNjsetFREQgMDAQAPDCCy/gww8/hNFoNO26IooiXFxc8Mgjj+Cjjz4yW1CVkpKCDz74AGvWrKlzoZVWq0VsbCwyMjJMf/c1qTzv5+eH8PBwKBQKS3yrVi86OrpBI9T1IQgCfHx8sGDBAotdk6gh+KyKiOpt3bp1eOqpp8zaBhmNRnTt2pXFZAP5+PhYrEF8aGioqZgEgHfffde0B7jRaMTJkyfx/vvvo3379tiwYQM8PDwwadIk05zKFStWYOvWrZg9e3atLWeUSiWioqKQmZlpunZtKs9nZmYiKioKSqWyKd+mzQgPD4coihbdMUcQBISHh1vsekQNxRFKIqq3wsJChISE4MSJE6Zm2wAQFhaG3377TcJk1qOsrAzR0dF45plnIJPJEBsbi6SkJIii2KARK0EQIAgCwsLCzIrJSrdv38b48ePh7e2NuLg40/Hff/8d//jHP3DmzBkAQI8ePZCdnQ2gYkRx2bJlWLVqVZXrJSYmIiEhoaHfbhUhISEIDg5u8nWsjSiKSE5ORnx8PP7973+jT58+uP/++y12/TtHqImkwIKSiBpEo9FgwoQJuHDhAkRRhEwmwwsvvIB3331X6mit2q1bt7BmzRp88sknUKvVAIDp06cjPj7e7DGyIAi1FpaV5+vzGNlgMMBgMFS7gOPKlStYtmxZtY/d16xZg6eeesr0dXM+nrdlBoMBW7ZsQVxcHH777Tfk5eWZznl6euKHH36wSJEeGhqKiRMnNvk6RE3BgpKIGkylUmHs2LGmR5+bNm3CvHnzpA3VShUWFuKf//wnvvrqK5SXl5sVizt37sS0adNMX1cudElPT692+zwPDw/4+/tbbKGLVqtF165dUVZWZnZcJpNh8+bNmDt3LrRaLaKioiy6+4pcLkdkZKTNz6ncsWMHwsLCqv0lYdu2bfjTn/4EpVKJ+Pj4Bo9Qi6IIJyenGkeoiVoaC0oiapQrV66gT58+0Ol0OHbsGEaNGiV1pFYpMTERkyZNqnJcJpMhLy+vxrmnLdGKZ9WqVXjuuefg4OBgKnrunMqwbNkyBAYGIisrq9pi58aNG9i7dy9ycnJQXFwMuVyOTp06YeTIkRg6dGiN97WXBSQGgwEPP/wwfv75Z7P5kh06dEBOTg6cnZ0BoFEj1JcuXUJycjIOHDiAjh07Nvv3QlQXtg0iokbp1asX1qxZg8WLF0Oj0bAXYQ2Cg4OxefNmLFy40GyUr1+/frUuZHJycoKXl1ezZhs2bBjmzp0LNzc3tGvXDu3bt0e7du2Ql5eH06dP4+DBg7UWK6WlpWjfvj0GDRqE9u3bQ6fT4cyZM/j555+Rl5dXbSENVIyuZWRkQK1W23RLIQcHB7z88sv45ZdfTAWlXC7HY489ZiomgYoWSwsWLKj3CPWQIUPQo0cPABU/R9999x2mTp3aMt8UUQ04QklEjZabm4vnn38e/fr1q3ZUhb0IK9y8eRM9evRAWVkZHBwcAABPPPEEvvzyS4mT1S4+Pr5eTbjvtnbtWty+fRt//etfa3yNIAgICgpCWFhYU2O2WomJiZg6dSr0ej169uyJq1evwmg04vTp0xg8eHCt763tFzRRFE0/R5X+9re/4e233zYrVIlaEtsGEVGDabVaREdH44svvkDfvn1rfESn1WqRlJSEqKgoREdH12vLOVsjiiICAwNRVlaGNWvWIDAwEAaDAePGjZM6Wp3S0tIa1dqmTZs2dTZQF0UR6enpjY3W6v3www8ICQkBAOzbtw9JSUnw9fXFyJEj6ywmgf+NUPfo0QNeXl5mo/2CIFQpKD/66COMHDkSubm5lv1GiOqJj7yJqEHuXEQANLwXob0tIrj33nuRlZWFv//973jqqaewcOFCbNmyBXPmzJE6Wq3Kysrq/QtAZU/F0tJSnDt3DpcuXcKMGTPqfF/lVAlbmxrx2WefYdmyZXB1dcXx48cxaNAgAMCZM2dQXl5ukXs4OjqazXcVBAEXLlxAVlYWOnfubJF7EDUEC0oiqrem9CKsXMUaExODwsJCu+hFuGLFCuzevRuTJ0/Ghx9+CABwcXGpsjVia9SQ0eS4uDikpKQAqJg3GBYWVu8tADUaTbPPFW1Jr776Kt566y24u7vjzJkzprmOQMXIraU4OjqitLTU9PWDDz6IVatWoXv37ha7B1FDsKAkonpRKpUW6ZkHAAkJCXBzc7Ppkcqff/4Z7777Lrp3745du3ZJHafBGtImaOLEiQgMDERRUREuXryI3377DTqdDuPHj7fofVq7J554AuvXr0e3bt1w7ty5OrexbApXV1fcvn0b9913H+Li4nDx4kUWkyQpFpREVCetVov4+Pgaz9+4cQP79u3DtWvXUFpaig4dOmDw4MEYN25cjY8z4+Pj4evra5O9CNPS0vDwww/DxcUFKSkpkMut75/ahmR2d3c3FU99+vQBAOzZswfDhg1D27ZtLXaf1iw8PBxxcXHo378/Tp482eyP8b///nsoFAoMGzYMM2bMQHx8PPbt24fJkyc3632JasJFOURUp9jY2BoX3uTm5mL9+vXIy8vD9OnTMW/ePAwaNAj79+/Hjz/+WOM1RVFEbGxsc0WWTHFxMUaNGgWDwYBdu3ahS5cuUkdqFA8Pj0a/t3v37hBFsV6PzZtyn9ZAFEWMHDkScXFxGD9+PM6ePdsic0JDQkIwbNgwAMA333wDQRCwePHiZr8vUU1YUBJRrdRqNTIyMmosKM+cOQO9Xo85c+Zg0KBB8PPzQ0hICAIDA3HhwgWUlJRU+747exHakjFjxiAvLw+rVq3ChAkTpI7TaE5OTo0ePb58+TJkMlmd77f2XqWlpaXo06cPkpOTMWvWLBw8eLDO1e3NoVOnTpg7dy4yMzPxww8/tPj9
iQAWlERUh+TkZMhkshrPV7Yvubv/nYuLC2QyWZX2JncSBAHJycmWCdoKPProozhz5gzmz5+PZcuWSR2nyQICAmr9u9++fTt+//13nD17FpmZmUhNTcW2bdtw+vRpjB07ttbH3TKZDP7+/s0Ru0XcvHkTvXr1wqVLlxAZGVnraHxLWLt2LRwdHfHnP/9Z0hxkv2xj8goRNZu6ehEOHToUR48eRVxcHKZOnYq2bdsiMzMTycnJGDlyZK0jULbUi/DTTz/Fpk2bMGjQIHz77bdSx7GIoKAgHD9+vMbz3t7eOHHiBE6dOoXS0lI4OTmhS5cumDlzZq1bLwIV7aRWrlyJH374AQ4ODjAajab2Q1OnTm1Ve8MXFBSgsLAQ3bp1A1AxAjts2DAUFBTgzTffxCuvvCJxwooV5M888wxWr16NqKgoREZGSh2J7Ax3yiGiGpWVleG9996r83VqtRpbtmzBzZs3TcdGjx6N6dOn1zrCVemll16y6kefhw4dwsSJE9GhQwdcu3bNou1hpBYdHY3MzMxa95duKJlMhszMTGzYsMH0tSAIkMlk0Ov1mD17NrZu3Wqx+zXVrFmzsHPnThw6dAgGgwHjx49HWVkZ1q5diyVLlkgdz0Sv16N9+/ZwdHSEVquV5PE72S/+tBFRjeqzqEKr1eK7776Dq6srHn74YTz22GOYNm0aTp48ie3bt9frPtXtW2wtVCoVpk6dCgcHBxw7dsymikmgYvWypQsTBwcHvPHGG6YG3EajEQaDwdRC6J///KdF79cUOTk52L59O4qKijBx4kSMGjUK5eXl2L59e6sqJoGKFfMrVqxAQUEB3njjDanjkJ1hQUlENapPj8Ddu3ejrKwMCxYswIABA+Dj44Px48dj+vTpOHHiBDIzMy1yn9ZIr9cjKCgIpaWl2Lp1q6llji1RKBQW3287LCwMPXr0wK5du6odmf7ggw+Ql5dn0XtW0ul0UKlUyM7Ohkqlgk6nq/X10dHRpikft2/fhsFgwC+//ILw8PBmyddUK1asQIcOHfD+++9b7f+uyDpxDiUR1ag+PQJVKhU8PT2rFAaV881yc3Ph4+PT5Pu0RtOmTcO1a9fw0ksvYebMmVLHaTaBgYEoLCy0SGP70NBQU0P7IUOG4IsvvjAb6evWrRu2bduGH3/8EQ888ADWrVtXpbWQSqVCmzZt0L59+3rdU61WIzk5GWlpadWOuisUCgQEBCAoKAienp6m40ajEf/973/NHvcLgoCXXnrJNMWhtREEAR988AGefvppLF++HJ9//rnUkchOcISSiGpUnx6B7dq1Q25uLsrKysyOZ2dnA0C9PvStsRfhP//5T+zbtw9Tp07FO++8I3WcZhccHIyIiAjI5fIGPwIXBAFyuRwRERGYOHGi2bnFixdjwYIFAICpU6fi2rVr2LFjB3x9ffHzzz/D09MT999/P3JzcwFUjDCOHDkSY8aMQXFxca331Wq1iI6ORlRUFJKSkmqcwqHVapGUlISoqChER0ebXnfs2DGkpaWZvVYmk+Hs2bPYvHlzg/4MWtJTTz0FLy8v/Pe//0VhYaHUcchOcFEOEdVq9erVtc6lPH/+PL7//nv06NEDY8aMQZs2bZCdnY2DBw+iQ4cOePrpp2sdgfTw8LC6Fjtbt27FnDlz4O3tjczMTLta/KDVahEbG4uMjAwIglDrYp3K835+fggPD6+xL2VRURGee+45LF++HIMGDTId37t3L5555hlcvHgRMpkM06dPx5QpU/D3v/8dgiBg0aJF+Oqrr6q9plKpRHx8vGkP+foSBAGCICAsLAxz5841Kyg7d+6M++67DzNmzMD999/fqheS/fzzz5g1axbmzJmD77//Xuo4ZAdYUBJRreLj45GUlFRr66DLly/j4MGDyMnJMW292KdPH0ycOLHORSqjRo2y+Bw9S1u/fj3OnDmD9957D5cvX8bgwYPh5OSEK1eumD0itSeVj5HT09OrXVTl4eEBf3//Ko+RGyMxMRFPP/00zp8/X+Xchg0b8Nhjj1V5vSUez+/ZswdKpRIrVqxAeHg4Bg8eXK+uBa1F7969kZmZiRs3bpgWQBE1FxaURFQrtVqNqKioZrv+l19+CScnJ/Ts2dM0miSKIpycnLB+/fpWsdClf//+OH/+PIYPH4709HQUFhbi8OHDGDNmjNTRWgWdTgeNRgO9Xg+5XN5sO+C8/PLLVaYXODk5ISUlxTSyqVQqERMTY7F7RkREmOZ8WpsDBw4gODgY99xzD37//Xep45CNs86Z8ETUYjw9PeHn52fxXoSCIMDV1RU5OTkwGo24evVqlde0hlWq+fn5uHDhAgDgxIkTAIAXXniBxeQdnJyc4OXl1az30Ov12LRpU5XjlXMqU1NT4e7ujvj4eIveNz4+Hr6+vo3ehlJKEydOxJAhQ7Bz505cunQJvXv3ljoS2TCOUBJRnbRaLaKioixa4MnlckRGRuLUqVMIDQ01e6Quk8kQERGBX3/91WL3a6ydO3fi3nvvNTvm5OSEr7/+GnPnzpUolf3JysqCn59fjT+Dzs7OeOONN1BWVlbtLz4ZGRk4ffo0srKyUFBQABcXF3Tr1g2TJk0ydSSojiAI8PHxMS0csjZnz57F4MGDMXr0aBw9elTqOGTD7GcmORE1WnP1IlQoFJg8eTLeeusts3NGoxG//fYbnnnmGZSWllr0vpXq24/w8OHDVebN6XQ6/POf/6x1XilZlre3N27duoWcnByo1WrcvHkTt27dglarxaVLl/Dcc8+hpKSkxlH05ORk5OXlYcyYMZg/fz6mT5+OoqIirFu3DhkZGTXeVxRFZGRkQK1WN9e31qwGDRqECRMm4NixY0hJSZE6DtkwjlASUb1ZarFDaGioWfsYURRx3333YdeuXQCAgQMHQqVSITc3F46Ojnj00Ufx2WefVVngo9frodfr4eLiUq/7NqYfYWBgoOlRNwC0bdsWS5cuxbPPPouePXs2+Hun5lHX4rHCwkK4ubmZHSsrK8Pq1avRuXNnLFq0qMZrC4KAoKCgVr94rCZXr16Fj48P+vfvj3Pnzkkdh2wURyiJqN6aqxehIAj49ttv0aVLFxgMBqxbtw45OTn49ttv0alTJ2zYsAEdOnTAo48+atZX79FHH0Xfvn3r3CKysf0Is7KyTMVkly5d8PHHH+PGjRv497//zWKylUlLS6t1xPjuYhKoeEzu6emJgoKCWq8tiiLS09ObnFEqPXv2xH333YfU1FTTL21ElsYRSiJqsOboRQhUzPc6fPgwnnrqKbPjP/74I55//nlkZWXBwcEBs2bNwnPPPYfx48cDAO677z5s37692iK3Kf0I9Xo94uLiMG3aNLz55ptWu6OPrSsrK8N7773X4PeVlpbi448/hq+vb73mw7700kutuvdkbTQaDTp37ozu3bvjypUrUschG8SCkogarSV7EQJAbGwsli1bZtofXCaTmUal3n//ffzzn/80e31TH9EbjUbIZDKEhIQgODi40deh5qVSqbBmzZoGv+/HH3/EuXPn8MQTT9S6MKfS008/3eyr2ZvTokW
L8M0332Dz5s145JFHpI5DNoYFJRFZREv1IgQqGo0/8cQTZsdkMhn27dtnKvzYj9B+ZGdnY/369Q16z969e5GYmIiwsDCMHj26Xu9ZsmQJevTo0ZiIrUJpaSnat2+PDh06WO0iI2q9OIeSiCyishdhjx494OXl1ayPBvft21fl8bbRaMTUqVNx+vRpaLXaZulHWNdcTZJGQ6ci7Nu3D4mJiQgNDa13MdmY+7Q2Li4uWLZsGW7evIlVq1ZJHYdsDEcoicjqtGvXzmxxzp0cHR2xcuVKGAyGGudMZmdnIyEhAVlZWTAajejevTtCQ0NrXWhj7f0IbZlOp8O7775br9fu27cP+/btw+TJkzF58uQG3cea51BWEkUR7dq1gyAIyM/Pt6t96Kl58SeJiKzO2bNncfz4cSQnJ0OpVOLkyZM4ffo0YmNjERkZifLy8hqLyWvXrmHDhg0oLy/HzJkzMWvWLOj1enz99dfIysqq8Z7W3o/Qljk5OdVrJ5v9+/ebpkU0tJhszikcLUkQBLz66qsoLCzEq6++KnUcsiEcoSQim1JXP8Lo6GioVCosX77cVCCUlZVh1apV6NixI5YsWVLjta29H6Etq+vv/fDhw9i5cyf8/f0xadKkKue9vb1rvLat/b2LoohOnTqhuLgYBQUFNlEok/Sse0IIEdFd6upHmJWVhYCAALMPUWdnZ/Tq1Qt//PEHbt++jXbt2lX7XmvvR2jLgoKCcPz48RrPV+7Hnp6eXu3f4cqVK2t8ryiKCAoKanLG1kIQBPznP//B448/jmeffRZffvklTpw4AYPBYFPfJ7UsFpREZDPKysrqXDhjMBiqXVzh4OAAAMjJyamxoAQq+vnpdDqO6rQynp6e8PPzQ2ZmZrXTHRYvXtyo61bOnbVE26vWZPHixXj55Zexbt06XL9+HTExMRgwYAB30qFG4xxKIrIZ9VmF7enpiezsbLOiw2Aw4Nq1awCAkpKSOq9RXc9Nkl54eLjFF5kIgoDw8HCLXrM1yMrKwsCBA2EwGBAbGwsAKC4uljgVWTMWlERkM/R6fZ2vGTVqFG7duoXffvsNBQUFyM/PR2xsLPLy8gBU9LO0xH2o5SkUCovPcwwLC6vXgh9rkpCQgN69e5ua/ldOEdHpdFLGIivHR95EZDPq0ycwMDAQxcXFSExMRHJyMgCgR48eGDduHA4dOlTr4+6G3IekERgYiMLCwibtkFQpNDTUJpvZ9+zZE15eXqZR+UosKKkp+K8iEdkMDw+Per1uwoQJGDNmDG7dugVnZ2e4u7sjJiYGjo6O9dqCr773IWkEBwfDzc2t0Xu4C4KAsLAwmywmAaB37944e/Ys/vznP+Pbb781HS8tLZUwFVk7PvImIptR336EQMUoY5cuXeDu7o68vDycPXsWI0aMgKOjY63vs5V+hLYuMDAQkZGR8PHxAYA651ZWnvfx8UFkZKTNFpOV2rdvj+joaHz33Xdo06YNAKCoqKjK63Q6HVQqFbKzs6FSqTiKSTViH0oisil19SPMycnBH3/8gW7dusHBwQE5OTk4ePAg3N3dsWjRIjg7O9d4bVvrR2gv1Go1kpOTkZ6eXu2CKg8PD/j7+yMoKMjmVnPXx5UrVzB06FDk5+cjKysLzs7OSE5ORlpaWrUL3RQKBQICAuz2z4uqx4KSiGyKWq1GVFRUjedv3ryJmJgY5ObmQqfToUOHDhg0aBAmTJhQr5HHyMhIfohaMZ1OB41GA71ej+DgYBgMBly5ckXqWJLLy8tDYGAg5s+fD7lcDplMVms/18rzfn5+CA8Pt7mFS9RwLCiJyOZER0fX2I+wsbiXt20pKiqCm5sbAODnn3/Ggw8+KG0giSmVSsTFxcFoNNZaSN7NHuacUv1wDiUR2Rz2I6S6/P7776b//7HHHsP169clTCOtxMRExMTEQBTFBhWTQMUuQnq9HjExMUhMTGymhGQNWFASkc1hP0Kqy08//WTqOVpYWIj58+dbdETbWiiVSou0WAIq+lsqlUqLXIusDwtKIrJJgYGBCAkJsci1bLUfob3S6XT49ddfTaNxBoMB+/btw8cffyxxspal1WoRHx9v0WvGx8fXa8cqsj2cQ0lENk2pVLIfIZnZuXMn7r333irH5XI5rl69iq5du0qQquVFR0dj7969OHnyJLKyslBQUAAXFxd069YNkyZNMuvJevToUZw5c8a0l33btm3h7e2NSZMmoXPnzqbXca6x/WJjcyKyaYGBgfD19UVsbCwyMjIgCEKthWXleR8fH65etVF3P+J1dHTE5MmTMWLECHTo0EGiVC1LrVYjIyMDx48fR3FxMcaMGQNPT08UFRXhyJEjWLduHR599FH4+fkBqNjjPiAgAF26dIGrqyu0Wi0OHjyItWvX4umnn0anTp0AVMypzMjIgFqtlqwbwp0r+eVyOXvHthCOUBKR3WA/QgKAq1ev4siRI+jbty/uueceAEBubq7EqVpWZb/W27dvm1a7VyorK8Pq1avRuXNnLFq0qMZrqNVqfP755wgODkZoaKjpuBT9Wiv/t83emdLhCCUR2Q1PT0/ThxxHMexXz5490bNnTwAVO+OcOHFC4kQtLy0tDUajsUoxCQDOzs7w9PREQUFBrdeo3GHn7o4KoigiPT3dcmFrodVqTU8fauudqdVqkZSUhOPHj7N3ZjPhohwisktOTk7w8vJCjx494OXlxWLSTg0cOBB6vd6uFpKUlZXV+v2Wlpbixo0b1Y7kVbYJUqvV2L59O9q2bYvhw4dXeV3lXMvmpFQqERUVhczMTACos+VR5fnMzExERUVxRbqFcYSSiIjs1pgxY7Bx40bs3bsXDz30kNRxWkRdxXNcXBzKy8sRHBxc5dzbb78Ng8EAAOjYsSMee+yxGuedajQaeHl5NT1wNRITExvd7qhygV5MTAwKCwur/T6p4ThCSUREdqty7t/BgwclTtJy9Hp9jef27t2LM2fO4N577zVb5V1pyZIlWLJkCWbNmgUnJyds3LixxvmnlfcxGAw4ePAg/va3v8HPzw/vvvtuk/Kzd2brxBFKIiKyWwEBAZDJZDh16pTUUVqMXF79R/++ffuQmJiI0NBQjB49utrXVBaZ3t7e6Nu3L1avXo09e/bgkUceqfLaXbt24fDhw/jpp5+g0Wggl8uh1+tx69ataq99/fp1/OUvf8GLL76IUaNGVfuayt6ZGRkZOH36dK3tjkRRxNGjR3Hp0iXk5uaipKQE7u7u6Nu3LyZMmABXV1cAFQuUfH19OaeyiThCSUREdq1du3bIyMiQOkaL8fDwqHJs37592LdvHyZPnlzvR8DOzs7o1KlTtQWi0WjE0qVLsW7dOlNHhcoRSzc3t2pbd+3cuRM///wzxo0bh3fffdf0aP1OsbGxEEURycnJyMvLw5gxYzB//nxMnz4dRUVFWLdunenvsry8HPv27YO7uzumT5+O+fPnIzAwECkpKfjqq69QXl4OoKLwjI2Nrdf3TDXjCCUREdm1bt264cqVK1LHaDFOTk5QKBSmuZT79+/Hvn37EBwcjMmTJ9f7OkVFRcjJyT
GtmL/7Hu7u7tU+Dn/99dfxxhtvoG3btujatSsCAgIQGBiItLQ00yjmihUr8Ntvv2Hz5s3w9vYG8L/emQAwY8aMKivU/f39sXr1ahw4cAB+fn5wdHTEc889Z1qNDgC+vr7o0KEDtm3bhtTUVAwdOrRV9M60BRyhJCIiu9anTx+UlJTUOrfQ1lQ+6j98+DASEhLg7++PgIAAZGVlmf0HVKz6/u9//4sjR47g4sWLyMjIQFJSEjZs2ACDwYBJkyaZXVsQBAwfPhzXrl3DG2+8AUEQ4ODgYDq/bNkyTJ06FZ6enrh27Rp+++03vPXWW9iyZYvZ38HBgwfRu3dvvPDCCyguLkZycrJp//X6tDsSBMGsmKzUvXt3ADBriyQIApKTkxv7x0ngCCUREdm5ESNGYPv27Th+/DjGjRsndZwWERQUhOPHj+PChQsAgPT09Gp7R65cuRJyuRxeXl5ISUlBQUEB9Ho93Nzc4OPjg4cffths60Wg4hFyUFAQ5HI5Xn31VUydOhVz5sxBVlYWPDw8sHr16iqvT0lJwb333ltlBXp5eTk++OADfPDBB1i5cmWt31NluyNfX99aX3f58mUAMBuNbMnembaKBSUREdm1yse8CQkJdlNQenp6ws/PD0uWLKlzj3u5XI7777+/Xtet3Mv7zmJt7NixOHv2LJ5//nm0a9eu2vcMGTIE+fn5AAAHBwcYDAb06NED9957L/r06YNOnTqZRkxrUlu7o0oFBQXYvXs3unXrhj59+pidq+ydyZ60jcOCkoiI7NqYMWMAACkpKRInaVnh4eGIioqqs6BsCEEQEB4eXuV4+/btsX79+hrfV1ZWBmdnZ/j6+mL27NmYOXMmhgwZYnrErVKpsGbNmhrfX9nuKCwsrNp2RwBQXFyMTZs2AQD+9Kc/VdnhB2je3pm2jgUlERHZNScnJ7i4uJge/9oLhUKBsLAwxMTEWOyaYWFhjWq/0759e9y+fdtsruWdapvfWp92RyUlJYiOjkZBQQEWLVpU7Ur3uu5DteOiHCIisnuenp64fv261DFaXGBgIEJCQixyrdDQUAQGBjb6/TUVk0DtvTPrandUUlKCb775BlqtFgsXLqx1BLKm+1DdWFASEZHd8/X1NVv1a0+Cg4MREREBuVxe7WPg2giCALlcjoiICEycOLGZElbfO7M+7Y7uLCYXLFiArl27Nvg+VD8sxYmIyO4NHToUiYmJuHLlCnr16iV1nBYXGBgIX19fxMbG1qvJuyAIEEURPj4+CA8Pb/ZdZu7unVldu6M7eXt7o7y8HNHR0bhx4wamT58OURTNXte2bVuzAtLDw4MLcpqABSUREdm98ePH49NPP8WuXbvwxBNPSB1HEgqFAgsWLEBOTg6efvppBAYGwmg0Vnmdh4cH/P39ERQU1KKNwAMCApCUlASj0VivdkeFhYWmaQw7duyo8pqhQ4di5syZACoKZH9//2ZMb/tkxup+WoiIiOzIzZs34enpiSVLlmDdunVSx5HUm2++iddeew1ffvklFi9eDI1GA71eD7lcLukonlqtRlRUVLNdPzIykjvlNAFHKImIyO516tQJDg4OOHv2rNRRJJWamorXX38dQMXWik5OTq2mjU5l78zMzEyLtzq6u3cmNRwX5RAREQFwd3e3qz2973bz5k2EhYXBYDAAAE6ePCltoGqEh4c3eOFQXWrqnUkNw4KSiIgIFQs5NBqN1DEkodPp8OCDDyI7O9t0LCEhQcJE1avsnWlJje2dSeZYUBIREQEYMGAAdDodiouLpY7SooxGI55++mkcOXLE7FFydnY2rl27JmGy6rWm3pn0PywoiYiIAIwaNQpARbNse6LVavHtt99CFEXTVoeVDh06JFGq2llD70x7w4KSiIgIwJQpUwBUNMy2Jx4eHjh//jzWrl2Ldu3amZ07ceKERKnqFhgYiMjISPj4+ABAnYVl5dxQHx8fREZGcmTSwtg2iIiICIAoipDL5Zg2bRp+//13qeNIwtnZGf7+/ti5cyeOHj2KkSNHomfPnlLHqpNarUZycjLS09OrnQfr5uaGPXv2IC0tDSkpKWjfvr0EKW0bC0oiIqL/165dO3Tu3BmXLl2SOkqLS01NxcCBAxEZGYnPP/9c6jiNptPpqvTO/OOPPzBs2DAAwJAhQ7B79262CbIwPvImIiL6f15eXsjJyZE6hiS+/vprAMDChQslTtI0lb0ze/ToAS8vLzg5OZktLjp79izGjh1bZbtGahoWlERERP/P398fRUVFFm2cbS12794NBwcHjB49WuooFndnQSmKIjIzMzF69GhcvHhRwlS2hQUlERHR/6tcqHH69GmJk7S88+fPW8V8yca4du0a5PL/bQ5oMBhw48YNPPnkkxKmsi0sKImIiP5fcHAwAGDPnj0SJ2lZKpUKxcXFGDdunNRRmkV2djYMBoOpLZKjoyOeeuqpZt0b3N6woCQiIvp/lX0Jk5KSJE7SsirnTz788MMSJ2keZWVlAICpU6fC0dERnp6eWLNmDQYOHChxMtvBVd5ERER3cHZ2Rr9+/XDq1Cmpo7SY4OBgHDx4EHq93uJ7ZbcG+fn5KC0tRZcuXTBz5kz88ssvuHjxIgICAqSOZjNYUBIREd2hW7duKCsrw61bt6SO0mIUCgVcXFxw48YNqaM0u3PnzmHQoEGYPXs2tm7dKnUcm2F7v4YQERE1gY+PD/Lz86WO0WIKCwuRl5eHoKAgqaO0iIEDB8LLywtxcXFSR7EpLCiJiIjuMGjQIBgMBqhUKqmjtIgtW7YAAGbNmiVxkpazcOFCFBcX49dff5U6is1gQUlERHSHypXO9rLS++effwYAzJkzR+IkLefVV1+FTCbD22+/LXUUm8GCkoiI6A5TpkwBABw+fFjiJC0jOTkZCoUCbdq0kTpKi3Fzc8PgwYORkpKC0tJSqePYBBaUREREd/D29oYgCHbR3Fyv1yM3NxdDhgyROkqL++c//wlRFPHBBx9IHcUmsKAkIiK6S4cOHXD58mWpYzS7uLg4GI1G3HfffVJHaXGPPPIInJ2dsW7dOqmj2AQWlERERHfp3r07bt68KXWMZlfZNmfRokUSJ2l5giBg2rRpyMrKsotfHpobC0oiIqK79O3bF2VlZdDpdFJHaVaHDx9G27Zt0blzZ6mjSKJyUc7LL78scRLrx4KSiIjoLpU9GQ8ePChxkuaVlZWFfv36SR1DMkOGDEHnzp2xfft2qaNYPRaUREREdwkNDQUA7N+/X+Ikzefw4cMwGAyYNm2a1FEk9eijj6KoqAi//fab1FGsGgtKIiKiu4wYMQIAkJKSInGS5vPtt98CAB577DFpg0jstddeg0wmw5tvvil1FKvGvbyJiIiq0bZtW/To0QMXLlyQOkqzGDhwINLT01FWViZ1FMkNHjwYqampKCkpgZOTk9RxrBJHKImIiKrRuXNn3LhxQ+oYzebSpUvw8/OTOkar8Le//Q2iKOI///mP1FGsFgtKIiKiavTu3RuFhYXQarU4cuQI0tPTpY5kMWlpaSgrK8PkyZOljtIqLFy4EE5OTlizZo3UU
ayWXOoARERErcm2bduwZ88e/PHHHzAajfDw8AAAjBkzBkeOHJE4nWVs3LgRADB//nxpg7QSgiBgypQpiI+Px9WrV9GzZ0+pI1kdjlASERHd4R//+AfWrFmD69evm47JZDJMmDBBwlSWtXPnTjg4OGDcuHFSR2k1KhflvPLKKxInsU5clENERHSHHTt2ICwsrMrxw4cPY+zYsRIksjw3Nzd06tQJmZmZUkdpVTw9PVFWVoaCggKpo1gdjlASERHdYfr06Xj22WchCP/7iPT09MTo0aMlTGU5arUaRUVFNlMcW9K8efNw+/Zt7Nq1S+ooVocFJRER0V3ef/999O3b1/T1Qw89ZFZgWrNvvvkGAPDwww9LnKT1WblyJQDg9ddflzaIFbKN/3UQERFZkIuLC7Zu3Wr6etasWRKmsazY2FjIZDJERERIHaXVUSgU6N+/P44ePQq9Xi91HKvCgpKIiKgagwYNMm1LOGbMGInTWM7p06fh6ekJuZyNXqrz3HPPwWAw4OOPP5Y6ilXhohwiIqIaHDp0CKGhoUhISEDPnj0hl8vh4eFhtbuplJaWwtXVFWFhYdy7ugaiKMLFxQU9evRARkaG1HGsBn89ISIiuotarUZycjLS0tKwYsWKKos0FAoFAgICEBQUBE9PT4lSNtyWLVsAAA8++KC0QVoxQRAQEhKCnTt3Ijs7Gz169JA6klXgCCUREdH/02q1iI2NRUZGBmQyGWr7iKw87+fnh/DwcCgUihZM2jgPPPAAtm/fjtu3b8PNzU3qOK3W8ePHMXr0aDz22GPYsGGD1HGsAgtKIiIiAEqlEvHx8RBFEaIo1vt9giBAEASEhYUhMDCwGRM2Xbdu3VBcXIy8vDypo7R6HTt2hF6vR35+vtRRrAIX5RARkd1LTExETEwM9Hp9g4pJoGLOnV6vR0xMDBITE5spYdOJogiVSoXBgwdLHcUqzJ07FwUFBdi7d6/UUawCC0oiIrJrSqUSCQkJFrlWQkIClEqlRa5laTt27IDRaMSMGTOkjmIVKntRsidl/XBRDhER2S2tVov4+HgAQFlZGfbv3w+VSgWVSoXi4mJMmjQJISEhZu+pbH5dnY4dO0Iul8PX17fVzan8/vvvAQCLFi2SOIl16NSpE/r06YPDhw9Dr9ezzVIdOEJJRER2KzY21vSIu7i4GCkpKTAYDOjXr1+N71myZEmV/6ZPnw4A6N+/P0RRRGxsbIvkb4hDhw6hTZs26Natm9RRrMby5cuh1+vx2WefSR2l1WNBSUREdkmtViMjI8NUULq7u+PFF1/E4sWLMWXKlBrf5+3tXeU/lUoFABg+fDhEUURGRgbUanWLfB/1dfXqVfTp00fqGFZl6dKlkMvlLCjrgQUlERHZpeTkZMhkMtPXMpnM7Ov6Kisrw7lz5+Dj44OOHTsCqFj5nZycbLGsTZWUlAS9Xo+pU6dKHcWqCIKA4OBgXLp0CQcPHsSKFSvwyCOPNHjhlj3ghAAiIrJLaWlptfaZrK+zZ8+ivLzcrGWQKIpIT09v8rUtJTo6GgDnTzZUXl4eAgMDsXfvXkycOBFARZEZHR0NQeCY3J1YUBIRkd0pKyuDVqu1yLWUSiVcXFzQv39/s+MajQY6na5VbNOYkJAAR0dHDBo0SOooViMuLg4zZ86EXq83O1658IrMsbwmIiK7Y6liMjc3F9euXcPgwYPh6OhY5bxGo7HIfRrjwoULSElJgV6vR3p6Onx9fSXLYo169uwJFxeXKiOR3bt3lyhR68aCkoiI7M7do06NVdlzsqYdcix1n8ZYuHAhgoKC0K5dO5SWlsLNzQ0JCQkoKSmRLJM1GTx4MA4dOgSFQgEHBwfT8V69ekmYqvViQUlERHbHEo8s9Xo9Tp8+ja5du6Jr167Ndp/GGjx4MARBQGlpKQDg5MmTCA0NrdJXk2o2ePBgHD161KzVEkcoq8eCkoiI7I6Hh0eTr3HhwgUUFxfXun+3Je7TWBMnTjRbjVz5/8+bN0+qSFapd+/eOHr0KDp37gwAyMrKkjhR68RZpUREZHecnJygUCiqzKVMS0uDTqeDTqcDUNGr8ty5cwCAgIAAswU2J06cgFwur3FvbA8PD0kX5EyYMMHsa5lMhieffBLLli2TKJH16tatG86cOYPu3bubpgzodDpoNBrTLjpS/31LTWa0RM8EIiIiKxMfH4+kpCSz1kEff/wx8vPzq3398uXLTdsp5ufn45NPPsGQIUMwc+bMKq8VRRF6vR6BgYFwdnZGWVmZ6T+dToewsDD07Nmzeb6x/2c0GtGpUydoNBrIZDIEBwdj165d1S4eovp56623kJqaijFjxlS7sEuhUCAgIABBQUHw9PSUIKF0WFASEZFdUqvViIqKarbrf/bZZ7h582a159544w28+uqrzXbvSkOHDsXp06fh6emJ8+fPS/oI3ppptVrExsYiIyMDMpms1v6llef9/PwQHh7e6vZ0by6cQ0lERHbJ09MTfn5+Fm9QLQgC/Pz88NRTT9V4fsGCBRa9Z03atGkDANizZw+LyUZSKpWIiopCZmYmANTZDL/yfGZmJqKiokydAGwdC0oiIrJb4eHhzVJQhoeH4+2338by5curbOfYpk0bnD171qL3rKTT6aBSqZCdnQ2VSgVPT09MmzatxnmeVLvExETExMRAr9c3eLvFymkPMTExSExMbKaErQcfeRMRkV1TKpWIiYmx2PUiIiJMK7/Ly8sRGhqKI0eOwGAwAPjfI1EvLy+88cYbePLJJ83e/80338DNzQ2zZs2q1/3UajWSk5ORlpZW7bw+V1dXDB482C7n9TVFc/5c2CIWlEREZPcSExORkJDQ5OuEhoaa9nyulJubi6FDh0KlUuGee+7Btm3bsHz5cmzevBk6nQ4dOnTA888/j1dffRU3b96Et7c3DAYDEhISqlzrTpzX13y0Wi2ioqJw8eJFnD59GllZWSgoKICLiwu6deuGSZMmmfWmvHLlCk6ePAmVSoXc3FwYDAazRVxARU/SyMhIm/2zZ0FJRESEihGp+Ph4iKLYoMebgiBAEASEhYXVOAKVlJSEhx56CFu3bsWYMWMAVDRGf+WVV/DZZ5+hqKgILi4uGDBgAE6cOAGZTAaFQoFTp05V20i7ObMSEB0djczMTHz//fcoLi7GwIED4enpiaKiIhw5cgTXr1/Ho48+Cj8/PwDAvn37cOLECXTt2hWlpaXIzMysUlAKggAfH58Wmz/b0lhQEhER/b87R/0EQai1WKs8X99RP6PRWGU+JVAx1+7TTz/Fm2++iVu3bpldf8SIEThw4ACcnZ1Nxy01mhoSEoLg4OAmX8fW3Ln6v7CwEG5ubmbny8rKsHr1anTu3BmLFi0CUPF3WDkX99ChQ9i1a1eVgrJSZGSkTU49YGNzIiKi/6dQKLBgwQLTvMT09HRoNJoqr/Pw8IC/v3+D5iVWV0wCFYXj8uXLodfr8fe//910XBRFJCUlYeHChdiyZQuAipFJSxSTAJCQkAA3NzeOVN4lOTnZNEXg7mISAJydneHp6YmCggLTsfou7BIEAcnJyQgL
C7NY3taCBSUREdFdPD09TR/6LbEjiiiK+PDDD6s9t3XrVmRnZ2PdunWIj49HWVkZ9u/fD5VKBZVKheLiYkyaNKnKHt1GoxFKpRLJycm4desWHBwc0LlzZ4wfPx59+vQBUNHc3dfX12bn9TVGWlparfNRS0tLcePGDfj6+jb42qIoIj09vSnxWi0WlERERLVwcnKCl5dXs95DJpNh8uTJuHXrFlxcXODi4gJnZ2eUlpbi+PHjOHPmDKKiotC5c2cUFxcjJSUFXl5e6NevX419DhMSEpCYmIigoCBMnToVer0ex44dw+bNm/Hwww9jwIABEEURsbGxNjuvr6HKysqqXSl/p7i4OJSXlzd6uoBGo4FOp7O5bRpZUBIREUlMJpPh+++/r/H8lStXsHHjRoiiCHd3d7z44ouQyWQoKiqqsaA8ceIEevbsifDwcNMxPz8//Pvf/8apU6dMBWVGRgbUarVNzutrqLqKyb179+LMmTMICwszW+XdUBqNptl/SWlpbGxORETUyqWmpprmYMpkshrnY97JwcHBbDEPADg6OkIul0Mu/994UuW8PqpYeV+Tffv2ITExEaGhoRg9enSz3cdasaAkIiJq5eqa11ed0aNHIz09HUqlEiUlJbh9+zZ27NiBsrIys4LIluf1NdSdhfad9u3bh3379mHy5MkWWRlf032sme19R0RERDakPvP6qjN27Fg4OjoiLi4O27dvB1Cxa84jjzyCnj17mr3WVuf1NVR1+53v378f+/btQ3BwMCZPntxs97F2LCiJiIhascYUk0DFHMr4+HiMGjUKAQEBMBgMOHXqFL7//nvMmTMH/v7+Zq+3xXl9DeXk5ASFQmH6Mz98+DASEhLg7++PgIAAZGVlmb3e29sbAFBUVITMzEwAFTsjAUB6ejratGmDtm3bwsfHx/Se5ugS0BqwoCQiImrFGjPfrqSkBHFxcQgMDMS9995rOh4QEIANGzYgNjYWzz33XJPvY4sCAgKQlJQEo9GICxcuAKgoDqubFrBy5UoAFUXktm3bzM7FxcUBAHr16oXFixcDqJivenchbytYUBIREbVijZlvd/PmTej1+mq3bezWrRuuXLmCsrIys0U7tjivrzGCgoJw/PhxADAVgnXx9fU1FZe1EUURQUFBTYnXanFRDhERUSvWmPl27dq1AwBkZ2ebHTcajcjOzoaLi0uVx662OK+vMTw9PeHn51fv3W/qSxAE+Pn52Wx7Jv46QkRE1IrdPa8PqFj1rdPpoNPpAFTsP33u3DkAFY9s3d3d0b9/f6SkpMDBwcE0h/LkyZPIyspCSEiIWeshW53X11jh4eGIioqqdS/3hhIEwawnqK2RGRvah4CIiIhaVHx8vGleHwB8/PHHyM/Pr/a1y5cvh0KhQHl5OY4fP47Tp09Dq9XCwcEBHTt2xKhRozB48GBTQSkIAoKCgmxyf+mmUCqViImJsdj1IiIibHrfdBaURERErZxarUZUVFSzXT8yMtJmH8U2RWJiIhISEpp8ndDQUEycONECiVovzqEkIiJq5TivTxrBwcGIiIiAXC5v8J+9IAiQy+WIiIiw+WIS4AglERGRVdBqtYiKirJoex+5XI7IyEgoFAqLXdMWabVaxMbGIiMjAzKZrNZdiwRBgCiK8PPzQ3h4uN382bKgJCIishKc1ycttVqNl156CW3btq12VbyHhwf8/f0RFBRkd6O+XOVNRERkJQIDA1FYWGixeX0sJhsmKSkJ69evB1DRPD4vLw96vR5yudzuV8pzhJKIiMjKKJVKxMfHQxTFBrW2EQQBgiAgLCyMxWQDnThxAmPHjkVZWRkAIDU1Ff3795c4VevBEUoiIiIrExgYCF9fX9O8vsp5ezUxGAxwcHCAj4+PXc3rs5Ts7GxMnz7d1PcTAI4dO8aC8g4coSQiIrJiarUaycnJSE9Ph0ajqXJeoVBgx44dOHPmDJRKpd3N7WuqgoICjB07FhcvXjQtiHJwcMATTzyBL7/8UuJ0rQcLSiIiIhuh0+mg0WjM5vUVFBSYisjevXtj//791e7xTdV79dVX8dZbb1U5PmDAANPuRMSCkoiIyKYplUqMGDECQMUcyu7du2Pfvn3w8/OTOJl1uH79OtavX4+4uDgcO3bMdFwQBBQUFKBt27YSpms9OIeSiIjIhl29etX0/4uiiOvXr2Ps2LHYt28f5wDWQ7du3fDqq6/C0dERx44dw5tvvgmdTofs7Gy4uLhIHa/VYEFJRERkw7KysswW7RgMBuTm5uLhhx/GmTNnJE5nPbZv3w5BELBixQqL71hkC/gnQkREZMMqRyhlMhmAike1s2bNata9wW3RmTNn0L17dxaTNeAIJRERkQ0rKiqCKIoYN24cUlJS0LZtW/z4449Sx7Iq169fR2FhIe6//36po7RaLLOJiIhs2HvvvYesrCwcOnQIDz30EDQaDU6fPi11LKuydu1aAMCiRYskTtJ6cZU3ERGRnbh8+TL8/PzwwAMP4JdffpE6jtUYOXIklEolysvL+ci7BiwoiYiI7Ej37t2h1WpRXFwsdRSr0bZtW3Tu3BmXL1+WOkqrxTKbiIjIjixYsAAlJSX49ddfpY5iFa5cuYLi4mJMnjxZ6iitGgtKIiIiO7JixQrIZDK8++67UkexCv/9738BAI899pi0QVo5PvImIiKyMwMHDsSFCxdQWloKuZwNX2ozfPhwnDlzBuXl5abWS1QVRyiJiIjszLPPPguDwYBVq1ZJHaXVO3/+PHx9fVlM1oEFJRERkZ158sknIZfL8eWXX0odpVWrHMUNDQ2VOkqrx4KSiIjIzgiCgPHjxyM9PR03b96UOk6rtW7dOgDAkiVLJE7S+rGgJCIiskOvvvoqAOD111+XOEnr9fvvv8PR0RGjRo2SOkqrx0U5REREdqpdu3ZwdXVFbm6u1FFaJRcXF/j4+OD8+fNSR2n1OEJJRERkp8LDw6FWq3H27Fmpo7Q6Z8+eRVlZGaZOnSp1FKvAgpKIiMhOvfHGGwD+9/ib/qdy/+4nnnhC4iTWgY+8iYiI7Fi3bt2Qn5+PoqIiqaO0Kv3790dGRgbKysqkjmIVOEJJRERkxx599FEUFxcjJiZG6iityqVLlxAQECB1DKvBgpKIiMiOvfLKK9yK8S7JyckoLy/HvffeK3UUq8GCkoiIyI61b98e/fr1w/Hjx6HX66WO0yqsX78eQEUDeKofFpRERER2btmyZTAYDPjss8+kjtIq7NmzB87OzujXr5/UUawGF+UQERHZOVEU4ezsDF9fX1y8eFHqOJJzdHTEwIEDcfLkSamjWA2OUBIREdk5QRAwduxYpKWlQaPRSB1HUgcPHoRer8f06dOljmJVWFASERGRqRdlZW9Ke/XVV18BAJ566imJk1gXPvImIiIiABVbMbZp0wY5OTlSR5FM7969cePGDRQXF0sdxapwhJKIiIgAAGFhYcjNzUVqaqrUUSQhiiKuXLmCAQMGSB3F6rCgJCIiIgDAm2++CQB47bXXJE4ijYSEBBgMBtx3331SR7E6fORNREREJl27dkVBQYFdbsW
4cOFCREdH4+rVq/D29pY6jlXhCCURERGZzJs3D8XFxYiPj5c6SotLTExE27ZtWUw2AkcoiYiIyCQvLw8KhQLjxo3DoUOHpI7TYkRRhKOjI0aOHImjR49KHcfqcISSiIiITNzd3dGvXz8cO3bMrrZijI+PhyiKiIiIkDqKVWJBSURERGb+/Oc/w2Aw4IsvvpA6SouJjo4GACxZskTiJNaJj7yJiIjIjF6vh6urK3r37o3z589LHadF9OjRAwUFBSgoKJA6ilXiCCURERGZkcvlGDNmDC5cuIC8vDyp4zQ7vV6P69evY8iQIVJHsVosKImIiKiKV155BYB9bMX466+/wmg0YubMmVJHsVp85E1ERETVcnNzg5ubG27cuIFDhw5BEASMGzdO6lgW99BDD+Gnn37CrVu34OHhIXUcq8SCkoiIiKoVFhaGHTt2oHPnzsjNzUWfPn1w4cIFqWNZXLdu3VBcXGwXj/ebCx95ExERkZk9e/ZgxIgR2LFjBwAgNzcXAODl5SVlrGah0+mgUqkwfPhwqaNYNbnUAYiIiKh12bNnD5RKpdkxuVwOHx8faQJZWHl5Od5//3307t0bt27dgtFoxKxZs6SOZdVYUBIREZGZN998EyqVChs2bDAdMxqN6Nmzp4SpLCcnJwevvvqq2bFTp05h27ZtiIiIgIuLi0TJrBcfeRMREZEZBwcHrFu3DsuWLTMdMxgMNlNQdu/evcrim40bN+Lhhx/Gp59+KlEq68aCkoiIiKoQBAGrVq3CihUrTMe6du0qYSLLkclkGDduHATBvAzy8fHB4sWLJUpl3VhQEhERUbVkMhnefvttPPjggwCAmzdvAvjfQpbs7GyoVCrodDoJUzbOne2PZDIZ3NzcsHPnTnTq1EnCVNaLbYOIiIioVqIoYvjw4ZgxYwa6du0KrVZb5TUKhQIBAQEICgqCp6enBCkbZu/evZgyZQqAigVHCQkJmDBhgsSprBcLSiIiIqqRVqtFbGwsMjIyIJPJUFvZUHnez88P4eHhUCgULZi0YW7fvo327dsDAL777jvMnTtX4kTWjQUlERERVUupVCI+Ph6iKEIUxXq/TxAECIKAsLAwBAYGNmPC+tPpdNBoNNDr9ZDL5XBzc0P79u0RFBSE48ePSx3P6rGgJCIioioSExORkJDQ5OuEhIQgODjYAokaTq1WIzk5GWlpadU+ptdoNAgMDMSMGTOs4jF9a8aCkoiIiMwolUrExMRY7HoREREtOlJpq4/pWzMWlERERGSi1WoRFRUFvV5vsWvK5XJERka2SLFmS4/prQkLSiIiIjKJjo5GZmYmSkpKsH//fqhUKqhUKhQXF2PSpEkICQkxe73RaMSxY8eQlJSEvLw8uLq6ol+/fpgyZQpcXV0BVBRrPj4+WLBgQbNmt4XH9NaKfSiJiIgIQMWcw4yMDIiiiOLiYqSkpMBgMKBfv341vmfnzp34/fff0a9fP8ybNw8TJkzAmTNn8M0338BgMACoaDuUkZEBtVrdbNmVSqVFikkASEhIqLKXOdWOe3kTERERACA5Odk0p9Dd3R0vvvgiZDIZioqKqi2wCgoKcPToUYwcORLTpk0DAPTu3Rtt27bFjz/+iJMnT2LEiBEAKkYpk5OTERYWZvHcWq0W8fHxFr1mfHw8fH19OaeynlhQEhEREQAgLS3NtIBFJpPV+frs7GwYjUYEBASYHe/Tpw8AIDU11VRQiqKI9PR0CyeuEBsbaxoFPX36NLKyslBQUAAXFxd069YNkyZNQrdu3czec/36dezatQvZ2dkQBAG+vr645557THt8i6KI2NjYZn9Mbyv4yJuIiIhQVlZWbWud2lQ+0pbLzcenKvfIzsnJMTuu0WjMtmk0Go04cuQI5s+fj6FDh9a4heMvv/yC3Nzcas/d+Zg+OTkZeXl5GDNmDObPn4/p06ejqKgI69atQ0ZGhtl7Nm7cCIPBgNmzZ+OBBx7ArVu3sGHDBhQVFQFomcf0toQFJRERETW4mARg6t149epVs+NZWVkAgJKSkirv0Wg0piJvyJAhGDduHL777jucPn3aVKDeKTMzEzNnzkTv3r2xevXqKqvPKx/TA8CMGTPw2GOPYeTIkfDx8cHAgQOxYMECuLq64sCBA6b3JCQkQC6XY968eejTpw8GDBiA+fPno6ioCIcPHza9rvIxPdWNBSURERE1qk2Ql5cXevXqhcOHD+PcuXMoKSnB1atXERsbC5lMVu1j8/DwcLRv3x5PPvkkzp49C6BipNLR0RGXLl1CYWGh2esrRwgLCwuxfPlyDB48GPv27TOdv/MxvZubW5X7OTs7w9PTEwUFBQAqRlUvXryI/v37w8XFxfQ6d3d3+Pr64o8//jAda87H9LaGcyiJiIioymPr+po9ezZ++eUXbNu2DQDg4OCAMWPGICMjA6WlpVVef/369Wr7Q5aXl2Pw4MGmrx0cHODo6Fgl1/nz5xESEoKAgAC8/PLLdY6slpaW4saNG/D19QVQMRKr1+vRpUuXKq/t0qULLl26hPLycjg6OgL432N6JyenOv4k7BsLSiIiIjItRmkoNzc3PProoygsLERhYSHc3d0h/7/27t+lrS6O4/jH20sES4dkUdBJEgp1KRKsCEqIOAgJXVt1kS6igzrpUujSf8HBRUXwHzClkyZ1KKG2WUQdIqI4qKT1B4IEI/EZ5N4aYvx18jxPTd8vCOTm3pwcMn043/PDtrW8vKwXL14UPZ9OpzU5Oan3798rm826Ze7a2lq9e/dO+/v7+vnzp379+qWjoyPt7u4WjVo67YyPj2tgYODG/n369Em5XM7dV/L09FSS3D0yr3I+y2azbqCULkNlXV3dHf+RvxOBEgAAyOPxyOv1PmgupXQZLJ2SczKZVC6XU0tLS8EzPp9PT58+1ejoqHp6ejQ2NqaZmRlJl9sNffz4sajdiYkJDQ0NSbocRT0/P1dbW5vevn2rly9famFhoWSfFhcXtbKyou7u7qJV3ndZxe4o56lBlYpACQAAJEmBQEDLy8vunMR0Oq2zszN39XUmk9Hq6qr7rMfj0Y8fPyRJXq9X2WxWGxsbSqVS6uzsLAhxlmXJ7/e717W1tZqentbAwIBGRkbU3t5+bZ9OTk4kXY4e9vf3a3BwUE1NTZKkvb29koEykUhoaWlJ4XBYr169cj+vqamR9Huk8ipnEdHVuZXSw6cD/E34hwAAgCQpGAzq27dv7nUsFtPx8bF7vba2prW1NUnS8PCwPB6PLi4ulEwmdXx8rKqqKtXV1enNmzdFp+vk83kFg8Gi32xtbVUymSzZp97eXtXX1+v169d69uxZwb1SZfpEIqFEIqFQKFR0hKLX65Vt29duQ7S/vy+fz1dQ7r7pd/AbgRIAAEi63AaosbFRW1tbyufzGh0dvfU7wWDw2qB4lXOWt7PN0H00NDSor6/v2nvXlem/fPmiRCKhjo4OhUKhou88efJEz58/1/r6urq6ulRdXS1JOjo60tbWllpbWwue9/l8LMi5A7YNAgAArkgk4m5MXi6WZSkSiZS1TUcgEHDnQ379+lXxeF
x+v1+BQEA7OzsFL0coFFIul9Pc3JzS6bTW19c1NzenmpoatbW1FfT7apkepVVdOBMlAAAAJKVSKc3Pz5etvWg0qubm5rK1d1Umk9HExIQkaWpqStvb2yWf/fDhg/v+tqMXHYODgw8aWf3bECgBAECRpaUlxeNx43bC4XDJBTflMjs765bpy8Up03OW991Q8gYAAEU6OjoUjUZl2/a9S+CWZcm2bUWj0X89TEqPr0xfiRihBAAAJR0eHioWi2lzc1OWZd04Cujcb2xsVCQSkdfr/c/6+ZjK9JWIQAkAAG6VyWT0/ft3bWxs6ODgoOi+z+eT3+9XMBj83+YcPqYyfaUhUAIAgHs5OzvTwcGBzs/PZdv2H7W1TiqV0ufPn5XP5+81p9KyLFmWpe7ubkYmH4BACQAAKspjKdNXEgIlAACoSI+hTF8pCJQAAKDi/cll+kpAoAQAAIAR9qEEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABj5B9C6Xh9TlurRAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "idx = 0 # Feel free to change this to visualize different graphs.\n", + "\n", + "# Visualize the graphs\n", + "display(test_data[idx].mol)\n", + "visualize_pyg_graph(pygs[idx])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "3a9d2dd1-2f90-44cc-8859-b93c41167f33", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 0. 0. ... 0. 0. 0.12011]\n", + " [0. 0. 0. ... 0. 1. 0.14007]\n", + " [0. 0. 0. ... 0. 1. 0.12011]\n", + " ...\n", + " [0. 0. 0. ... 0. 1. 0.12011]\n", + " [0. 0. 0. ... 0. 1. 0.12011]\n", + " [0. 0. 0. ... 0. 1. 0.12011]]\n", + "[[0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 
0.]]\n" + ] + } + ], + "source": [ + "# Examine the features\n", + "print(pygs[idx].x)\n", + "print(pygs[idx].edge_attr)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chemprop_dgl", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/chemprop/pyproject.toml b/chemprop/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..b1c9154387a4e03aa9b6fa5eaf9c143c10f08202 --- /dev/null +++ b/chemprop/pyproject.toml @@ -0,0 +1,77 @@ +[build-system] +requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.2"] +build-backend = "setuptools.build_meta" + +[project] +name = "chemprop" +description = "Molecular Property Prediction with Message Passing Neural Networks" +version = "2.1.2" +authors = [ + {name = "The Chemprop Development Team (see LICENSE.txt)", email="chemprop@mit.edu"} +] +readme = "README.md" +license = {text = "MIT"} +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent" +] +keywords = [ + "chemistry", + "machine learning", + "property prediction", + "message passing neural network", + "graph neural network", + "drug discovery" +] +requires-python = ">=3.11" +dependencies = [ + "lightning >= 2.0", + "numpy", + "pandas", + "rdkit", + "scikit-learn", + "scipy", + "torch >= 2.1", + "astartes[molecules]", + "ConfigArgParse", + "rich", + "descriptastorus", +] + +[project.optional-dependencies] +hpopt = ["ray[tune]", "hyperopt", "optuna"] +dev = ["black == 23.*", "bumpversion", "autopep8", "flake8", "pytest", "pytest-cov", "isort"] +docs = ["nbsphinx", "sphinx", "sphinx-argparse != 0.5.0", "sphinx-autobuild", "sphinx-autoapi", "sphinxcontrib-bibtex", "sphinx-book-theme", "nbsphinx-link", "ipykernel", "docutils < 0.21", "readthedocs-sphinx-ext", "pandoc"] +test = ["pytest >= 6.2", "pytest-cov"] +notebooks = ["ipykernel", "matplotlib"] + +[project.urls] +documentation = "https://chemprop.readthedocs.io/en/latest/" +source = "https://github.com/chemprop/chemprop" +PyPi = "https://pypi.org/project/chemprop/" + +[project.scripts] +chemprop = "chemprop.cli.main:main" + +[tool.black] +line-length = 100 +target-version = ["py311"] +skip-magic-trailing-comma = true +required-version = "23" + +[tool.autopep8] +in_place = true +recursive = true +aggressive = 2 +max_line_length = 100 + + +[tool.pytest.ini_options] +addopts = "--cov chemprop" + +[tool.isort] +profile = "black" +line_length = 100 +force_sort_within_sections = true diff --git a/chemprop/tests/cli/test_cli_classification_mol.py b/chemprop/tests/cli/test_cli_classification_mol.py new file mode 100644 index 0000000000000000000000000000000000000000..afb895994bb1ae9e26337dab469c4dfe45017b07 --- /dev/null +++ b/chemprop/tests/cli/test_cli_classification_mol.py @@ -0,0 +1,276 @@ +"""This tests the CLI functionality of training and predicting a regression model on a single molecule. 
+""" + +import pytest + +from chemprop.cli.main import main +from chemprop.models.model import MPNN + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return str(data_dir / "classification" / "mol.csv") + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_classification_mol.pt") + + +@pytest.fixture +def dirichlet_model_path(data_dir): + return str(data_dir / "example_model_v2_classification_dirichlet_mol.pt") + + +def test_train_quick(monkeypatch, data_path): + base_args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--task-type", + "classification", + "--metric", + "prc", + "accuracy", + "f1", + "roc", + "--show-individual-scores", + ] + + task_types = ["classification", "classification-dirichlet"] + + for task_type in task_types: + args = base_args.copy() + + args += ["--task-type", task_type] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + args = ["chemprop", "predict", "-i", data_path, "--model-path", model_path] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_dirichlet_quick(monkeypatch, data_path, dirichlet_model_path): + args = [ + "chemprop", + "predict", + "-i", + data_path, + "--model-path", + dirichlet_model_path, + "--uncertainty-method", + "classification-dirichlet", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("calibration_method", ["platt", "isotonic"]) +def test_predict_unc_quick(monkeypatch, data_path, model_path, calibration_method): + args = [ + "chemprop", + "predict", + "-i", + data_path, + "--model-path", + model_path, + "--cal-path", + data_path, + "--uncertainty-method", + "classification", + "--calibration-method", + calibration_method, + "--evaluation-methods", + "nll-classification", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + args = [ + "chemprop", + "fingerprint", + "-i", + data_path, + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_output_structure(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--save-smiles-splits", + "--task-type", + "classification", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "model_0" / "best.pt").exists() + assert (tmp_path / "model_0" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "model_0" / "trainer_logs" / "version_0").exists() + assert (tmp_path / "train_smiles.csv").exists() + + +def test_train_output_structure_replicate_ensemble(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--save-smiles-splits", + "--split-type", + "random", + "--num-replicates", + "3", + "--ensemble-size", + "2", + "--task-type", + "classification", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "replicate_2" / "model_1" / "best.pt").exists() + assert 
(tmp_path / "replicate_2" / "model_1" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "replicate_2" / "model_1" / "trainer_logs" / "version_0").exists() + assert (tmp_path / "replicate_2" / "train_smiles.csv").exists() + + +def test_predict_output_structure(monkeypatch, data_path, model_path, tmp_path): + args = [ + "chemprop", + "predict", + "-i", + data_path, + "--model-path", + model_path, + model_path, + "--output", + str(tmp_path / "preds.csv"), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "preds.csv").exists() + assert (tmp_path / "preds_individual.csv").exists() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_output_structure( + monkeypatch, data_path, model_path, tmp_path, ffn_block_index +): + args = [ + "chemprop", + "fingerprint", + "-i", + data_path, + "--model-path", + model_path, + "--output", + str(tmp_path / "fps.csv"), + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "fps_0.csv").exists() + + +def test_train_outputs(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--task-type", + "classification", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + checkpoint_path = tmp_path / "model_0" / "checkpoints" / "last.ckpt" + + model = MPNN.load_from_checkpoint(checkpoint_path) + assert model is not None + + +def test_class_balance(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--task-type", + "classification", + "--class-balance", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() diff --git a/chemprop/tests/cli/test_cli_classification_mol_multiclass.py b/chemprop/tests/cli/test_cli_classification_mol_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..9f5164e65a0364f8ff4b8c21cb102d1f5a84a8c4 --- /dev/null +++ b/chemprop/tests/cli/test_cli_classification_mol_multiclass.py @@ -0,0 +1,246 @@ +"""This tests the CLI functionality of training and predicting a regression model on a single molecule. 
+""" + +import pytest + +from chemprop.cli.main import main +from chemprop.models.model import MPNN + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return str(data_dir / "classification" / "mol_multiclass.csv") + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_classification_mol_multiclass.pt") + + +@pytest.fixture +def dirichlet_model_path(data_dir): + return str(data_dir / "example_model_v2_multiclass_dirichlet_mol.pt") + + +def test_train_quick(monkeypatch, data_path): + base_args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--show-individual-scores", + ] + + task_types = ["multiclass", "multiclass-dirichlet"] + + for task_type in task_types: + args = base_args.copy() + + args += ["--task-type", task_type] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + args = ["chemprop", "predict", "-i", data_path, "--model-path", model_path] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_dirichlet_quick(monkeypatch, data_path, dirichlet_model_path): + args = [ + "chemprop", + "predict", + "-i", + data_path, + "--model-path", + dirichlet_model_path, + "--uncertainty-method", + "multiclass-dirichlet", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_isotonic_quick(monkeypatch, data_path, model_path): + args = [ + "chemprop", + "predict", + "-i", + data_path, + "--model-path", + model_path, + "--cal-path", + data_path, + "--uncertainty-method", + "classification", + "--calibration-method", + "isotonic-multiclass", + "--evaluation-methods", + "nll-multiclass", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + args = [ + "chemprop", + "fingerprint", + "-i", + data_path, + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_output_structure(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--save-smiles-splits", + "--task-type", + "multiclass", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "model_0" / "best.pt").exists() + assert (tmp_path / "model_0" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "model_0" / "trainer_logs" / "version_0").exists() + assert (tmp_path / "train_smiles.csv").exists() + + +def test_train_output_structure_replicate_ensemble(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--save-smiles-splits", + "--split-type", + "random", + "--num-replicates", + "3", + "--ensemble-size", + "2", + "--task-type", + "multiclass", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "replicate_2" / "model_1" / "best.pt").exists() + assert (tmp_path / "replicate_2" / "model_1" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "replicate_2" / "model_1" / "trainer_logs" / "version_0").exists() + assert (tmp_path / 
"replicate_2" / "train_smiles.csv").exists() + + +def test_predict_output_structure(monkeypatch, data_path, model_path, tmp_path): + args = [ + "chemprop", + "predict", + "-i", + data_path, + "--model-path", + model_path, + model_path, + "--output", + str(tmp_path / "preds.csv"), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "preds.csv").exists() + assert (tmp_path / "preds_individual.csv").exists() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_output_structure( + monkeypatch, data_path, model_path, tmp_path, ffn_block_index +): + args = [ + "chemprop", + "fingerprint", + "-i", + data_path, + "--model-path", + model_path, + "--output", + str(tmp_path / "fps.csv"), + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "fps_0.csv").exists() + + +def test_train_outputs(monkeypatch, data_path, tmp_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--task-type", + "multiclass", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + checkpoint_path = tmp_path / "model_0" / "checkpoints" / "last.ckpt" + + model = MPNN.load_from_checkpoint(checkpoint_path) + assert model is not None diff --git a/chemprop/tests/cli/test_cli_regression_mol+mol.py b/chemprop/tests/cli/test_cli_regression_mol+mol.py new file mode 100644 index 0000000000000000000000000000000000000000..cdfa1917dcb20f182866ec827af15cdb0a1fd81f --- /dev/null +++ b/chemprop/tests/cli/test_cli_regression_mol+mol.py @@ -0,0 +1,240 @@ +"""This tests the CLI functionality of training and predicting a regression model on a multi-molecule. 
+""" + +import json + +import pytest + +from chemprop.cli.main import main + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return ( + str(data_dir / "regression" / "mol+mol" / "mol+mol.csv"), + str(data_dir / "regression" / "mol+mol" / "descriptors.npz"), + ("0", str(data_dir / "regression" / "mol+mol" / "atom_features_0.npz")), + ("1", str(data_dir / "regression" / "mol+mol" / "atom_features_1.npz")), + ("0", str(data_dir / "regression" / "mol+mol" / "bond_features_0.npz")), + ("1", str(data_dir / "regression" / "mol+mol" / "atom_descriptors_1.npz")), + ) + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_regression_mol+mol.pt") + + +def test_train_quick(monkeypatch, data_path): + ( + input_path, + desc_path, + atom_feat_path_0, + atom_feat_path_1, + bond_feat_path_0, + atom_desc_path_1, + ) = data_path + + base_args = [ + "chemprop", + "train", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--epochs", + "3", + "--num-workers", + "0", + "--descriptors-path", + desc_path, + "--atom-features-path", + *atom_feat_path_0, + "--atom-features-path", + *atom_feat_path_1, + "--bond-features-path", + *bond_feat_path_0, + "--atom-descriptors-path", + *atom_desc_path_1, + "--show-individual-scores", + ] + + task_types = ["", "regression-mve", "regression-evidential", "regression-quantile"] + + for task_type in task_types: + args = base_args.copy() + + if task_type: + args += ["--task-type", task_type] + + if task_type == "regression-evidential": + args += ["--evidential-regularization", "0.2"] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + input_path, _, _, _, _, _ = data_path + + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--model-path", + model_path, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + input_path, _, _, _, _, _ = data_path + args = [ + "chemprop", + "fingerprint", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_output_structure(monkeypatch, data_path, tmp_path): + input_path, *_ = data_path + + args = [ + "chemprop", + "train", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--save-smiles-splits", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "model_0" / "best.pt").exists() + assert (tmp_path / "model_0" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "model_0" / "trainer_logs" / "version_0").exists() + assert (tmp_path / "train_smiles.csv").exists() + assert (tmp_path / "model_0" / "test_predictions.csv").exists() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_output_structure( + monkeypatch, data_path, model_path, tmp_path, ffn_block_index +): + input_path, *_ = data_path + args = [ + "chemprop", + "fingerprint", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--model-path", + model_path, + "--output", + str(tmp_path / "fps.csv"), + 
"--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "fps_0.csv").exists() + + +def test_train_splits_file(monkeypatch, data_path, tmp_path): + splits_file = str(tmp_path / "splits.json") + splits = [ + {"train": [1, 2], "val": "3-5", "test": "6,7"}, + {"val": [1, 2], "test": "3-5", "train": "6,7"}, + ] + + with open(splits_file, "w") as f: + json.dump(splits, f) + + input_path, *_ = data_path + + args = [ + "chemprop", + "train", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--splits-file", + splits_file, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_molecule_featurizers(monkeypatch, data_path): + input_path, descriptors_path, *_ = data_path + + args = [ + "chemprop", + "train", + "-i", + input_path, + "--smiles-columns", + "smiles", + "solvent", + "--epochs", + "3", + "--num-workers", + "0", + "--descriptors-path", + descriptors_path, + "--molecule-featurizers", + "morgan_count", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() diff --git a/chemprop/tests/cli/test_cli_regression_mol.py b/chemprop/tests/cli/test_cli_regression_mol.py new file mode 100644 index 0000000000000000000000000000000000000000..40a0076327c5d0322058b682530cc126cec45b6a --- /dev/null +++ b/chemprop/tests/cli/test_cli_regression_mol.py @@ -0,0 +1,576 @@ +"""This tests the CLI functionality of training and predicting a regression model on a single molecule. +""" + +import json + +import pytest +import torch + +from chemprop.cli.hpopt import NO_HYPEROPT, NO_OPTUNA, NO_RAY +from chemprop.cli.main import main +from chemprop.cli.train import TrainSubcommand +from chemprop.models.model import MPNN + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return ( + str(data_dir / "regression" / "mol" / "mol.csv"), + str(data_dir / "regression" / "mol" / "descriptors.npz"), + str(data_dir / "regression" / "mol" / "atom_features.npz"), + str(data_dir / "regression" / "mol" / "bond_features.npz"), + str(data_dir / "regression" / "mol" / "atom_descriptors.npz"), + ) + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_regression_mol.pt") + + +@pytest.fixture +def mve_model_path(data_dir): + return str(data_dir / "example_model_v2_regression_mve_mol.pt") + + +@pytest.fixture +def evidential_model_path(data_dir): + return str(data_dir / "example_model_v2_regression_evidential_mol.pt") + + +@pytest.fixture +def config_path(data_dir): + return str(data_dir / "regression" / "mol" / "config.toml") + + +def test_train_quick(monkeypatch, data_path): + input_path, *_ = data_path + + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--show-individual-scores", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_config(monkeypatch, config_path, tmp_path): + args = [ + "chemprop", + "train", + "--config-path", + config_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + new_config_path = tmp_path / "config.toml" + parser = TrainSubcommand.parser + + new_args = parser.parse_args(["--config-path", str(new_config_path)]) + old_args = parser.parse_args(["--config-path", str(config_path)]) + + 
for key, value in old_args.__dict__.items(): + if key not in ["config_path", "output_dir", "epochs"]: + assert getattr(new_args, key) == value + + assert new_args.epochs == 3 + + +def test_train_quick_features(monkeypatch, data_path): + ( + input_path, + descriptors_path, + atom_features_path, + bond_features_path, + atom_descriptors_path, + ) = data_path + + base_args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--descriptors-path", + descriptors_path, + "--atom-features-path", + atom_features_path, + "--bond-features-path", + bond_features_path, + "--atom-descriptors-path", + atom_descriptors_path, + ] + + task_types = ["", "regression-mve", "regression-evidential", "regression-quantile"] + + for task_type in task_types: + args = base_args.copy() + + if task_type: + args += ["--task-type", task_type] + + if task_type == "regression-evidential": + args += ["--evidential-regularization", "0.2"] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + input_path, *_ = data_path + args = ["chemprop", "predict", "-i", input_path, "--model-path", model_path] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_mve_quick(monkeypatch, data_path, mve_model_path): + input_path, *_ = data_path + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--model-path", + mve_model_path, + "--cal-path", + input_path, + "--uncertainty-method", + "mve", + "--calibration-method", + "zscaling", + "--evaluation-methods", + "nll-regression", + "miscalibration_area", + "ence", + "spearman", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_evidential_quick(monkeypatch, data_path, evidential_model_path): + input_path, *_ = data_path + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--model-path", + evidential_model_path, + "--cal-path", + input_path, + "--uncertainty-method", + "evidential-total", + "--calibration-method", + "zscaling", + "--evaluation-methods", + "nll-regression", + "miscalibration_area", + "ence", + "spearman", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + input_path, *_ = data_path + args = [ + "chemprop", + "fingerprint", + "-i", + input_path, + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_output_structure(monkeypatch, data_path, tmp_path): + input_path, *_ = data_path + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--save-smiles-splits", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "model_0" / "best.pt").exists() + assert (tmp_path / "model_0" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "model_0" / "trainer_logs" / "version_0").exists() + assert (tmp_path / "train_smiles.csv").exists() + assert (tmp_path / "model_0" / "test_predictions.csv").exists() + + +def test_train_output_structure_replicate_ensemble(monkeypatch, data_path, tmp_path): + input_path, *_ = data_path + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + 
"--save-dir", + str(tmp_path), + "--save-smiles-splits", + "--split-type", + "random", + "--num-replicates", + "3", + "--ensemble-size", + "2", + "--metrics", + "mse", + "rmse", + "--molecule-featurizers", + "rdkit_2d", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "replicate_2" / "model_1" / "best.pt").exists() + assert (tmp_path / "replicate_2" / "model_1" / "checkpoints" / "last.ckpt").exists() + assert (tmp_path / "replicate_2" / "model_1" / "trainer_logs" / "version_0").exists() + assert (tmp_path / "replicate_2" / "train_smiles.csv").exists() + + +def test_train_csv_splits(monkeypatch, data_dir, tmp_path): + input_path = str(data_dir / "regression" / "mol" / "mol_with_splits.csv") + args = [ + "chemprop", + "train", + "-i", + input_path, + "--smiles-columns", + "smiles", + "--target-columns", + "lipo", + "--splits-column", + "split", + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_splits_file(monkeypatch, data_path, tmp_path): + input_path, *_ = data_path + splits_file = str(tmp_path / "splits.json") + splits = [ + {"train": [1, 2], "val": "3-5", "test": "6,7"}, + {"val": [1, 2], "test": "3-5", "train": "6,7"}, + ] + + with open(splits_file, "w") as f: + json.dump(splits, f) + + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--splits-file", + splits_file, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_output_structure(monkeypatch, data_path, model_path, tmp_path): + input_path, *_ = data_path + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--model-path", + model_path, + model_path, + "--output", + str(tmp_path / "preds.csv"), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "preds.csv").exists() + assert (tmp_path / "preds_individual.csv").exists() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_output_structure( + monkeypatch, data_path, model_path, tmp_path, ffn_block_index +): + input_path, *_ = data_path + args = [ + "chemprop", + "fingerprint", + "-i", + input_path, + "--model-path", + model_path, + "--output", + str(tmp_path / "fps.csv"), + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "fps_0.csv").exists() + + +def test_train_outputs(monkeypatch, data_path, tmp_path): + input_path, *_ = data_path + + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + checkpoint_path = tmp_path / "model_0" / "checkpoints" / "last.ckpt" + + model = MPNN.load_from_checkpoint(checkpoint_path) + assert model is not None + + +def test_freeze_model(monkeypatch, data_path, model_path, tmp_path): + input_path, *_ = data_path + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--checkpoint", + model_path, + "--freeze-encoder", + "--frzn-ffn-layers", + "1", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + checkpoint_path = tmp_path / "model_0" / "checkpoints" / "last.ckpt" + + trained_model 
= MPNN.load_from_checkpoint(checkpoint_path) + frzn_model = MPNN.load_from_file(model_path) + + assert torch.equal( + trained_model.message_passing.W_o.weight, frzn_model.message_passing.W_o.weight + ) + assert torch.equal( + trained_model.predictor.ffn[0][0].weight, frzn_model.predictor.ffn[0][0].weight + ) + + +def test_checkpoint_model(monkeypatch, data_path, model_path, tmp_path): + input_path, *_ = data_path + args = [ + "chemprop", + "train", + "-i", + input_path, + "--epochs", + "3", + "--num-workers", + "0", + "--save-dir", + str(tmp_path), + "--checkpoint", + model_path, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + checkpoint_path = tmp_path / "model_0" / "checkpoints" / "last.ckpt" + + model = MPNN.load_from_checkpoint(checkpoint_path) + assert model is not None + + +@pytest.mark.skipif(NO_RAY or NO_OPTUNA, reason="Optuna not installed") +def test_optuna_quick(monkeypatch, data_path, tmp_path): + input_path, *_ = data_path + + args = [ + "chemprop", + "hpopt", + "-i", + input_path, + "--epochs", + "6", + "--hpopt-save-dir", + str(tmp_path), + "--raytune-num-samples", + "2", + "--raytune-search-algorithm", + "optuna", + "--molecule-featurizers", + "morgan_count", + "--search-parameter-keywords", + "all", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "best_config.toml").exists() + assert (tmp_path / "best_checkpoint.ckpt").exists() + assert (tmp_path / "all_progress.csv").exists() + assert (tmp_path / "ray_results").exists() + + args = [ + "chemprop", + "train", + "--config-path", + str(tmp_path / "best_config.toml"), + "--save-dir", + str(tmp_path), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "model_0" / "best.pt").exists() + + +@pytest.mark.skipif(NO_RAY or NO_HYPEROPT, reason="Ray and/or Hyperopt not installed") +def test_hyperopt_quick(monkeypatch, data_path, tmp_path): + input_path, *_ = data_path + + args = [ + "chemprop", + "hpopt", + "-i", + input_path, + "--epochs", + "6", + "--hpopt-save-dir", + str(tmp_path), + "--raytune-num-samples", + "2", + "--raytune-search-algorithm", + "hyperopt", + "--molecule-featurizers", + "morgan_binary", + "--search-parameter-keywords", + "all", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "best_config.toml").exists() + assert (tmp_path / "best_checkpoint.ckpt").exists() + assert (tmp_path / "all_progress.csv").exists() + assert (tmp_path / "ray_results").exists() + + args = [ + "chemprop", + "train", + "--config-path", + str(tmp_path / "best_config.toml"), + "--save-dir", + str(tmp_path), + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + assert (tmp_path / "model_0" / "best.pt").exists() diff --git a/chemprop/tests/cli/test_cli_regression_mol_multitask.py b/chemprop/tests/cli/test_cli_regression_mol_multitask.py new file mode 100644 index 0000000000000000000000000000000000000000..914b594e3084551ee580757a0c056c0acf2629a8 --- /dev/null +++ b/chemprop/tests/cli/test_cli_regression_mol_multitask.py @@ -0,0 +1,68 @@ +"""This tests the CLI functionality of training and predicting a regression model on a single molecule. 
+""" + +import pytest + +from chemprop.cli.main import main + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return str(data_dir / "regression" / "mol_multitask.csv") + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_regression_mol_multitask.pt") + + +def test_train_quick(monkeypatch, data_path): + args = [ + "chemprop", + "train", + "-i", + data_path, + "--epochs", + "3", + "--num-workers", + "0", + "--show-individual-scores", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + args = ["chemprop", "predict", "-i", data_path, "--model-path", model_path] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + args = ["chemprop", "predict", "-i", data_path, "--model-path", model_path] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + args = [ + "chemprop", + "fingerprint", + "-i", + data_path, + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() diff --git a/chemprop/tests/cli/test_cli_regression_rxn+mol.py b/chemprop/tests/cli/test_cli_regression_rxn+mol.py new file mode 100644 index 0000000000000000000000000000000000000000..98731cebc8e3ed9fb9105c1a0abac5838df2619f --- /dev/null +++ b/chemprop/tests/cli/test_cli_regression_rxn+mol.py @@ -0,0 +1,146 @@ +"""This tests the CLI functionality of training and predicting a regression model on a multi-molecule. +""" + +import pytest + +from chemprop.cli.main import main + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return ( + str(data_dir / "regression" / "rxn+mol" / "rxn+mol.csv"), + str(data_dir / "regression" / "rxn+mol" / "descriptors.npz"), + ("0", str(data_dir / "regression" / "rxn+mol" / "atom_features.npz")), + ("0", str(data_dir / "regression" / "rxn+mol" / "bond_features.npz")), + ("0", str(data_dir / "regression" / "rxn+mol" / "atom_descriptors.npz")), + ) + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_regression_rxn+mol.pt") + + +def test_train_quick(monkeypatch, data_path): + ( + input_path, + descriptors_path, + atom_features_path, + bond_features_path, + atom_descriptors_path, + ) = data_path + + base_args = [ + "chemprop", + "train", + "-i", + input_path, + "--reaction-columns", + "rxn_smiles", + "--smiles-columns", + "solvent_smiles", + "--epochs", + "3", + "--num-workers", + "0", + "--split-key-molecule", + "1", + "--descriptors-path", + descriptors_path, + "--atom-features-path", + *atom_features_path, + "--bond-features-path", + *bond_features_path, + "--atom-descriptors-path", + *atom_descriptors_path, + "--show-individual-scores", + ] + + task_types = ["", "regression-mve", "regression-evidential", "regression-quantile"] + + for task_type in task_types: + args = base_args.copy() + + if task_type: + args.extend(["--task-type", task_type]) + + if task_type == "regression-evidential": + args += ["--evidential-regularization", "0.2"] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + input_path, *_ = data_path + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--reaction-columns", + "rxn_smiles", + 
"--smiles-columns", + "solvent_smiles", + "--model-path", + model_path, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + input_path, *_ = data_path + args = [ + "chemprop", + "fingerprint", + "-i", + input_path, + "--reaction-columns", + "rxn_smiles", + "--smiles-columns", + "solvent_smiles", + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_train_molecule_featurizers(monkeypatch, data_path): + input_path, descriptors_path, *_ = data_path + args = [ + "chemprop", + "train", + "-i", + input_path, + "--reaction-columns", + "rxn_smiles", + "--smiles-columns", + "solvent_smiles", + "--epochs", + "3", + "--num-workers", + "0", + "--split-key-molecule", + "1", + "--descriptors-path", + descriptors_path, + "--molecule-featurizers", + "morgan_count", + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() diff --git a/chemprop/tests/cli/test_cli_regression_rxn.py b/chemprop/tests/cli/test_cli_regression_rxn.py new file mode 100644 index 0000000000000000000000000000000000000000..67a4cbb40e37d54a5d024617d3e9c6cc4c1fc84f --- /dev/null +++ b/chemprop/tests/cli/test_cli_regression_rxn.py @@ -0,0 +1,94 @@ +"""This tests the CLI functionality of training and predicting a regression model on a multi-molecule. +""" + +import pytest + +from chemprop.cli.main import main + +pytestmark = pytest.mark.CLI + + +@pytest.fixture +def data_path(data_dir): + return str(data_dir / "regression" / "rxn" / "rxn.csv"), str( + data_dir / "regression" / "rxn" / "descriptors.npz" + ) + + +@pytest.fixture +def model_path(data_dir): + return str(data_dir / "example_model_v2_regression_rxn.pt") + + +def test_train_quick(monkeypatch, data_path): + input_path, descriptors_path = data_path + + base_args = [ + "chemprop", + "train", + "-i", + input_path, + "--reaction-columns", + "smiles", + "--epochs", + "3", + "--num-workers", + "0", + "--descriptors-path", + descriptors_path, + "--show-individual-scores", + ] + + task_types = ["", "regression-mve", "regression-evidential", "regression-quantile"] + + for task_type in task_types: + args = base_args.copy() + + if task_type: + args.extend(["--task-type", task_type]) + + if task_type == "regression-evidential": + args += ["--evidential-regularization", "0.2"] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +def test_predict_quick(monkeypatch, data_path, model_path): + input_path, _ = data_path + args = [ + "chemprop", + "predict", + "-i", + input_path, + "--reaction-columns", + "smiles", + "--model-path", + model_path, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() + + +@pytest.mark.parametrize("ffn_block_index", ["0", "1"]) +def test_fingerprint_quick(monkeypatch, data_path, model_path, ffn_block_index): + input_path, _ = data_path + args = [ + "chemprop", + "fingerprint", + "-i", + input_path, + "--reaction-columns", + "smiles", + "--model-path", + model_path, + "--ffn-block-index", + ffn_block_index, + ] + + with monkeypatch.context() as m: + m.setattr("sys.argv", args) + main() diff --git a/chemprop/tests/cli/test_cli_utils.py b/chemprop/tests/cli/test_cli_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..89fe915c66196c4beca53f4b761e99fa1fc068da --- /dev/null +++ 
b/chemprop/tests/cli/test_cli_utils.py @@ -0,0 +1,170 @@ +import pytest + +from chemprop.cli.common import find_models +from chemprop.cli.utils.parsing import get_column_names, parse_indices + + +def test_parse_indices(): + """ + Testing if CLI parse_indices yields expected results. + """ + splits = {"train": [0, 1, 2, 4], "val": [3, 5, 6], "test": [7, 8, 9]} + split_idxs = {"train": "0-2, 4", "val": "3,5-6", "test": [7, 8, 9]} + split_idxs = {split: parse_indices(idxs) for split, idxs in split_idxs.items()} + + assert split_idxs == splits + + +def test_find_models(data_dir): + """ + Testing if CLI find_models gets the correct model paths. + """ + models = find_models([data_dir / "example_model_v2_regression_mol.pt"]) + assert len(models) == 1 + models = find_models([data_dir / "example_model_v2_regression_mol.ckpt"]) + assert len(models) == 1 + models = find_models([data_dir]) + assert len(models) == 14 + models = find_models( + [ + data_dir / "example_model_v2_regression_mol.pt", + data_dir / "example_model_v2_regression_mol.ckpt", + data_dir, + ] + ) + assert len(models) == 16 + + +@pytest.mark.parametrize( + "path,smiles_cols,rxn_cols,target_cols,ignore_cols,splits_col,weight_col,no_header_row,expected", + [ + ( + "classification/mol.csv", + ["smiles"], + None, + ["NR-AhR", "NR-ER", "SR-ARE", "SR-MMP"], + None, + None, + None, + False, + ["smiles", "NR-AhR", "NR-ER", "SR-ARE", "SR-MMP"], + ), + ( + "classification/mol.csv", + ["smiles"], + None, + None, + None, + None, + None, + False, + ["smiles", "NR-AhR", "NR-ER", "SR-ARE", "SR-MMP"], + ), + ( + "classification/mol.csv", + None, + None, + None, + ["NR-AhR", "SR-ARE"], + None, + None, + False, + ["smiles", "NR-ER", "SR-MMP"], + ), + ("regression/mol/mol.csv", None, None, None, None, None, None, False, ["smiles", "lipo"]), + ( + "regression/mol/mol.csv", + None, + None, + ["lipo"], + None, + None, + None, + False, + ["smiles", "lipo"], + ), + ( + "regression/mol/mol_with_splits.csv", + ["smiles"], + None, + ["lipo"], + None, + "split", + None, + False, + ["smiles", "lipo"], + ), + ( + "regression/mol/mol_with_splits.csv", + None, + None, + None, + None, + "split", + None, + False, + ["smiles", "lipo"], + ), + ( + "regression/rxn/rxn.csv", + None, + ["smiles"], + ["ea"], + None, + None, + None, + False, + ["smiles", "ea"], + ), + ( + "classification/mol+mol.csv", + ["mol a smiles", "mol b Smiles"], + None, + ["synergy"], + None, + None, + None, + False, + ["mol a smiles", "mol b Smiles", "synergy"], + ), + ( + "classification/mol+mol.csv", + ["mol a smiles", "mol b Smiles"], + None, + None, + None, + None, + None, + False, + ["mol a smiles", "mol b Smiles", "synergy"], + ), + ("regression/mol/mol.csv", None, None, None, None, None, None, True, ["SMILES", "pred_0"]), + ], +) +def test_get_column_names( + data_dir, + path, + smiles_cols, + rxn_cols, + target_cols, + ignore_cols, + splits_col, + weight_col, + no_header_row, + expected, +): + """ + Testing if CLI get_column_names gets the correct column names. 
+ """ + input_cols, target_cols = get_column_names( + data_dir / path, + smiles_cols, + rxn_cols, + target_cols, + ignore_cols, + splits_col, + weight_col, + no_header_row, + ) + + assert input_cols + target_cols == expected diff --git a/chemprop/tests/conftest.py b/chemprop/tests/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..a18e838c7bb4d17ef39219f1071aa2a4dad0fc91 --- /dev/null +++ b/chemprop/tests/conftest.py @@ -0,0 +1,113 @@ +from pathlib import Path + +import numpy as np +import pandas as pd +import pytest +from rdkit import Chem + +_DATA_DIR = Path(__file__).parent / "data" +_DF = pd.read_csv(_DATA_DIR / "smis.csv") +_DF["mol"] = _DF["smiles"].map(Chem.MolFromSmiles) +_DF["smi"] = _DF["mol"].map(Chem.MolToSmiles) + + +@pytest.fixture +def data_dir(): + return _DATA_DIR + + +@pytest.fixture +def smis(): + return _DF.smi + + +@pytest.fixture +def mols(): + return _DF.mol + + +@pytest.fixture +def targets(smis): + return np.random.rand(len(smis), 1) + + +# @pytest.fixture +# def mol_data(mols, targets): +# return [MoleculeDatapoint(mol, y) for mol, y in zip(mols, targets)] + + +# @pytest.fixture +# def rxn_data(rxns, targets): +# return [ReactionDatapoint(mol, y) for mol, y in zip(mols, targets)] + + +@pytest.fixture(params=_DF.smi.sample(5)) +def smi(request): + return request.param + + +@pytest.fixture(params=_DF.mol.sample(5)) +def mol(request): + return request.param + + +@pytest.fixture +def mol_regression_data(data_dir): + df = pd.read_csv(data_dir / "regression/mol/mol.csv") + smis = df["smiles"].to_list() + Y = df["lipo"].to_numpy().reshape(-1, 1) + + return smis, Y + + +@pytest.fixture +def rxn_regression_data(data_dir): + df = pd.read_csv(data_dir / "regression/rxn/rxn.csv") + smis = df["smiles"].to_list() + Y = df["ea"].to_numpy().reshape(-1, 1) + + return smis, Y + + +@pytest.fixture +def mol_mol_regression_data(data_dir): + df = pd.read_csv(data_dir / "regression/mol+mol/mol+mol.csv") + smis1 = df["smiles"].to_list() + smis2 = df["solvent"].to_list() + Y = df["peakwavs_max"].to_numpy().reshape(-1, 1) + + return smis1, smis2, Y + + +@pytest.fixture +def rxn_mol_regression_data(data_dir): + df = pd.read_csv(data_dir / "regression/rxn+mol/rxn+mol.csv") + rxns = df["rxn_smiles"].to_list() + smis = df["solvent_smiles"].to_list() + Y = df["target"].to_numpy().reshape(-1, 1) + + return rxns, smis, Y + + +@pytest.fixture +def mol_classification_data(data_dir): + df = pd.read_csv(data_dir / "classification" / "mol.csv") + smis = df["smiles"].to_list() + Y = df["NR-AhR"].to_numpy().reshape(-1, 1) + + return smis, Y + + +@pytest.fixture +def mol_classification_data_multiclass(data_dir): + df = pd.read_csv(data_dir / "classification" / "mol_multiclass.csv") + smis = df["smiles"].to_list() + activities = df["activity"].unique() + Y = ( + df["activity"] + .map({activity: i for i, activity in enumerate(activities)}) + .to_numpy() + .reshape(-1, 1) + ) + + return smis, Y diff --git a/chemprop/tests/data/classification.csv b/chemprop/tests/data/classification.csv new file mode 100644 index 0000000000000000000000000000000000000000..24cc3dc65c75fcdf6537fea5936183b9831f2628 --- /dev/null +++ b/chemprop/tests/data/classification.csv @@ -0,0 +1,501 @@ +smiles,NR-AR,NR-AR-LBD,NR-AhR,NR-Aromatase,NR-ER,NR-ER-LBD,NR-PPAR-gamma,SR-ARE,SR-ATAD5,SR-HSE,SR-MMP,SR-p53 +CCOc1ccc2nc(S(N)(=O)=O)sc2c1,0,0,1,,,0,0,1,0,0,0,0 +CCN1C(=O)NC(c2ccccc2)C1=O,0,0,0,0,0,0,0,,0,,0,0 +CC[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]3CC[C@@]21C,,,,,,,,0,,0,, 
+CCCN(CC)C(CC)C(=O)Nc1c(C)cccc1C,0,0,0,0,0,0,0,,0,,0,0 +CC(O)(P(=O)(O)O)P(=O)(O)O,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)(C)OOC(C)(C)CCC(C)(C)OOC(C)(C)C,0,0,0,0,0,0,0,,0,0,0,0 +O=S(=O)(Cl)c1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +O=C(O)Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1,0,,0,,1,,,1,0,1,0,1 +OC[C@H](O)[C@@H](O)[C@H](O)CO,0,0,0,0,0,0,0,0,0,0,,0 +CCCCCCCC(=O)[O-].CCCCCCCC(=O)[O-].[Zn+2],,,,,,,,0,,0,, +NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,0,0,0,,0,0,0,,0,,,0 +O=c1[nH]c(=O)n([C@H]2C[C@H](O)[C@@H](CO)O2)cc1I,0,0,,,0,0,0,0,0,0,0,0 +CC(C)COC(=O)C(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +C=C(C)C(=O)OCCOC(=O)C(=C)C,0,0,0,0,0,0,0,0,0,0,0,0 +Cl/C=C\C[N+]12CN3CN(CN(C3)C1)C2,0,0,0,,0,0,,1,0,0,0, +O=C([O-])Cc1cccc2ccccc12,0,0,0,0,0,0,1,0,0,0,0,0 +CCCCCCCCCCOCC(O)CN,0,0,0,,0,,,,,0,, +CCN(CC)C(=O)c1cccnc1,0,0,0,0,,0,0,,0,,0,0 +COc1cc(O)cc(O)c1,0,0,,0,,,0,,0,0,0,0 +CCOC(=O)c1cccnc1,,,,,,,,0,,0,, +CCOc1ccc(S(=O)(=O)O)c2cccnc12,0,0,,0,1,1,0,,0,,0,0 +O=C(O)[C@H](O)c1ccccc1,0,0,0,0,,0,0,0,0,0,0,0 +Nc1ccc(/N=N/c2ccccc2)cc1,0,0,1,,1,0,,1,1,0,,0 +CN[C@@H]1C[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc21,0,0,1,,0,0,0,,0,,, +CN1[C@H]2CC[C@@H]1C[C@H](OC(=O)c1cc(Cl)cc(Cl)c1)C2,0,0,0,0,0,0,0,0,0,0,,0 +CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21,0,0,0,,0,0,,0,0,0,,0 +C#CCO,0,0,0,0,0,0,0,0,0,0,0,0 +Nc1ccccc1S(=O)(=O)O,0,0,0,0,0,0,0,0,0,0,,0 +CC(O)CC(C)(C)O,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)(C)CC(C)(C)N,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)CC(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CCCC(C)=O,0,0,0,0,0,0,0,0,0,0,0,0 +Nc1nc2ccccc2[nH]1,0,0,1,0,0,0,0,0,0,0,0,0 +Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl,0,,1,,,0,,1,0,0,1,1 +c1ccc(-c2ccccc2)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +CNC(=O)Nc1ccc(Cl)c(Cl)c1,0,0,1,0,0,,0,0,0,0,,0 +CC(=O)Nc1ccc(C)c(Cl)c1,0,0,0,0,,0,0,0,0,0,,0 +CCCCNC(=S)NCCCC,0,0,0,0,0,0,0,0,0,1,0,0 +CCCCNC(=O)NCCCC,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)N(c1ccccc1)C(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CCc1cccc(C)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)CCCCCCCOP(OCCCCCCCC(C)C)Oc1ccccc1,0,0,0,0,0,0,0,,0,0,0,0 +CCCCCCCC/C=C\CCCCCCCC(=O)OC(CO)CO,0,0,0,,0,0,0,0,0,1,0,0 +CCCCCCCCCCC=CC1CC(=O)OC1=O,0,0,0,0,0,0,0,1,0,0,0,0 +CC(C)C(Nc1ccc(C(F)(F)F)cc1Cl)C(=O)OC(C#N)c1cccc(Oc2ccccc2)c1,0,0,,0,0,,,0,0,,1, +CS(=O)(=O)NC(=O)c1cc(Oc2ccc(C(F)(F)F)cc2Cl)ccc1[N+](=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +CCOP(=S)(CC)Sc1ccccc1,0,0,0,1,,0,0,0,0,0,0,0 +CC/C=C\CCCCO,0,0,0,0,0,0,0,0,0,0,0,0 +Nc1ccccc1C(=O)Oc1ccc2ccccc2c1,,0,1,,1,0,0,1,1,0,1,0 +C=C[C@H]1CN2CCC1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12,0,0,1,0,0,0,0,,0,0,0,0 +CC(=O)CCC(C)=O,0,0,0,0,0,0,0,0,0,0,0,0 +N#CCCNCCC#N,0,0,0,0,0,0,0,0,0,0,0,0 +CCOc1ccc(N=Nc2ccc(C=Cc3ccc(N=Nc4ccc(OCC)cc4)cc3S(=O)(=O)[O-])c(S(=O)(=O)[O-])c2)cc1,0,0,0,0,0,0,0,,0,0,,0 +O=C1c2ccccc2C(=O)C1c1ccc2cc(S(=O)(=O)[O-])cc(S(=O)(=O)[O-])c2n1,0,0,,0,0,0,0,1,0,0,,0 +O=C(Nc1ccc2c(O)c(N=Nc3ccc(N=Nc4ccc(S(=O)(=O)[O-])cc4)cc3)c(S(=O)(=O)[O-])cc2c1)c1ccccc1,0,0,0,,,0,0,1,0,0,,0 +CSc1ccc2c(c1)C(N1CCN(C)CC1)Cc1ccccc1S2,0,0,0,,0,0,,0,0,0,,0 +COCCCC/C(=N\OCCN)c1ccc(C(F)(F)F)cc1,0,0,0,,0,0,,,0,0,,0 +Cc1ccccc1CCO,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1nc(C)c(C)nc1C,0,0,0,0,0,0,0,0,0,0,0,0 +CC1=CC(O)CC(C)(C)C1,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1cnc(C)c(C)n1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)COC(=O)c1ccccc1,0,0,0,0,,0,0,0,0,0,0,0 +C=C(C)[C@@H]1CC=C(C)CC1,0,0,0,0,,0,0,0,0,0,0,0 +O=[N+]([O-])[O-].O=[N+]([O-])[O-].[Ca+2],0,0,0,0,0,0,0,0,0,0,0,0 +Nc1ccc(N)c([N+](=O)[O-])c1,0,0,1,0,,0,0,1,0,0,1, +CC1COc2ccccc2N1,0,0,1,,0,0,0,0,0,0,,0 +O=C(O)c1cc(Cl)cc(Cl)c1O,0,0,0,0,0,0,0,0,0,0,,0 +CCCCCCCCCCCC(=O)NCCCN(C)C,0,,0,,,,,,0,,, +CC(C)CCCCCOC(=O)CCS,0,0,0,0,0,0,0,0,0,0,0,0 
+O=[N+]([O-])c1cc([As](=O)(O)O)ccc1O,0,0,0,0,0,0,0,0,0,0,0,0 +CCC(COC(=O)CCS)(COC(=O)CCS)COC(=O)CCS,0,0,0,0,0,0,0,0,0,0,0,1 +C=CCOc1c(Br)cc(Br)cc1Br,0,0,1,0,0,0,0,0,0,0,,0 +F[B-](F)(F)F.[H+],0,0,0,0,0,0,0,,0,0,0,0 +CC(C)[C@H]1CC[C@H](C)C[C@@H]1O,0,0,0,0,0,0,0,0,0,0,0,0 +C(=C/c1ccccc1)\c1ccccc1,0,0,1,,1,0,0,1,0,0,,0 +Cc1ccc2c(ccc3ccccc32)c1,0,0,,,,0,0,0,0,0,,0 +Cn1c(=O)c2c(ncn2CC2OCCO2)n(C)c1=O,0,0,0,0,0,0,0,0,0,0,0,0 +C[C@H]1O[C@@H](n2cc(F)c(=O)[nH]c2=O)[C@H](O)[C@@H]1O,0,0,0,0,1,1,0,,0,0,0,1 +CN1CCc2cccc3c2[C@H]1Cc1ccc(O)c(O)c1-3,0,0,1,,,0,0,1,0,,,0 +COC(=O)C1=CCCN(C)C1,0,0,0,0,0,0,0,0,0,0,0,0 +COc1ccc(C2C(=O)c3ccccc3C2=O)cc1,,,,,,,,0,,0,, +Cc1ccc(C(=O)O)cc1[N+](=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +Cc1cc(C(=O)O)ccc1[N+](=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +CCCC(CCC)C(=O)O[C@@H]1C[C@@H]2CC[C@H](C1)[N+]2(C)C,0,0,0,0,0,0,0,,0,,0,0 +CCCCCC(C)O,0,0,0,0,0,0,0,0,0,0,0,0 +O=C([O-])c1ccccc1O,0,0,0,0,0,0,0,0,0,0,,0 +NC(=O)c1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +CCN1CCc2nc(N)oc2CC1,,,,,,,,0,,0,, +CC(C)(C)[C@]1(O)CCN2C[C@H]3c4ccccc4CCc4cccc(c43)[C@@H]2C1,0,0,0,,,0,0,,0,,,0 +O=C1C(N(CO)C(=O)NCO)N(CO)C(=O)N1CO,0,0,0,0,0,0,0,0,0,0,0,0 +O=c1[nH]c(=O)n([C@H]2C[C@H](O)[C@@H](CO)O2)cc1/C=C/Br,,0,0,0,0,0,0,,0,,0,0 +OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc32)CC1,0,0,0,0,0,0,,0,0,0,,0 +CC(C)NC[C@@H](O)COc1ccc(CC(N)=O)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +CCNC(=O)N1CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1,0,,0,,,0,,1,0,0,0,0 +Nc1ccc([N+](=O)[O-])cc1N,0,0,1,0,1,1,0,1,0,,1,0 +[I-].[K+],0,0,0,0,0,0,0,0,0,0,0,0 +O=C(C=Cc1ccc(O)c(O)c1)O[C@@H]1C[C@](O)(C(=O)O)C[C@@H](O)[C@H]1O,0,0,0,0,0,0,0,0,0,0,0,0 +Oc1nc(Cl)c(Cl)cc1Cl,0,,1,,0,0,,1,0,0,1,0 +C/C=C/C=C/C=O,0,0,0,0,0,0,0,1,0,0,0,0 +O=[N+]([O-])c1cc(C(F)(F)F)cc([N+](=O)[O-])c1Cl,1,,0,,,0,,1,0,0,,0 +C[Si](C)(C)N[Si](C)(C)C,0,0,0,0,0,1,0,0,0,0,0,0 +C=CC(=O)OCCCl,0,0,0,0,0,0,0,,0,0,0,0 +COCC(C)N(C(=O)CCl)c1c(C)csc1C,0,0,0,1,0,0,,1,0,0,,0 +CN(C)CCn1nnnc1SCC1=C(C(=O)O)N2C(=O)[C@@H](NC(=O)Cc3csc(N)n3)[C@H]2SC1,0,0,0,0,0,0,0,,0,0,0,0 +C/C(=N\NC(=O)Nc1cc(F)cc(F)c1)c1ncccc1C(=O)[O-],0,0,,0,0,0,0,0,0,0,0,0 +CC1COC(Cn2cncn2)(c2ccc(Oc3ccc(Cl)cc3)cc2Cl)O1,0,0,0,,,,,1,,0,, +CCN(CC)CCOC(=O)C(Cc1cccc2ccccc12)CC1CCCO1,0,0,0,0,0,0,0,0,0,0,0,0 +CCN[C@H]1C[C@H](C)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,0,0,0,0,0,0,0,0,0,0,0,0 +CCN[C@H]1CN(CCCOC)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)N(CCC(C(N)=O)(c1ccccc1)c1ccccn1)C(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CC[C@H]1[C@@H]2C[C@H]3[C@@H]4N(C)c5ccccc5[C@]45C[C@@H](C2[C@H]5O)N3[C@@H]1O,0,0,0,0,0,0,0,,0,,,0 +CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)C=C[C@]5(C)[C@H]4[C@@H](O)C[C@]3(C)[C@]2(C(=O)CO)O1,1,0,0,,0,,0,0,,0,,0 +CSC(=O)c1c(C(F)F)nc(C(F)(F)F)c(C(=O)SC)c1CC(C)C,0,0,0,,0,0,0,0,0,0,,0 +O=C(O)/C=C(\CC(=O)O)C(=O)O,0,0,0,0,,0,0,0,0,0,0,0 +CCCCCCCCCCCCCCCC(=O)O[C@@H]1CC(C)=C(/C=C/C(C)=C/C=C/C(C)=C/C=C\C=C(C)\C=C\C=C(C)\C=C\C2=C(C)C[C@@H](OC(=O)CCCCCCCCCCCCCCC)CC2(C)C)C(C)(C)C1,0,0,0,0,0,0,0,,0,,0,0 +O=C(CO)[C@@H](O)[C@H](O)[C@@H](O)CO,,,,,,,,0,,0,, +CC1=C(C(=O)Nc2ccccc2)S(=O)(=O)CCO1,0,0,0,,,0,,1,0,,,1 +CNCC(=O)c1ccc(O)c(O)c1,,,,,,,,0,,0,, +CC(C)(C)C1CCC(=O)CC1,0,0,0,,0,0,0,0,0,1,0,0 +CN(C)[C@@H]1C(O)=C(C(=O)NCN2CCCC2)C(=O)[C@@]2(O)C(O)=C3C(=O)c4c(O)cccc4[C@@](C)(O)C3C[C@@H]12,0,0,0,0,0,0,0,0,0,0,0,0 +CN1CCN=C(c2ccccc2)c2cc(Cl)ccc21,0,0,,0,0,0,0,,0,,0, +CN(C)CCc1c[nH]c2ccc(Cn3cncn3)cc12,0,0,0,0,0,0,0,,0,,0,0 +CCCCC(=O)[O-],0,0,0,0,0,0,0,,0,,0,0 +CCCCCCCCCCCCCC(=O)OC,0,0,0,0,0,0,0,0,0,0,0,0 +Nc1ccncc1N,,,,,,,,0,,0,, +CCCCCCC(C)O,0,0,0,0,0,0,0,0,0,0,0,0 +CCC[Si](OC)(OC)OC,0,0,0,0,0,0,0,0,0,0,0,0 +CC1=C(CC=O)C(C)(C)CCC1,0,0,0,0,0,0,0,0,0,0,0,0 +NCCNCCNCCN,0,0,0,0,,0,0,0,0,0,0,0 
+C[C@]12CC[C@H]3[C@@H](CC[C@@]45O[C@@H]4C(O)=C(C#N)C[C@]35C)[C@@H]1CC[C@@H]2O,1,1,0,,1,0,0,1,0,0,,1 +CCCC1COC(Cn2cncn2)(c2ccc(Cl)cc2Cl)O1,0,0,1,1,,0,0,1,0,0,0,0 +Cc1ccc(N)c(N)c1,0,0,1,0,1,1,0,1,1,1,,0 +CCCCCNCCCCC,0,0,0,0,0,0,0,0,0,0,0,0 +COCC(C)O,0,0,0,0,1,0,0,0,0,0,0,0 +c1ccc2c(c1)Oc1ccccc1S2,0,0,0,0,1,0,0,0,0,0,1,0 +CC1CN1,0,0,0,0,0,0,0,0,0,0,0,0 +CCc1cnc(C2=NC(C)(C(C)C)C(=O)N2)c(C(=O)O)c1,0,0,0,0,0,0,0,0,0,0,0,0 +NCC(=O)CCC(=O)O,,,,,,,,,,0,, +Clc1ccc(C(Cn2ccnc2)OCc2c(Cl)cccc2Cl)c(Cl)c1,0,,,1,0,,0,,0,1,0, +Clc1cnc(Oc2ccc(Oc3ncc(Cl)cc3Cl)cc2)c(Cl)c1,0,0,0,0,0,0,0,0,0,0,1,0 +COc1ccccc1OCCNCC(O)COc1cccc2[nH]c3ccccc3c12,0,0,1,0,,0,0,0,0,0,1,0 +ClCOCCl,0,0,0,0,1,0,0,0,0,0,0,0 +CC(O)CNCC(C)O,0,0,0,0,0,0,0,0,0,0,0,0 +C[C@H](CCC(=O)[O-])[C@H]1CC[C@H]2[C@H]3[C@H](C[C@H](O)[C@@]21C)[C@@]1(C)CC[C@@H](O)C[C@H]1C[C@H]3O,0,0,,0,0,0,0,0,0,0,0,0 +CC(=O)[C@H]1[C@H](C#N)C[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@@]21C,0,0,0,,,0,0,1,0,0,0,0 +O=[N+]([O-])c1ccc([As](=O)(O)O)cc1,0,0,0,0,0,0,0,,0,0,0,0 +CCOC(=O)C1OC1c1ccccc1,0,0,0,0,0,0,0,,0,0,0,0 +ONc1ccccc1,0,0,1,,1,0,0,,0,0,,0 +O=CC(=O)c1ccccc1,0,0,0,0,0,0,0,0,0,,0,0 +[Cu]I,0,0,0,0,,0,0,,0,1,0,0 +CCCCC(CC)CCC(CC(C)C)OS(=O)(=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +ClCc1ccc(Cl)cc1Cl,0,0,0,0,0,0,0,,0,0,, +O=C(O)CCCCCCCCC(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCCC(=O)OC,0,0,0,0,0,0,0,0,0,0,0,0 +CC(O)COCC(C)O,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1ccc(C(=O)C(=O)[O-])cc1C,,,,,,,,0,,0,, +O=C([O-])COc1nn(Cc2ccccc2)c2ccccc12,0,,0,0,0,0,0,0,0,0,0,0 +Cc1ncc[nH]1,0,0,1,0,0,0,0,0,0,0,0,0 +COc1ccc2sc(C(=O)Nc3nnn[n-]3)c(OC(C)C)c2c1,0,,0,1,,0,1,,0,0,, +Oc1ccc2c(c1)OC[C@@H](N1CCC(O)(c3ccc(F)cc3)CC1)[C@H]2O,0,0,1,,1,0,0,0,1,0,,0 +O=C(O)CCN(C1(C(=O)NO)CCCC1)S(=O)(=O)c1ccc(Oc2ccc(F)cc2)cc1,0,0,0,0,0,0,0,,0,0,,0 +O=C(NO)C1(NS(=O)(=O)c2ccc(Oc3ccc(F)cc3)cc2)CCOCC1,0,0,0,0,,0,0,,0,0,0,0 +Cc1nc(C)nc(N2C[C@H](C)N(c3ccnc([C@@H](C)O)n3)[C@H](C)C2)n1,0,0,0,,0,0,0,,0,0,0,0 +CC[C@H](C)[C@@H](C(=O)O)n1sc2ccccc2c1=O,0,0,0,0,0,0,0,1,0,0,,0 +Cc1cc(SC2=C(O)C[C@@](CCc3ccc(N)cc3)(C(C)C)OC2=O)c(C(C)(C)C)cc1CO,0,,0,0,0,0,,0,0,0,0,0 +CCn1nc(C)c2c1C(=O)NCC(c1ccc(O)cc1)=N2,0,0,,0,0,0,0,,0,0,0,0 +C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)COP(=O)([O-])[O-],1,1,0,,1,,0,,0,,0,0 +CN1C[C@H]2c3ccccc3Oc3ccc(Cl)cc3[C@@H]2C1,,,,,,,,0,,0,, +CO[C@H]1C[C@H](O[C@@H]2[C@@H](C)C(=O)O[C@H](C)[C@H](C)[C@H](OC(C)=O)[C@@H](C)C(=O)[C@@]3(CO3)C[C@H](C)[C@H](O[C@@H]3O[C@H](C)C[C@H](N(C)C)[C@H]3OC(C)=O)[C@H]2C)O[C@@H](C)[C@@H]1OC(C)=O,0,0,0,0,0,0,0,,0,,0,0 +CO[Si](C)(C)OC,0,0,0,0,0,0,0,0,0,0,0,0 +CC(O)(c1ccc(Cl)cc1)c1ccc(Cl)cc1,0,0,0,,0,0,,,0,0,1,0 +CN(C)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)cc1,0,0,1,1,,0,0,1,0,0,1,0 +CC(=O)c1ccccc1O,0,0,0,,0,0,0,0,0,0,0,0 +O=C(O)Cc1c(Cl)ccc(Cl)c1Cl,0,0,0,0,0,0,1,0,0,0,0,0 +O=C(O)c1cccc(Cl)n1,0,0,0,0,0,0,0,0,0,,0,0 +CCCCCCCCCC=O,0,0,0,0,,0,,0,0,0,0,0 +Cc1ccc(C(C)(C)C)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +BrCBr,0,0,0,0,0,0,0,0,0,0,0,0 +Nc1cc(Cl)cc(Cl)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCCCCC(=O)O,0,0,0,0,0,0,0,0,0,0,,0 +CC(C)(C)c1cc([N+](=O)[O-])cc(C(C)(C)C)c1O,0,,0,,,,0,,,,1, +O.O.O.O.O.O.O=[N+]([O-])[O-].O=[N+]([O-])[O-].[Mg+2],0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCCCCOS(=O)(=O)[O-],0,0,0,0,1,0,0,0,0,0,0,0 +O=Cc1ccc(C(=O)O)cc1,0,0,0,0,0,0,0,1,0,0,0,0 +CCC(Cl)CCl,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)(O)c1ccccc1,0,,0,,0,0,0,0,0,0,,0 +O=C1CCCN1,0,0,0,0,1,0,0,0,0,0,0,0 +ClCc1ccccc1Cl,0,,0,0,0,0,0,0,0,0,,0 +Cc1ccc([N+](=O)[O-])c([N+](=O)[O-])c1,0,0,,0,0,0,0,1,0,0,,0 +N#CC1(N=NC2(C#N)CCCCC2)CCCCC1,0,0,0,0,0,0,0,0,0,0,0,0 +C=CC(=O)OCCOC(=O)C=C,0,,0,0,1,1,1,1,1,,0, +CCCC[P+](CCCC)(CCCC)CCCC,0,0,0,0,0,0,0,0,0,0,0, 
+N#CCc1cccc(C(F)(F)F)c1,0,0,0,0,0,0,0,0,0,,0,0 +COc1cccc(Br)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCCCNC,0,0,0,0,0,0,0,0,0,0,0,0 +CCC1OCC(COc2ccc(Oc3ccccc3)cc2)O1,0,,1,,1,0,0,,0,,,0 +CC1=C(C(=O)Nc2ccccc2)SCCO1,0,0,1,,1,0,0,0,1,0,0,0 +CCCCN(CCCC)SN(C)C(=O)Oc1cccc2c1OC(C)(C)C2,0,0,1,,1,0,,0,0,1,,0 +Cc1cc(OC(=O)N(C)C)nn1C(=O)N(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1ccc2c(Br)cc(Br)c(O)c2n1,0,0,1,0,,0,,,1,1,1, +O=c1c(O)c(-c2ccc(O)cc2)oc2cc(O)cc(O)c12,0,0,1,1,1,1,,0,0,0,1,0 +CC(O)COc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +O=P1(NCCCl)OCCCN1CCCl,0,0,0,0,0,0,0,0,0,0,0,0 +C=CC(=C)C,,,,,,0,0,0,0,0,0,0 +CC(C)O,0,,0,,0,0,0,0,0,,0,0 +CC(C)OC(=O)Nc1cccc(Cl)c1,0,0,,,0,0,0,0,1,0,,0 +CC(C)OC(=O)Nc1ccccc1,0,0,0,,1,0,0,0,0,0,0,0 +CC=Cc1ccc2c(c1)OCO2,0,0,0,0,0,0,0,0,0,0,0,0 +CCCC(CCC)C(=O)[O-],0,0,0,,0,0,0,0,0,0,0,0 +CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)Oc1cc(C)c(OC(C)=O)c2ccccc12,0,0,1,,,0,,,0,,,1 +CCN(Cc1ccc(Cl)nc1)/C(=C/[N+](=O)[O-])NC,0,0,0,0,0,0,0,0,0,0,0,0 +CC1CCC(C(C)C)C(OC(=O)c2ccccc2N)C1,0,0,0,0,1,1,0,0,0,,1, +O=C(c1ccccc1)c1cc(Cl)ccc1O,0,0,,1,,,0,,0,0,1,0 +OC[C@]1(O)OC[C@@H](O)[C@H](O)[C@@H]1O,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1ccc(C=C2C(=O)C3CCC2C3(C)C)cc1,0,0,0,0,0,,0,0,0,0,1,0 +CC(C)C[P+](C)(CC(C)C)CC(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +C=C1C[C@]23CC[C@@H]4[C@](C)(C(=O)O[C@@H]5O[C@H](CO)[C@@H](O)[C@H](O)[C@H]5O)CCC[C@]4(C)[C@@H]2C[C@@H](O[C@@H]2O[C@H](CO)[C@@H](O)[C@H](O[C@@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@H]4O)[C@H]2OC2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@@H]1C3,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCCC[P+](CCCCCCCC)(CCCCCCCC)CCCCCCCC,0,,,1,,,,,,,1, +CCCCC(CC)COC(=O)c1ccc(C(=O)OCC(CC)CCCC)c(C(=O)OCC(CC)CCCC)c1,0,0,0,0,1,0,0,0,0,0,0,0 +O=c1n(CCO)c(=O)n(CCO)c(=O)n1CCO,0,0,0,0,0,0,0,0,0,1,0,0 +Cc1cc(C)cc(OP(=O)(Oc2cc(C)cc(C)c2)Oc2cc(C)cc(C)c2)c1,0,0,1,0,0,0,0,,0,0,,0 +O=P(OC(CCl)CCl)(OC(CCl)CCl)OC(CCl)CCl,0,0,0,1,0,0,0,0,0,0,,0 +O=c1n(CC2CO2)c(=O)n(CC2CO2)c(=O)n1CC1CO1,0,0,0,,0,0,0,1,1,1,0,1 +Cc1cc(-c2ccc(N=Nc3c(S(=O)(=O)[O-])cc4cc(S(=O)(=O)[O-])cc(N)c4c3O)c(C)c2)ccc1N=Nc1c(S(=O)(=O)[O-])cc2cc(S(=O)(=O)[O-])cc(N)c2c1O,0,0,,,,0,0,,,,,0 +O=C(O)c1ccc(O)cc1O,0,0,0,0,0,0,0,0,0,0,0,0 +O=C1c2c(O)ccc([N+](=O)[O-])c2C(=O)c2c([N+](=O)[O-])ccc(O)c21,0,,,,,,,1,,,1, +CC1=CC(C)(C)Nc2ccccc21,0,0,0,0,0,0,0,0,0,0,1,0 +Cc1cc(=O)oc2cc(O)cc(O)c12,0,0,1,0,,0,,,0,,1, +CC(C)CNCC(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CNC1(c2ccccc2Cl)CCCCC1=O,0,0,0,0,0,0,0,,0,,0,0 +Cc1ccccc1OCC(O)CNCCOc1ccc(C(N)=O)cc1,0,0,0,0,0,0,0,,0,,0,0 +O=c1oc2cc(O)ccc2c2oc3cc(O)ccc3c12,0,0,1,,1,,,,1,,1,0 +COc1ccc(-c2coc3cc(O)cc(O)c3c2=O)cc1,0,,1,,1,1,,1,1,0,, +O=c1cc(-c2ccccc2)oc2cc(O)cc(O)c12,0,0,1,,1,1,1,1,0,,1, +O=c1cc(-c2ccc(O)cc2)oc2cc(O)cc(O)c12,0,0,1,,1,1,,1,1,0,1,1 +O=C(CCc1ccc(O)cc1)c1c(O)cc(O)cc1O,,,,,1,1,,1,0,0,1, +CCC(=O)N(c1ccccc1)C1CCN(CCc2ccccc2)CC1,1,1,0,0,0,0,0,,0,,0,0 +O=C(O)CCC(=O)c1ccc(-c2ccccc2)cc1,0,,,,0,0,0,0,0,0,,0 +CCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1cccc(Cl)c1Cl,0,,,,0,,0,1,0,,, +NC(=O)OCC(COC(N)=O)c1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +CCNC(C)Cc1cccc(C(F)(F)F)c1,0,0,0,0,0,0,0,,0,,,0 +COC(=O)c1ccc(C)cc1C1=NC(=O)C(C)(C(C)C)N1,0,0,0,0,0,0,0,0,0,0,0,0 +CCOP(=S)(NC(C)C)Oc1ccccc1C(=O)OC(C)C,0,0,1,1,0,0,0,,0,0,,0 +CSc1nc(NC2CC2)nc(NC(C)(C)C)n1,0,0,1,,0,0,0,,0,,1,0 +C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2C(=O)CO,1,1,0,,1,0,0,,0,0,0,0 +CN(C)[C@@H]1C(O)=C(C(N)=O)C(=O)[C@@]2(O)C(O)=C3C(=O)c4c(O)ccc(Cl)c4[C@@H](O)[C@H]3C[C@@H]12,,,,,,,,0,,0,, +CCC1(C)CC(=O)NC(=O)C1,0,0,0,0,0,0,0,,0,,0,0 +O=C1NCN(c2ccccc2)C12CCN(CCCOc1ccc(F)cc1)CC2,0,0,0,,,0,0,1,0,0,1,0 +NC(=S)NNC(N)=S,0,0,0,0,0,0,0,0,0,0,0,0 +NC(=S)C(N)=S,0,0,0,0,0,0,0,,0,0,0,0 
+CC1CN1P(=O)(N1CC1C)N1CC1C,0,0,0,0,0,0,,0,0,0,0,0 +O=C(Oc1ccccc1)Oc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +C[Hg]Cl,0,0,0,,0,,1,,1,1,,1 +S=c1[nH]cnc2[nH]cnc12,0,0,1,,0,0,,1,0,0,0,1 +[Hg+2],0,1,1,,,1,1,,,1,,1 +CCCCCCCCCCCCNC(=N)N,0,0,0,0,0,0,0,,0,0,,0 +CN(C)CCN(Cc1cccs1)c1ccccn1,0,0,0,0,0,0,0,0,0,0,0,0 +COc1nn(CSP(=S)(OC)OC)c(=O)s1,0,0,0,0,0,0,0,0,0,,0,0 +NC1=NCC2c3ccccc3Cc3ccccc3N12,0,0,0,0,0,0,0,0,0,0,0, +CC(=O)[C@H]1CC[C@H]2[C@@H]3CC[C@H]4C[C@](C)(O)CC[C@]4(C)[C@H]3CC[C@]12C,0,0,0,0,0,0,0,0,0,0,0,0 +O=C([O-])CCC/C=C\C[C@H]1[C@@H](O)C[C@@H](O)[C@@H]1/C=C/[C@@H](O)COc1cccc(Cl)c1,0,0,0,0,0,0,0,,0,,,0 +O=C(O)Cc1ccc(CCNS(=O)(=O)c2ccc(Cl)cc2)cc1,0,0,0,0,0,0,1,0,0,0,0,0 +NC(=O)c1cn(Cc2c(F)cccc2F)nn1,0,0,0,0,0,0,0,0,0,0,0,0 +COc1ccc(C=CC(=O)OCCC(C)C)cc1,0,0,,,,0,0,,,,0,0 +O=C(NC1CCN(CCc2c[nH]c3ccccc23)CC1)c1ccccc1,0,0,,0,0,0,0,0,0,0,0,0 +CCn1cc[n+](C)c1C.O=S(=O)([O-])C(F)(F)F,0,0,0,0,1,0,0,0,0,0,0,0 +Clc1ccc2cc3ccccc3cc2c1,0,,1,,,,,0,0,,,1 +CCCCn1cc[n+](C)c1.F[B-](F)(F)F,0,0,0,0,0,0,0,1,0,0,0,0 +F/C(COc1ccc2c(c1)[nH]c1ccccc12)=C1/CN2CCC1CC2,0,0,1,,0,0,,,0,,,0 +CC(C)Cc1ccc([C@@H](C)C(=O)NS(C)(=O)=O)cc1,0,0,0,0,0,0,0,,0,,0,0 +CCCCN(CCCC)C(=S)SSC(=S)N(CCCC)CCCC,0,0,0,,0,0,,,0,1,,0 +CCC[n+]1ccn(C)c1C.O=S(=O)([N-]S(=O)(=O)C(F)(F)F)C(F)(F)F,0,0,0,0,0,0,0,0,0,0,,0 +Brc1c2ccccc2cc2ccccc12,0,,0,,1,,1,,0,,,1 +CCO/C=C1\N=C(c2ccccc2)OC1=O,0,0,,0,0,0,0,0,0,0,0,0 +CNc1cc(OC)c(C(=O)N[C@H]2CCN(Cc3ccccc3)[C@H]2C)cc1Cl,0,0,0,0,0,0,0,,0,,0,0 +CCN1CCCC1CNC(=O)c1cc(S(=O)(=O)CC)c(N)cc1OC,0,0,0,0,0,0,0,0,0,0,0,0 +COc1cc2c(cc1OC)C1CC(=O)C(CC(C)C)CN1CC2,0,0,0,0,0,0,0,0,0,0,,0 +Cc1cc(C)cc(C(=O)OC2C[C@@H]3CC[C@H](C2)N3C)c1,0,0,0,0,0,0,0,,0,,0,0 +CC[N+]1(C)CCCC1.O=S(=O)([O-])C(F)(F)F,0,0,0,0,0,0,0,0,0,0,0,0 +COP(=O)(OC)SCn1c(=O)oc2cc(Cl)cnc21,0,0,1,,0,0,0,1,0,0,, +CNC(=O)/C=C(\C)OP(=O)(OC)OC,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1occc1SSc1ccoc1C,0,,0,,0,0,0,,0,1,0,1 +Cc1cc(C(F)(C(F)(F)F)C(F)(F)F)ccc1NC(=O)c1cccc(I)c1C(=O)NC(C)(C)CS(C)(=O)=O,0,0,0,0,,0,,,0,1,1,0 +CC=CC(=O)CC,0,0,0,0,0,0,0,0,0,0,0,0 +CC1OCCC1=O,0,0,0,0,0,0,0,0,0,0,0,0 +CC1CCCC(=O)C1=O,0,0,0,0,0,0,0,0,0,0,0,0 +CC1=C(O)C(=O)OC1C,0,0,0,0,0,0,0,,0,0,0,0 +CCCCCc1ccco1,0,0,0,,0,0,,,0,,1, +c1cnc2c(n1)CCCC2,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCc1ccc2cccc(S(=O)(=O)[O-])c2c1,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1cc(S(=O)(=O)[O-])ccc1/N=N/c1c(O)ccc2ccccc12,0,0,1,,0,0,0,,0,0,,0 +Cc1ccc(N=Nc2c(O)ccc(N=Nc3ccc(S(=O)(=O)[O-])cc3)c2O)c(C)c1,0,0,1,0,0,0,0,0,0,0,,0 +Nc1cnn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1,0,0,0,0,0,0,,0,0,0,,1 +CCNc1nc(Cl)nc(NC(C)(C)C)n1,0,0,,,1,1,0,0,0,0,,0 +NS(=O)(=O)c1cc2c(cc1Cl)N=CNS2(=O)=O,0,0,0,0,0,0,0,0,0,0,0,0 +Oc1c(Cl)cc(Cl)c2cccnc12,0,,1,,,,0,,1,1,1,1 +NC(=O)OCC(O)COc1ccc(Cl)cc1,0,0,,0,0,0,0,,0,,0,0 +CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21,0,0,0,0,,0,,,0,0,,1 +CCCCCCCCCCCCCCn1cc[n+](C)c1,0,,0,1,,,,1,0,,1, +O=[Cr](=O)([O-])O[Cr](=O)(=O)[O-],0,,0,,0,0,,1,0,0,,1 +O=P(Cl)(Cl)Cl,0,0,0,0,0,0,0,0,0,0,0,0 +CCN(Cc1cccc(S(=O)(=O)[O-])c1)c1ccc(C(=C2C=CC(=[N+](CC)Cc3cccc(S(=O)(=O)[O-])c3)C=C2)c2ccccc2)cc1,0,0,0,0,0,0,,1,0,0,0,0 +CC(C)COC(=O)COc1cc(Cl)c(Cl)cc1Cl,0,0,0,0,0,0,0,,0,0,,0 +O=C(OC[C@H]1O[C@@H](OC(=O)c2cc(O)c(O)c(OC(=O)c3cc(O)c(O)c(O)c3)c2)[C@H](OC(=O)c2cc(O)c(O)c(OC(=O)c3cc(O)c(O)c(O)c3)c2)[C@@H](OC(=O)c2cc(O)c(O)c(OC(=O)c3cc(O)c(O)c(O)c3)c2)[C@@H]1OC(=O)c1cc(O)c(O)c(OC(=O)c2cc(O)c(O)c(O)c2)c1)c1cc(O)c(O)c(OC(=O)c2cc(O)c(O)c(O)c2)c1,1,0,1,,0,0,,1,1,,1,1 +CN(C)CCOC(c1ccccc1)c1ccccc1,0,0,0,0,0,1,0,0,0,0,0,0 +COC(=O)c1ccc(C)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +CN(C)CCCN1c2ccccc2C(C)(C)c2ccccc21,0,0,0,0,0,0,,,0,,,0 +COc1ccc([C@@H]2Sc3ccccc3N(CCN(C)C)C(=O)[C@@H]2OC(C)=O)cc1,0,0,0,0,0,,0,0,0,0,0, 
+CC(=O)C=Cc1ccccc1,0,0,0,0,1,0,0,0,0,0,0,0 +Cc1c[nH]c(=S)[nH]c1=O,0,0,0,0,0,0,0,0,0,0,0,0 +COc1ccc2cc1Oc1cc3c(cc1OC)CC[N+](C)(C)[C@H]3Cc1ccc(cc1)Oc1c(OC)c(OC)cc3c1[C@@H](C2)[N+](C)(C)CC3,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCC[C@H](O)/C=C/[C@H]1[C@H](O)CC(=O)[C@@H]1C/C=C\CCCC(=O)O,0,,0,0,,,0,0,0,0,0,0 +CCCCC[C@H](O)/C=C/[C@H]1[C@H](O)C[C@H](O)[C@@H]1C/C=C\CCCC(=O)O,0,0,0,0,1,,0,,0,,0,0 +CC12CCC(CC1)C(C)(C)O2,0,0,1,0,0,0,0,0,0,0,0,0 +C=COCC1CCC(CO)CC1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)/C=C/C1=C(C)CCCC1(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)NC1CCSC1=O,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)(C)CC(C)(C)c1ccc(O)c(Cc2ccc(Cl)cc2Cl)c1,0,,0,0,,,,0,0,,1, +CC1COc2ccccc2N1C(=O)C(Cl)Cl,0,0,1,,0,,,1,1,0,0,1 +CC(N)CN,0,0,0,,0,0,0,0,0,0,0,0 +CCC(C)O,0,0,0,0,,0,0,0,0,0,0,0 +CCCCC(CC)CNC(=N)NC(=N)NCCCCCCNC(=N)NC(=N)NCC(CC)CCCC,0,,,,,,,,,,, +CC(O)CN,0,0,0,0,0,0,0,0,0,0,0,0 +CO/N=C(\C(=O)N[C@@H]1C(=O)N2C(C(=O)[O-])=C(CSc3nc(=O)c([O-])nn3C)CS[C@H]12)c1csc(N)n1,0,0,,0,,0,0,0,0,0,0,0 +O=c1oc2cc(O)ccc2s1,0,0,0,0,0,0,0,,0,,0,0 +C=CCc1ccc(O)c(OC)c1,0,0,0,0,0,0,0,0,0,0,0,0 +COC(=O)[C@@H](N)CCCN/C(N)=N/[N+](=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +CC1(S(=O)(=O)[O-])CC(=O)c2ccccc2C1=O,0,0,1,,,,,,0,,,1 +Cc1nnc2n1-c1sc(CCC(=O)N3CCOCC3)cc1C(c1ccccc1Cl)=NC2,0,0,0,0,0,0,0,0,0,0,,0 +C[C@H](N[C@H](CCc1ccccc1)C(=O)O)C(=O)N1CCC[C@H]1C(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 +CCC(C)[C@H]1O[C@]2(C=C[C@@H]1C)C[C@@H]1C[C@@H](CC=C(C)[C@@H](O[C@H]3C[C@H](OC)[C@@H](O[C@H]4C[C@H](OC)[C@H](NC(C)=O)[C@H](C)O4)[C@H](C)O3)[C@@H](C)C=CC=C3CO[C@@H]4[C@H](O)C(C)=C[C@@H](C(=O)O1)[C@]34O)O2,0,0,0,,,0,,,0,,1, +COc1c(Br)cc(Br)c(C)c1Br,0,0,0,0,0,0,0,0,0,0,,0 +C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12.C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12,1,0,1,,1,0,0,,0,,0,0 +CN(C)CCc1c[nH]c2ccc(CS(=O)(=O)N3CCCC3)cc12,0,0,0,0,0,0,0,0,0,0,0,0 +O=C1/C(=C2\Nc3ccc(S(=O)(=O)O)cc3C2=O)Nc2ccc(S(=O)(=O)O)cc21,0,0,0,0,1,0,0,,0,,0,0 +CC(=O)O[Cr](O)OC(C)=O.CC(=O)O[Cr](O)OC(C)=O.CC(=O)O[Cr](OC(C)=O)OC(C)=O,0,0,0,0,0,0,0,,0,0,0,0 +CO[C@H]1CC(O[C@H]2C[C@H]([C@H]3O[C@](C)(O)[C@H](C)C[C@@H]3C)O[C@H]2[C@]2(C)CC[C@H]([C@]3(C)CC[C@]4(C[C@H](O)[C@@H](C)[C@@H]([C@@H](C)[C@@H]5O[C@](O)(CC(=O)[O-])[C@@H](C)[C@H](OC)[C@H]5OC)O4)O3)O2)O[C@@H](C)[C@@H]1OC,0,0,0,0,,0,0,,0,0,,1 +C=CC(=O)OCCn1c(=O)n(CCOC(=O)C=C)c(=O)n(CCOC(=O)C=C)c1=O,0,1,0,,,1,1,,1,,0, +C=C(C)C(=O)OCCNC(C)(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +NS(=O)(=O)c1ccccc1OC(F)(F)F,0,0,0,0,0,0,0,0,0,0,0,0 +O=C=NCC1CCCC(CN=C=O)C1,0,0,0,0,0,0,0,0,0,0,0,0 +C=C[C@H]1CN2CCC1C[C@@H]2[C@@H](O)c1ccnc2ccccc12,0,0,,,0,0,0,,0,,,0 +Cc1cc(N)c2cc(NC(=O)Nc3ccc4nc(C)cc(N)c4c3)ccc2n1,,,,,,,,,,0,, +O=C(O)c1ccccc1O.Oc1cccc2cccnc12,0,0,0,,1,1,,0,0,0,, +C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CC[C@@H]2OP(=O)(O)O,1,,0,0,1,1,0,,0,,0,0 +O=C(O)[C@@H](S)[C@H](S)C(=O)O,0,0,0,0,,0,,1,0,0,0, +CN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1,0,0,0,,0,0,,,,0,, +Cc1ncc(CO)c(CN)c1O,,,,,,,,0,,0,, +NS(=O)(=O)c1ccc(C(=O)O)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +O=C(CCS)OCC(COC(=O)CCS)(COC(=O)CCS)COC(=O)CCS,0,0,0,0,0,0,0,,0,0,,1 +C[C@@H]1NC(=O)[C@@H](N)CNC(=O)[C@H]([C@@H]2CCNC(N)=N2)NC(=O)/C(=C/NC(N)=O)NC(=O)[C@H](CNC(=O)C[C@@H](N)CCCN)NC1=O,,,,,,,,0,,0,, +OCCOCCN1CCN(C2=Nc3ccccc3Sc3ccccc32)CC1,0,0,0,0,0,0,0,0,0,0,,0 +Cc1cc(/C=C/c2ccc3cc(N(C)C)ccc3[n+]2C)c(C)n1-c1ccccc1.Cc1cc(/C=C/c2ccc3cc(N(C)C)ccc3[n+]2C)c(C)n1-c1ccccc1.O=C([O-])c1cc2ccccc2c(Cc2c(O)c(C(=O)[O-])cc3ccccc23)c1O,0,0,,,,0,0,,0,,1,1 +COc1ccc(CN(CCN(C)C)c2ccccn2)cc1,0,0,0,0,0,0,0,0,0,0,1,0 +CN(C)C(=O)Oc1ccc[n+](C)c1,1,0,0,0,1,0,0,0,0,0,0,0 +Cc1ncc(CO)c(CO)c1O,0,0,0,0,0,0,0,0,0,0,0,0 +CCC1NC(=O)c2cc(S(N)(=O)=O)c(Cl)cc2N1,0,0,0,0,0,0,0,0,0,0,,0 
+C=C[C@H]1CN2CCC1C[C@@H]2[C@@H](O)c1ccnc2ccc(OC)cc12,0,0,1,0,1,1,0,0,0,0,,0 +Brc1cc2ccccc2c2ccccc12,0,0,,0,0,0,0,0,0,0,,0 +CC(C)(N)CO,0,0,,0,0,0,0,0,0,0,0,0 +CC(C)(CO)CO,0,0,0,0,0,1,0,0,0,0,0,0 +O=S1(=O)CCCC1,0,0,0,0,0,0,0,0,0,0,0,0 +O=[N+]([O-])C(CO)(CO)CO,0,0,0,0,0,0,0,1,0,0,0,0 +OCC(CO)(CO)COCC(CO)(CO)CO,0,0,0,0,0,0,0,0,0,0,,0 +O=[N+]([O-])OCCN(CCO[N+](=O)[O-])CCO[N+](=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +NC(CO)(CO)CO,0,0,0,0,0,0,0,0,0,0,0,0 +O=C(Cl)c1cc(C(=O)Cl)cc(C(=O)Cl)c1,0,0,,,0,0,0,,0,1,0,1 +CO[Si](CCCS)(OC)OC,0,0,,0,0,0,0,0,0,0,0,0 +COc1cc2c3cc1Oc1cc(ccc1O)C[C@@H]1c4c(cc(OC)c(O)c4Oc4ccc(cc4)C[C@@H]3N(C)CC2)CC[N+]1(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +O=C(O[C@@H]1C[C@@H]2CC[C@H](C1)[N+]21CCCC1)C(O)(c1ccccc1)c1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +COc1cc(C(=O)NC2CCCNC2)cc(OC)c1OC,0,0,0,0,0,0,0,0,0,0,0,0 +C[N+](C)=CCl,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)c1cccnc1,0,0,0,0,0,0,0,0,0,0,0,0 +O=S1(=O)OC(c2ccc([O-])cc2)(c2ccc(O)cc2)c2ccccc21,0,0,0,,0,0,0,0,0,0,,0 +O=CN1CCOCC1,0,0,0,0,0,0,0,0,0,0,0,0 +COC(=O)CCC(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 +NCc1cccnc1,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCCCn1sc(Cl)c(Cl)c1=O,0,,0,,,0,0,,0,1,,1 +Cc1cc(O)cc(C)c1Cl,0,0,0,0,0,0,0,0,0,0,1,0 +O=[Zr](Cl)Cl,0,0,0,0,0,0,0,,0,0,0,0 +CSc1ccc2c(c1)N(CCC1CCCCN1C)c1ccccc1S2,0,0,0,,,0,,,,0,, +c1ccc2cnncc2c1,0,0,0,0,0,0,0,0,0,0,0,0 +COC(=O)c1ccc(CBr)cc1,0,0,1,,1,0,0,0,1,0,0,0 +CN1CCc2cc(Cl)c(O)cc2[C@H]2c3ccccc3CC[C@@H]21,0,0,0,0,0,0,0,0,0,0,,0 +O=P(O)(OCc1ccccc1)OCc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +S=C=NCc1ccccc1,0,0,,0,0,0,,,0,1,1,1 +Oc1ccc(Cl)cc1Cc1ccccc1,0,0,0,,0,0,0,1,0,0,1,0 +ClCc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +OCc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)OCc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +COCCc1ccc(OCC(O)CNC(C)C)cc1.COCCc1ccc(OCC(O)CNC(C)C)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +CN(C)C(=N)NC(=N)N,0,0,0,,0,0,0,0,0,0,0,0 +CCCC[Sn](CCCC)(OC(C)=O)OC(C)=O,0,,,,,0,,1,,,1,1 +C[NH+](C)CCC(c1ccccc1)c1cccc[nH+]1,0,0,0,0,1,0,0,0,0,0,0,0 +CCOc1ccc(N)cc1,0,0,1,1,0,0,0,,0,0,0,0 +CC(C)=CCC[C@H](C)CCO,0,0,0,0,0,0,0,0,0,0,0,0 +CCOc1cccc(N)c1,0,0,,0,0,0,0,0,0,0,0,0 +Nc1ccccc1C(=O)OCCc1ccccc1,1,0,1,,1,0,0,0,1,0,,0 +CC(C)CC(O)CC(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +C=C(C)C(=O)OCCOP(=O)(O)OCCOC(=O)C(=C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)OS(C)(=O)=O,,0,0,0,0,0,0,0,0,0,,0 +c1ccc2c(c1)OCC(CN1CCCCC1)O2,0,0,0,0,0,0,0,,0,,0,0 +C=CCN1CCCC1CNC(=O)c1cc(S(N)(=O)=O)cc(OC)c1OC,0,0,0,0,0,0,0,,0,,0,0 +C=C(C)OC(C)=O,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1cc(O)cc2c1O[C@](C)(CCC[C@H](C)CCC[C@H](C)CCCC(C)C)CC2,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1,0,0,0,0,0,0,0,,0,,0,0 +NC(=O)[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O,0,0,0,0,0,0,0,,0,,0,0 +O=C1CC[C@@H](C(=O)O)N1,0,0,0,0,0,0,0,0,0,0,0,0 +CN1C(=S)CN=C(c2ccccc2)c2cc(Cl)ccc21,0,0,0,,,0,0,0,,0,,0 +CC(C)(C)OC(=O)c1ncn2c1[C@@H]1CCCN1C(=O)c1c(Br)cccc1-2,0,0,,0,0,0,0,,0,0,0,0 +CCC(Cc1c(I)cc(I)c(O)c1I)C(=O)O,0,0,0,0,,,,,0,,,0 +CCOc1cc(NC(C)=O)ccc1C(=O)OC,0,0,,0,0,0,0,0,0,0,0,0 +CC(O)C#CC(C)O,0,,0,,0,0,1,,,,,0 +COc1ccc(N)cc1N,0,0,1,,0,,0,1,0,1,1,1 +CC1(C)[C@@H](O[C@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O[C@@H]2O[C@H](C(=O)[O-])[C@@H](O)[C@H](O)[C@H]2O)CC[C@@]2(C)[C@H]1CC[C@]1(C)[C@@H]2C(=O)C=C2[C@@H]3C[C@@](C)(C(=O)O)CC[C@]3(C)CC[C@]21C,0,0,0,0,0,0,0,0,0,0,0,0 +O=C1NC(=O)C(=O)C(=O)N1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)(C)c1cc(/C=C2\SC(=N)NC2=O)cc(C(C)(C)C)c1O,0,0,0,,,,,1,,,1, +OCCCC1CCCCC1,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1cc2c3c(c1)C(c1ccccc1)=N[C@@H](NC(=O)c1ccncc1)C(=O)N3CC2,0,0,0,0,0,0,1,,0,0,0,0 +CCc1cc(C2=C(C(=O)[O-])N(c3ccccc3C(F)(F)F)S(=O)(=O)c3ccccc32)cc2c1OCO2,0,0,0,0,0,0,,,0,0,1,0 
+O=S(=O)([O-])c1ccc2c(/N=N\c3ccc(S(=O)(=O)[O-])c4ccccc34)c(O)c(S(=O)(=O)[O-])cc2c1,0,0,0,0,0,0,0,0,0,0,0,0 +O=C=Nc1ccc(Cl)cc1,0,0,,,,0,0,,,0,1,0 +CC(C)OC(=O)c1ccccc1C(=O)OC(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CO[C@H]1[C@H]([C@@]2(C)O[C@@H]2CC=C(C)C)[C@]2(CC[C@H]1OC(=O)/C=C/C=C/C=C/C=C/C(=O)O)CO2,0,0,0,1,0,0,0,1,0,0,0,0 +C1CCC2(CCCCO2)OC1,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCC(CC)COC(=O)c1ccccc1O,,,,,,,,0,,0,, +C[C@H]1O[C@H](O[C@@H]2[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)[C@H]2O)[C@@H](N)C[C@@H]1NC(=N)C(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 +CN(C)CCCN1c2ccccc2CCc2ccccc21,0,0,0,0,,0,,,0,0,,0 +CCCCOCCO,0,0,0,0,0,0,0,0,0,0,0,0 +[O-][n+]1ccccc1[S-],0,1,0,,,0,1,,1,1,1, +CCCN(CCC)C(=O)SCC,0,0,0,0,0,0,0,0,0,0,0,0 +O=S(=O)([O-])c1cccc2ccccc12,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)C1=CC2=CC[C@H]3[C@](C)(C(=O)[O-])CCC[C@]3(C)[C@H]2CC1,0,0,1,0,0,0,,,0,0,, +CN(C)c1ccc(C(=O)c2ccc(N(C)C)cc2)cc1,0,1,1,,,,0,0,1,0,1,1 +N#CCCC#N,0,0,0,0,0,0,0,0,0,0,0,0 +Cc1ncc([N+](=O)[O-])n1CCO,0,0,0,0,0,0,0,0,0,0,0,0 +Nc1c(CC(=O)[O-])cccc1C(=O)c1ccccc1,0,,0,0,1,0,1,,0,,0,0 +C[N+]1(C)[C@H]2CC[C@@H]1C[C@H](OC(=O)C(CO)c1ccccc1)C2,1,0,0,0,1,0,0,0,0,0,0,0 +CC(Cl)(Cl)C(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 +CN[C@@H]1[C@H](O[C@H]2[C@H](O[C@@H]3[C@@H](NC(=N)N)[C@H](O)[C@@H](NC(=N)N)[C@H](O)[C@H]3O)O[C@@H](C)[C@]2(O)C=O)O[C@@H](CO)[C@H](O)[C@H]1O.CN[C@@H]1[C@H](O[C@H]2[C@H](O[C@@H]3[C@@H](NC(=N)N)[C@H](O)[C@@H](NC(=N)N)[C@H](O)[C@H]3O)O[C@@H](C)[C@]2(O)C=O)O[C@@H](CO)[C@H](O)[C@H]1O,0,0,0,0,0,0,0,,0,,0,0 +O=C(CCl)CCl,0,,0,,0,1,1,,1,0,,1 +CC(=O)C(Cl)Cl,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)(c1ccccc1)c1ccc(O)cc1,0,0,0,,1,1,0,1,0,0,1, +Cc1cc(O)c2c(O)c3c(O)cccc3cc2c1,0,0,1,0,0,0,1,1,0,0,1,0 +CCC(=O)[N-]S(=O)(=O)c1ccc(-c2c(-c3ccccc3)noc2C)cc1,0,0,0,,0,0,,,0,,0,0 +Cc1ccccc1N1CCN(CCc2nnc3n2CCCC3)CC1,0,0,0,0,1,1,0,0,0,0,0,0 +C=Cc1ccc(S(=O)(=O)[O-])cc1,0,0,0,0,0,0,,1,0,0,,0 +C[C@]12CC[C@@H]3c4ccc(OC(=O)N(CCCl)CCCl)cc4CC[C@H]3[C@@H]1CC[C@@H]2OP(=O)(O)O,1,0,0,0,1,1,,,0,,0,1 +CC1Cc2ccccc2N1NC(=O)c1ccc(Cl)c(S(N)(=O)=O)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)OC(C)C(=O)OCC[N+](C)(C)C.CC(=O)OC(C)C(=O)OCC[N+](C)(C)C.O=S(=O)([O-])c1cccc2c(S(=O)(=O)[O-])cccc12,0,0,0,0,1,0,0,,0,,0,0 +CC(=O)CC(=O)Nc1ccc2[nH]c(=O)[nH]c2c1,0,0,0,,0,0,0,0,0,0,0,0 +CCO[Si](C)(CCCOCC1CO1)OCC,0,0,0,0,0,0,0,0,0,0,0,0 +O=[N+]([O-])c1cc(C(F)(F)F)c(Cl)c([N+](=O)[O-])c1Cl,0,0,0,,,,,,,,, +CCCCOCCOCCOCCO,0,0,0,0,0,0,0,0,0,1,0,0 +CCCCCCCC/C=C/C(=O)[O-].CCCCCCCC/C=C/C(=O)[O-],,,,,,,,0,,0,, +Nc1cc(C(F)(F)F)ccc1S,,,,,,,,,,1,, +Cc1cccc(Cc2c[nH]cn2)c1C,0,,,,0,0,0,0,0,,0,0 +CCOC(=O)CC(=O)OCC,0,0,,0,0,0,0,0,0,0,0,0 +COc1ccc(CNCC(O)COc2ccc3[nH]c(=O)ccc3c2)cc1OC,0,0,0,0,0,0,0,,0,,0,0 +COC(=O)C1=C(C)NC(COC(N)=O)=C(C(=O)OC(C)C)C1c1cccc(Cl)c1Cl,0,1,,,0,,1,1,0,1,,1 +CCNC(=O)NCCCOc1cccc(CN2CCCCC2)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(=O)SCC(CC(=O)c1ccc(C)cc1)C(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 +CCOC(=O)Cn1cccc1-c1nc(-c2ccc(OC)cc2)c(-c2ccc(OC)cc2)s1,0,0,0,0,0,0,0,1,0,,0, +O=C(CCCN1CCN(c2ccc(F)cc2)CC1)NC1c2ccccc2CSc2ccccc21,0,0,0,,0,0,,,0,0,,0 +CC(C)(C)NC[C@H](O)c1ccc(O)cc1Cl,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCC/C=C\C/C=C\CCCCCCCC(=O)NC(C)c1ccccc1,0,0,0,0,0,0,0,1,0,0,0,0 +CC(NN)c1ccccc1,0,0,,0,,0,0,0,0,0,0,0 +O=Cc1ccc(Cl)cc1,,,0,,0,0,0,0,0,0,,0 +CCN(C)C(=O)Oc1cccc([C@H](C)N(C)C)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1,0,,0,,,0,,,,,1,1 +CCN(CC)C(C)C(=O)c1ccccc1,0,0,,0,0,0,0,0,0,0,0,0 +CCN1CC(CCN2CCOCC2)C(c2ccccc2)(c2ccccc2)C1=O,0,0,0,0,0,0,0,,0,0,0,0 +Cc1cccc(C(=O)O)c1[N+](=O)[O-],0,0,0,0,0,0,0,0,0,0,0,0 +CCN(CC)CCOC(=O)C1(c2ccccc2)CCCC1.CCN(CC)CCOC(=O)C1(c2ccccc2)CCCC1.O=S(=O)(O)CCS(=O)(=O)O,0,0,0,0,0,0,0,0,0,0,0,0 
+Clc1ccccn1,0,0,0,0,0,0,0,0,0,0,0,0 +CCC(=O)/C=C/C1C(C)=CCCC1(C)C,0,0,0,0,0,0,0,0,0,0,0,0 +CC1CC(OC(=O)c2ccccc2O)CC(C)(C)C1,0,0,0,0,0,0,0,0,0,1,0,0 +CCCCCCCCCO,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCNCCCCCC,0,0,0,0,0,0,0,0,0,0,0,0 +CCN(CC)c1ccc(N)cc1,0,0,1,,,0,0,1,,1,1, +ClCCCCl,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCCCOC(C)=O,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCC(CC)COC(=O)CCCCCCCCC(=O)OCC(CC)CCCC,0,0,0,0,,0,0,0,0,0,0,0 +CCOC(C)=O,0,0,0,0,0,0,0,0,0,0,,0 +NCCNCCN,1,1,0,0,0,0,0,0,0,0,0,0 +CCOP(=O)(CC)OCC,0,0,0,0,0,0,0,0,0,0,,0 +Cc1c2oc3c(C)ccc(C(=O)N[C@@H]4C(=O)N[C@H](C(C)C)C(=O)N5CCC[C@H]5C(=O)N(C)CC(=O)N(C)[C@@H](C(C)C)C(=O)O[C@@H]4C)c3nc-2c(C(=O)N[C@@H]2C(=O)N[C@H](C(C)C)C(=O)N3CCC[C@H]3C(=O)N(C)CC(=O)N(C)[C@@H](C(C)C)C(=O)O[C@@H]2C)c(N)c1=O,0,,0,1,,0,,,0,0,,1 +NC(=O)CCCCC(N)=O,0,0,0,0,0,0,0,0,0,0,0,0 +CNC(=O)ON=CC(C)(C)SC,0,0,0,0,0,0,0,0,0,0,0,0 +C=CCOc1ccc(CC(=O)O)cc1Cl,1,0,0,0,1,1,1,,0,,0,0 +NN,0,0,1,0,0,0,0,0,0,0,0,0 +N[C@@H](Cc1cnc[nH]1)C(=O)O,0,0,,0,,0,0,,0,,0, +NNc1nc(-c2ccccc2)cs1,0,0,1,,0,0,0,1,0,1,1, +NNc1nc(-c2ccc(N)cc2)cs1,0,0,1,,0,0,,1,0,,1,0 +Cc1ccccc1CO[C@H]1C[C@]2(C(C)C)CC[C@@]1(C)O2,0,0,,,0,0,0,0,0,0,0,0 +NNc1ccc(C(=O)O)cc1,1,0,,,,0,0,0,0,0,0,0 +CCCCCCOc1ccccc1C(=O)O,0,0,0,0,,0,0,0,0,0,0,0 +O=C(OCc1ccccc1)C(=O)OCc1ccccc1,0,0,0,0,0,0,0,0,0,0,0,0 +CCCSc1ccc2[nH]c(NC(=O)OC)nc2c1,0,,1,,1,0,0,0,1,1,1,1 \ No newline at end of file diff --git a/chemprop/tests/data/classification.npz b/chemprop/tests/data/classification.npz new file mode 100644 index 0000000000000000000000000000000000000000..871ce27559c9f3b2b0e20b8b7e3c53849f6e0047 --- /dev/null +++ b/chemprop/tests/data/classification.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843a6b271daacd34b8225c14e1b0f933b78e6ff8f0f7a9766de7a51bbdedb906 +size 267678 diff --git a/chemprop/tests/data/classification/mol+mol.csv b/chemprop/tests/data/classification/mol+mol.csv new file mode 100644 index 0000000000000000000000000000000000000000..c805a1326b48177c68838d177bfd3738ba853ebb --- /dev/null +++ b/chemprop/tests/data/classification/mol+mol.csv @@ -0,0 +1,260 @@ +mol a smiles,mol b Smiles,synergy +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C(=O)O)ON=C(C1=CSC(=N1)N)C(=O)NC2C3N(C2=O)C(=C(CS3)C[N+]4=CC=CC=C4)C(=O)[O-],0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C)NCC(=O)NC1=CC(=C2CC3CC4C(C(=O)C(=C(C4(C(=O)C3=C(C2=C1O)O)O)O)C(=O)N)N(C)C)N(C)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CN1C(=NC(=O)C(=O)N1)SCC2=C(N3C(C(C3=O)NC(=O)C(=NOC)C4=CSC(=N4)N)SC2)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=CC=CC=CC=CC=CC=CC(CC2C(C(CC(O2)(CC(CC(C(CCC(CC(CC(=O)OC(C(C1O)C)C)O)O)O)O)O)O)O)C(=O)O)OC3C(C(C(C(O3)C)O)N)O,0 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,0 +N=C(N)NCCC[C@H](NC(=O)[C@H](CCCNC(=N)N)NC(=O)[C@H](Cc1c[nH]c2ccccc12)NC(=O)[C@H](CCCNC(=N)N)NC(=O)[C@H](CCCNC(=N)N)NC(=O)[C@H](Cc1c[nH]c2ccccc12)NC(=O)[C@@H](N)Cc1ccccc1)C(=O)O[NH2],CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)NC(=O)C(CCN)NC(=O)O)C(C)O)CCN)CCN,C(C(C1C(=C(C(=O)O1)O)O)O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)CNS(=O)(=O)N)C(=O)O)C(C)O,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1NC(=O)C(CCN)O)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(C(C(C(O3)CN)O)O)O)N,1 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)CNS(=O)(=O)N)C(=O)O)C(C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N.COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,0 +C1=COC(=C1)CNCCS(=O)(=O)O,C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,0 +CC1=NC2=C(N1CC(C)OC3=CC=CC=N3)N=C(C=C2)C4=CC(=NC(=C4)N)N,[Na+].[Cl-],0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC1C(C(C(N(CC(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)C)O)(C)O,1 +CCOP(=O)(O)OP(=O)(O)O,CCN1C=C(C(=O)C2=C1N=C(C=C2)C)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=CC=CC=CC=CC=CC=CC(CC2C(C(CC(O2)(CC(CC(C(CCC(CC(CC(=O)OC(C(C1O)C)C)O)O)O)O)O)O)O)C(=O)O)OC3C(C(C(C(O3)C)O)N)O,1 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,C1=CC(=CC=C1C(C(CO)NC(=O)C(Cl)Cl)O)[N+](=O)[O-],0 
+CC(C)CC1C(=O)NC(C(=O)N2CCCC2C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N3CCCC3C(=O)NC(C(=O)NC(C(=O)N1)CCCN)C(C)C)CC4=CC=CC=C4)CC(C)C)CCCN)C(C)C)CC5=CC=CC=C5,[Ag],0 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)CNS(=O)(=O)N)C(=O)O)C(C)O,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2CCC3(C(C2(CCC1O)C)C(CC4C3(CC(C4=C(CCC=C(C)C)C(=O)O)OC(=O)C)C)O)C,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,1 +N=C(N)NCCC[C@H](NC(=O)[C@H](CCCNC(=N)N)NC(=O)[C@H](Cc1c[nH]c2ccccc12)NC(=O)[C@H](CCCNC(=N)N)NC(=O)[C@H](Cc1c[nH]c2ccccc12)NC(=O)[C@@H](N)Cc1ccccc1)C(=O)O[NH2],CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,0 +CCOP(=O)(O)OP(=O)(O)O,CC(=O)OCC1=C(N2C(C(C2=O)NC(=O)C(=NOC)C3=CSC(=N3)N)SC1)C(=O)O,0 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,1 +CC[C@H](C)[C@H](NC(=O)[C@H](CCCNC(=N)N)NC(=O)[C@H](Cc1c[nH]c2ccccc12)NC(=O)[C@@H](N)Cc1ccccc1)C(=O)N[C@@H](CCCNC(=N)N)C(=O)N[C@@H](CCCCN)C(=O)N[C@H](C(=O)N[C@@H](CCCNC(=N)N)C(=O)O)C(C)C[NH2],CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC(C)CC(C)CCCCCCCCC(=O)NC1CC(C(NC(=O)C2C(CCN2C(=O)C(NC(=O)C(NC(=O)C3CC(CN3C(=O)C(NC1=O)C(C)O)O)C(C(C4=CC=C(C=C4)O)O)O)C(CCN)O)O)NCCN)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC(C)(C)CC(C)(C)C1=CC=C(C=C1)OCCOCCO,1 +CC(C(=O)NC(CCC(=O)O)C(=O)N)NC(=O)COC1C(C(OC(C1O)CO)O)NC(=O)C,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,1 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC=C2C(=C1)C=CN2,0 
+C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC(C)CC(C)CCCCCCCCC(=O)NC1CC(C(NC(=O)C2C(CCN2C(=O)C(NC(=O)C(NC(=O)C3CC(CN3C(=O)C(NC1=O)C(C)O)O)C(C(C4=CC=C(C=C4)O)O)O)C(CCN)O)O)NCCN)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,0 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,C1C(C(C(C(C1NC(=O)C(CCN)O)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(C(C(C(O3)CN)O)O)O)N,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC(=O)OCC1=C(N2C(C(C2=O)NC(=O)CC3=CC=CS3)SC1)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,N[C@@]([H])(Cc1ccccc1)C(=O)N[C@@]([H])([C@]([H])(CC)C)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(C)C)C(=O)O[NH2],1 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C(C=CC(=C41)Cl)O)O)O)O)C(=O)N)N(C)C)O,1 +O.O.O.O.O.O.O.O.[V].[V].[V],C1C(C(C(C(C1N)OC2C(C(C(C(O2)CN)O)O)O)O)OC3C(C(C(C(O3)CO)O)N)O)N,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1NC(=O)C(CCN)O)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(C(C(C(O3)CN)O)O)O)N,0 +N[C@@]([H])(Cc1ccc(O)cc1)C(=O)N[C@@]([H])(CO)C(=O)N1[C@@]([H])(CCC1)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])([C@]([H])(O)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])(Cc1ccccc1)C(=O)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=CC=C2)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)[O-].CC1(C(N2C(S1(=O)=O)CC2=O)C(=O)O)CN3C=CN=N3.[Na+],0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)NC(=O)N4CCNC4=O)C(=O)O)C,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCCC1CC(N(C1)C)C(=O)NC(C2C(C(C(C(O2)SC)O)O)O)C(C)O,1 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)N(C)C)C(=O)O)C(C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCCC=CC=O,1 +CCCOCCN1C2=C(C=CC(=N2)C3=CN=C(C=C3)OC)N=C(C1=O)NCCN4CCOCC4,N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)O,1 +CCCCCCCC(=O)NC(C(C)O)C(=O)NC(CC)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC2=CC=CC=C2)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 +C[As](=O)(O)[O-].C[As](=O)(O)[O-].[Ca+2],C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(=O)NC(CS)C(=O)O,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CC1C(C(C(O1)OC2C(C(C(C(C2O)O)N=C(N)N)O)N=C(N)N)OC3C(C(C(C(O3)CO)O)O)NC)(C=O)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C(O1)P(=O)(O)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C(C(=O)N1S(=O)(=O)O)NC(=O)C(=NOC(C)(C)C(=O)O)C2=CSC(=N2)N,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1C(C(=O)N1S(=O)(=O)O)NC(=O)C(=NOC(C)(C)C(=O)O)C2=CSC(=N2)N,1 +C1C(=O)N(C2=C(S1)C=CC(=C2)C(=O)NCC3=CC=CO3)CC4=C(C=CC=C4Cl)F,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,C1C(=C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CS3)C(=O)[O-])C[N+]4=CC=CC=C4,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC(=CC=C1C(=O)O)N,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,[Mg],0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C(O1)P(=O)(O)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)NC(=O)C(CCN)NC(=O)O)C(C)O)CCN)CCN,C1=CN=C(N=C1)NS(=O)(=O)C2=CC=C(C=C2)N,1 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCCCCCCCNCCCC(C)C1CCC2C1(C(CC3C2C(CC4C3(CCC(C4)OCCCN)C)OCCCN)OCCCN)C,1 +O.O.O.O.O.O.O.O.[V].[V].[V],CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C(O1)P(=O)(O)O,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C(C=CC(=C41)Cl)O)O)O)O)C(=O)N)N(C)C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C2C(C(=O)N1)N=C(N2)NC3C(C(C(C(O3)CO)OC(=O)N)O)NC(=O)CC(CCCN)N)O,0 +CC1=C(C=CC(=C1)CNCCS(=O)(=O)O)F,CCC(C)CC(C)CCCCCCCCC(=O)NC1CC(C(NC(=O)C2C(CCN2C(=O)C(NC(=O)C(NC(=O)C3CC(CN3C(=O)C(NC1=O)C(C)O)O)C(C(C4=CC=C(C=C4)O)O)O)C(CCN)O)O)NCCN)O,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,1 +CCOP(=O)(O)OP(=O)(O)O,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CSC=C3)C(=O)O)C(=O)O)C,1 +CC1=C(C(CC=C1)(C)C)C=CC(=CC=CC(=CCO)C)C,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CN)O)O)O)O)OC3C(C(C(C(O3)CO)O)N)O)N,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1C=C(C(=O)C2=C1N=C(C=C2)C)C(=O)O,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CCCC1CC(N(C1)C)C(=O)NC(C2C(C(C(C(O2)SC)O)O)O)C(C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N.COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=C(C=C2)O)C(=O)NC3C4N(C3=O)C(=C(CS4)CSC5=NN=NN5C)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=CC=C2)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=CC=C2)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1(=O)=O)CC2=O)C(=O)O)C,0 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=NC(=O)NC(=C1F)N,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,C1=CC(=CC=C1C(C(CO)NC(=O)C(Cl)Cl)O)[N+](=O)[O-],1 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C(=O)O)ON=C(C1=CSC(=N1)N)C(=O)NC2C3N(C2=O)C(=C(CS3)C[N+]4=CC=CC=C4)C(=O)[O-],1 +N[C@@]([H])(Cc1ccc(O)cc1)C(=O)N[C@@]([H])(CO)C(=O)N1[C@@]([H])(CCC1)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])([C@]([H])(O)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])(Cc1ccccc1)C(=O)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)O,CN1CCN(CC1)C2=NC3=CC=CC=C3C=C2CN4C5=NC=NC(=C5C(=N4)C6=CC7=C(C=C6)N=C(S7)N)N,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CCOC1=C(C2=CC=CC=C2C=C1)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CN=C(N=C1)[N-]S(=O)(=O)C2=CC=C(C=C2)N.[Na+],1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC(C)(C(=O)O)ON=C(C1=CSC(=N1)N)C(=O)NC2C3N(C2=O)C(=C(CS3)C[N+]4=CC=CC=C4)C(=O)[O-],1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1=C(C(=NO1)C2=CC=CC=C2Cl)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 +C[As](=O)(O)[O-].C[As](=O)(O)[O-].[Ca+2],C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,1 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC(=O)OCC1=C(N2C(C(C2=O)NC(=O)C(=NOC)C3=CSC(=N3)N)SC1)C(=O)O,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC(=O)OCC1=C(N2C(C(C2=O)NC(=O)CC3=CC=CS3)SC1)C(=O)O,1 +CCCCCCCCNCCCC(C)C1CCC2C1(C(CC3C2C(CC4C3(CCC(C4)OCCCN)C)OCCCN)OCCCN)C,CC1CO1.C1CO1,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(=O)NCC1CN(C(=O)O1)C2=CC(=C(C=C2)N3CCOCC3)F,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1COC2=C3N1C=C(C(=O)C3=CC(=C2N4CCN(CC4)C)F)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C(CN(C=O)O)CP(=O)(O)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(C(=O)N1OS(=O)(=O)O)NC(=O)C(=NOCC2=CC(=O)C(=CN2O)O)C3=CSC(=N3)N)C,1 
+CC1=C(C(CC=C1)(C)C)C=CC(=CC=CC(=CCO)C)C,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)OC)C)C)O)(C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,0 +C1=CC=C(C=C1)CC(C(=O)NC(CCCN=C(N)N)C(=O)NC2=CC3=CC=CC=C3C=C2)N,CC1COC2=C3N1C=C(C(=O)C3=CC(=C2N4CCN(CC4)C)F)C(=O)O,1 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,0 +P#P,CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,1 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,1 +C1=COC(=C1)CNCCS(=O)(=O)O,CCC(C)CC(C)CCCCCCCCC(=O)NC1CC(C(NC(=O)C2C(CCN2C(=O)C(NC(=O)C(NC(=O)C3CC(CN3C(=O)C(NC1=O)C(C)O)O)C(C(C4=CC=C(C=C4)O)O)O)C(CCN)O)O)NCCN)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1(=O)=O)CC2=O)C(=O)O)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,1 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C(CN(C=O)O)CP(=O)(O)O,1 +C[As](=O)(O)[O-].C[As](=O)(O)[O-].[Ca+2],CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,1 +C1=CC=C(C=C1)CCC(C(=CS(=O)(=O)OC2=CC=C(C=C2)[N+](=O)[O-])S)NC(=O)C(CC3=CC=CC=C3)NC(=O)OCC4=CC=CC=C4,C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O,1 +CCOP(=O)(O)OP(=O)(O)O,CC1C2CCC3(C(C2(CCC1O)C)C(CC4C3(CC(C4=C(CCC=C(C)C)C(=O)O)OC(=O)C)C)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCNC1CC(C(C(C1OC2C(C(C(CO2)(C)O)NC)O)O)OC3C(CC=C(O3)CN)N)N,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)NC(=O)N4CCNC4=O)C(=O)O)C,1 
+N[C@@]([H])(Cc1ccc(O)cc1)C(=O)N[C@@]([H])(CO)C(=O)N1[C@@]([H])(CCC1)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])([C@]([H])(O)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])(Cc1ccccc1)C(=O)O,CC1=NN=C(S1)SCC2=C(N3C(C(C3=O)NC(=O)CN4C=NN=N4)SC2)C(=O)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1NC(=O)C(CCN)O)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(C(C(C(O3)CN)O)O)O)N,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CCCC1CC(N(C1)C)C(=O)NC(C2C(C(C(C(O2)SC)O)O)O)C(C)Cl,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C)NCC(=O)NC1=CC(=C2CC3CC4C(C(=O)C(=C(C4(C(=O)C3=C(C2=C1O)O)O)O)C(=O)N)N(C)C)N(C)C,0 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CC1COC2=C3N1C=C(C(=O)C3=CC(=C2N4CCN(CC4)C)F)C(=O)O,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CN1C(=NN=N1)SCC2=C(N3C(C(C3=O)(NC(=O)C(C4=CC=C(C=C4)O)C(=O)O)OC)OC2)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C(C(C(O1)OC2C(C(C(C(C2O)O)N=C(N)N)O)N=C(N)N)OC3C(C(C(C(O3)CO)O)O)NC)(C=O)O,1 +CCOP(=O)(O)OP(=O)(O)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CCCC1CC(N(C1)C)C(=O)NC(C2C(C(C(C(O2)SC)O)O)O)C(C)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,C1C(=C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CS3)C(=O)[O-])C[N+]4=CC=CC=C4,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC(=CC=C1C(C(CO)NC(=O)C(Cl)Cl)O)[N+](=O)[O-],1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=C(C=C2)O)C(=O)NC3C4N(C3=O)C(=C(CS4)CSC5=NN=NN5C)C(=O)O.CC1(C(N2C(S1(=O)=O)CC2=O)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CN(C)C1C2CC3CC4=C(C=CC(=C4C(=C3C(=O)C2(C(=C(C1=O)C(=O)N)O)O)O)O)N(C)C,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=CC=C2)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC(=CC=C1NC(=NC(=NCCCCCCN=C(N)N=C(N)NC2=CC=C(C=C2)Cl)N)N)Cl,1 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)N(C)C)C(=O)O)C(C)O,0 
+CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCCCCCCCOC1=CC=C(C=C1)C2=CC=C(C=C2)C(=O)NC3CC(CNC(=O)C4C(C(CN4C(=O)C(NC(=O)C(NC(=O)C5CC(CN5C(=O)C(NC3=O)C(C)O)O)C(CC6=CC=CC=C6)O)CO)C)O)NCCN,1 +CCOP(=O)(O)OP(=O)(O)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CSC=C3)C(=O)O)C(=O)O)C,0 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC(=O)OCC1=C(N2C(C(C2=O)NC(=O)C(=NOC)C3=CSC(=N3)N)SC1)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)C(=O)O)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1=C(ON=C1C)NS(=O)(=O)C2=CC=C(C=C2)N,1 +CCOP(=O)(O)OP(=O)(O)O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,1 +CCCCCCCCCCCCCCCCCC(=O)OCC1C(C(C(C(O1)O)NC(=O)C)OC(C)C(=O)NC(C)C(=O)NC(CCC(=O)N)C(=O)O)O,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,1 +NCC(=O)N[C@@]([H])(Cc1ccccc1)C(=O)N[C@@]([H])(Cc1ccccc1)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(C)C)C(=O)N[C@@]([H])([C@]([H])(CC)C)C(=O)N1[C@@]([H])(CCC1)C(=O)NCC(=O)N[C@@]([H])([C@]([H])(CC)C)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CC1C(C(C(O1)OC2C(C(C(C(C2O)O)N=C(N)N)O)N=C(N)N)OC3C(C(C(C(O3)CO)O)O)NC)(C=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,0 +N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)O,N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)O,1 +N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCCNC(=N)N)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)O,CN1CCN(CC1)C2=NC3=CC=CC=C3C=C2CN4C5=NC=NC(=C5C(=N4)C6=CC7=C(C=C6)N=C(S7)N)N,1 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CCC(=O)C1CC(=O)OC(CC2C(O2)C=CC(C(CC(C(C1OC)OC3C(C(C(C(O3)C)OC4CC(C(C(O4)C)OC(=O)CC)(C)O)N(C)C)O)CC=O)C)O)C,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC(C)(C(=O)O)ON=C(C1=CSC(=N1)N)C(=O)NC2C3N(C2=O)C(=C(CS3)C[N+]4=CC=CC=C4)C(=O)[O-],1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C[As](=O)(O)[O-].C[As](=O)(O)[O-].[Ca+2],0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,[Ca],0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CC1C(C(C(O1)OC2C(C(C(C(C2O)O)N=C(N)N)O)N=C(N)N)OC3C(C(C(C(O3)CO)O)O)NC)(C=O)O,1 +CC1=C(C=CC(=C1)CNCCS(=O)(=O)O)F,C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O,1 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1C2CCC3(C(C2(CCC1O)C)C(CC4C3(CC(C4=C(CCC=C(C)C)C(=O)O)OC(=O)C)C)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC(C)CC(C)CCCCCCCCC(=O)NC1CC(C(NC(=O)C2C(CCN2C(=O)C(NC(=O)C(NC(=O)C3CC(CN3C(=O)C(NC1=O)C(C)O)O)C(C(C4=CC=C(C=C4)O)O)O)C(CCN)O)O)NCCN)O,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CN1C(=NN=N1)SCC2=C(N3C(C(C3=O)(NC(=O)C(C4=CC=C(C=C4)O)C(=O)O)OC)OC2)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1=NC=NC=C1F)C(CN2C=NC=N2)(C3=C(C=C(C=C3)F)F)O,0 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CN=C(N=C1)NS(=O)(=O)C2=CC=C(C=C2)N,1 +CCOP(=O)(O)OP(=O)(O)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1C2CCC3(C(C2(CCC1O)C)C(CC4C3(CC(C4=C(CCC=C(C)C)C(=O)O)OC(=O)C)C)O)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)C(=O)O)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C(O1)P(=O)(O)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=CC=C2)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,C1=CC(=CC=C1C(C(CO)NC(=O)C(Cl)Cl)O)[N+](=O)[O-],1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1=NC=NC=C1F)C(CN2C=NC=N2)(C3=C(C=C(C=C3)F)F)O,1 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C(=O)O)ON=C(C1=CSC(=N1)N)C(=O)NC2C3N(C2=O)C(=C(CS3)C[N+]4=CC=CC=C4)C(=O)[O-],0 +CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C)NCC(=O)NC1=CC(=C2CC3CC4C(C(=O)C(=C(C4(C(=O)C3=C(C2=C1O)O)O)O)C(=O)N)N(C)C)N(C)C,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1C(C(=O)N1S(=O)(=O)O)NC(=O)C(=NOC(C)(C)C(=O)O)C2=CSC(=N2)N,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C,1 +N[C@@]([H])(Cc1ccc(O)cc1)C(=O)N[C@@]([H])(CO)C(=O)N1[C@@]([H])(CCC1)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])([C@]([H])(O)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])(Cc1ccccc1)C(=O)O,CC1COC2=C3N1C=C(C(=O)C3=CC(=C2N4CCN(CC4)C)F)C(=O)O,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CCOC1=C(C2=CC=CC=C2C=C1)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)N(C)C)C(=O)O)C(C)O,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CN1C(=NC(=O)C(=O)N1)SCC2=C(N3C(C(C3=O)NC(=O)C(=NOC)C4=CSC(=N4)N)SC2)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)NC4=CC=CC(=C4)C(=O)O)C(=O)O)C(C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N,0 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,C1=CC(=CC=C1C(C(CO)NC(=O)C(Cl)Cl)O)[N+](=O)[O-],1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1C2CC(=C(N2C1=O)C(=O)O)SCCN=CN)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ga+3],1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CN(C)C1C2CC3CC4=C(C=CC(=C4C(=C3C(=O)C2(C(=C(C1=O)C(=O)N)O)O)O)O)N(C)C,0 +CCC1(CC=NCCN1CC)C(=O)NCC2=CC=CC=C2,[Na+].[Cl-],0 
+CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1C=C(C(=O)C2=C1N=C(C=C2)C)C(=O)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=C(C=C2)O)C(=O)NC3C4N(C3=O)C(=C(CS4)CSC5=NN=NN5C)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CSC=C3)C(=O)O)C(=O)O)C,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,0 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N,1 +N[C@@]([H])(Cc1ccc(O)cc1)C(=O)N[C@@]([H])(CO)C(=O)N1[C@@]([H])(CCC1)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])([C@]([H])(O)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])(Cc1ccccc1)C(=O)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC(=CC=C1C(=O)O)N,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC1C(C(C(N(CC(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)C)O)(C)O,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CSC=C3)C(=O)O)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCCC=CC=O,0 +C1=CC=C(C=C1)CCC(C(=O)C(=O)S)NC(=O)C(CC2=CC=CC=C2)NC(=O)OCC3=CN=CC=C3,CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=CC=CC=CC=CC=CC=CC(CC2C(C(CC(O2)(CC(CC(C(CCC(CC(CC(=O)OC(C(C1O)C)C)O)O)O)O)O)O)O)C(=O)O)OC3C(C(C(C(O3)C)O)N)O,0 
+CCCCCCCC(=O)NC(C(C)O)C(=O)NC(CC)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC2=CC=CC=C2)CC(C)C)CCN)CCN)C(C)O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)OC)C)C)O)(C)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)NC4=CC=CC(=C4)C(=O)O)C(=O)O)C(C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1CC(=O)C2(C(O1)OC3C(C(C(C(C3O2)NC)O)NC)O)O,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1=C(C=CC2=C1OC(=O)C(=C2O)NC(=O)C3=CC(=C(C=C3)O)CC=C(C)C)OC4C(C(C(C(O4)(C)C)OC)OC(=O)N)O,1 +CC(C)CC1C(=O)N2CCCC2C(=O)N1,C1CC2C(=O)NC(C(=O)N2C1)CC3=CC=CC=C3,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)N(C)C)C(=O)O)C(C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)C(=O)O)C(=O)O)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C)(C)NCC(=O)NC1=CC(=C2CC3CC4C(C(=O)C(=C(C4(C(=O)C3=C(C2=C1O)O)O)O)C(=O)N)N(C)C)N(C)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC,0 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1=C(C(=NO1)C2=CC=CC=C2Cl)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(=O)OCC1=C(N2C(C(C2=O)NC(=O)C(=NOC)C3=CSC(=N3)N)SC1)C(=O)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C(CC(=O)O)C(=O)O,1 +C[As](=O)(O)[O-].C[As](=O)(O)[O-].[Ca+2],C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,0 +N[C@@]([H])(C(C)C)C(=O)N[C@@]([H])(C)C(=O)N[C@@]([H])(CC(=CN2)C1=C2C=CC=C1)C(=O)N[C@@]([H])(CCC(=O)O)C(=O)O,CCC1C(C(C(C(=O)C(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)O)(C)O,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=CC=C2)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)[O-].CC1(C(N2C(S1(=O)=O)CC2=O)C(=O)O)CN3C=CN=N3.[Na+],1 
+CCOP(=O)(O)OP(=O)(O)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C,1 +C[C@H]([C@H]1C(=O)NCC[C@@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CCN)CCN)CC(C)C)CC2=CC=CC=C2)CCN)NC(=O)[C@H](CCN)NC(=O)[C@H]([C@@H](C)O)N)O,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,1 +C1=CC=C(C=C1)CCC(C(CS(=O)(=O)CC2=CC=CC=C2)S)NC(=O)C(CC3=CC=CC=C3)NC(=O)OCC4=CC=CC=C4,C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCC(C)N1C(=O)N(C=N1)C2=CC=C(C=C2)N3CCN(CC3)C4=CC=C(C=C4)OCC5COC(O5)(CN6C=NC=N6)C7=C(C=C(C=C7)Cl)Cl,1 +CC(C(=O)NC(C(CC=O)C(=O)NO)C(=O)O)N,CCC(=O)C1CC(=O)OC(CC2C(O2)C=CC(C(CC(C(C1OC)OC3C(C(C(C(O3)C)OC4CC(C(C(O4)C)OC(=O)CC)(C)O)N(C)C)O)CC=O)C)O)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1NC(=O)C(CCN)O)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(C(C(C(O3)CN)O)O)O)N,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N,1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CCN1CCN(C(=O)C1=O)C(=O)NC(C2=CC=C(C=C2)O)C(=O)NC3C4N(C3=O)C(=C(CS4)CSC5=NN=NN5C)C(=O)O.CC1(C(N2C(S1(=O)=O)CC2=O)C(=O)O)C,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CO)O)N)O)O)OC3C(CC(C(O3)CN)O)N)N,0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,CC(C1=NC=NC=C1F)C(CN2C=NC=N2)(C3=C(C=C(C=C3)F)F)O,0 +CC(C)CC1C(=O)NC(C(=O)N2CCCC2C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N3CCCC3C(=O)NC(C(=O)NC(C(=O)N1)CCCN)C(C)C)CC4=CC=CC=C4)CC(C)C)CCCN)C(C)C)CC5=CC=CC=C5,[N+](=O)([O-])[O-].[Ag+],0 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1=CC=C2C(=C1)C=CN2,1 +CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCCC(C(=O)NC(C(=O)NC(C(=O)N1)CC2=CC=CC=C2)CCN)NC(=O)C(CCN)NC(=O)C(C(C)O)N)C(C)O)CCN)CCN,CC1C2C(C(=O)N2C(=C1SC3CC(NC3)C(=O)N(C)C)C(=O)O)C(C)O,1 +CCOP(=O)(O)OP(=O)(O)O,C1=CC(=CC=C1C(C(CO)NC(=O)C(Cl)Cl)O)[N+](=O)[O-],1 +CCC(C)CCCC(=O)NC(CCN)C(=O)NC(C(C)O)C(=O)NC(CCN)C(=O)NC1CCNC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC1=O)CCN)CC(C)C)CC(C)C)CCN)CCN)C(C)O,C1C(C(C(C(C1N)OC2C(C(C(C(O2)CN)O)O)O)O)OC3C(C(C(C(O3)CO)O)N)O)N,1 \ No newline at end of file diff --git a/chemprop/tests/data/classification/mol.csv b/chemprop/tests/data/classification/mol.csv new file mode 100644 index 0000000000000000000000000000000000000000..464ff6c0822218e00fdf02d551412d4c21372215 --- /dev/null +++ b/chemprop/tests/data/classification/mol.csv @@ -0,0 +1,501 @@ 
+"smiles","NR-AhR","NR-ER","SR-ARE","SR-MMP" +"CCOc1ccc2nc(S(N)(=O)=O)sc2c1",1,,1,0 +"CCN1C(=O)NC(c2ccccc2)C1=O",0,0,,0 +"CC[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]3CC[C@@]21C",,,0, +"CCCN(CC)C(CC)C(=O)Nc1c(C)cccc1C",0,0,,0 +"CC(O)(P(=O)(O)O)P(=O)(O)O",0,0,0,0 +"CC(C)(C)OOC(C)(C)CCC(C)(C)OOC(C)(C)C",0,0,,0 +"O=S(=O)(Cl)c1ccccc1",0,0,0,0 +"O=C(O)Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1",0,1,1,0 +"OC[C@H](O)[C@@H](O)[C@H](O)CO",0,0,0, +"CCCCCCCC(=O)[O-].CCCCCCCC(=O)[O-].[Zn+2]",,,0, +"NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1",0,0,, +"O=c1[nH]c(=O)n([C@H]2C[C@H](O)[C@@H](CO)O2)cc1I",,0,0,0 +"CC(C)COC(=O)C(C)C",0,0,0,0 +"C=C(C)C(=O)OCCOC(=O)C(=C)C",0,0,0,0 +"Cl/C=C\C[N+]12CN3CN(CN(C3)C1)C2",0,0,1,0 +"O=C([O-])Cc1cccc2ccccc12",0,0,0,0 +"CCCCCCCCCCOCC(O)CN",0,0,, +"CCN(CC)C(=O)c1cccnc1",0,,,0 +"COc1cc(O)cc(O)c1",,,,0 +"CCOC(=O)c1cccnc1",,,0, +"CCOc1ccc(S(=O)(=O)O)c2cccnc12",,1,,0 +"O=C(O)[C@H](O)c1ccccc1",0,,0,0 +"Nc1ccc(/N=N/c2ccccc2)cc1",1,1,1, +"CN[C@@H]1C[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc21",1,0,, +"CN1[C@H]2CC[C@@H]1C[C@H](OC(=O)c1cc(Cl)cc(Cl)c1)C2",0,0,0, +"CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21",0,0,0, +"C#CCO",0,0,0,0 +"Nc1ccccc1S(=O)(=O)O",0,0,0, +"CC(O)CC(C)(C)O",0,0,0,0 +"CC(C)(C)CC(C)(C)N",0,0,0,0 +"CC(=O)CC(C)C",0,0,0,0 +"CCCC(C)=O",0,0,0,0 +"Nc1nc2ccccc2[nH]1",1,0,0,0 +"Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl",1,,1,1 +"c1ccc(-c2ccccc2)cc1",0,0,0,0 +"CNC(=O)Nc1ccc(Cl)c(Cl)c1",1,0,0, +"CC(=O)Nc1ccc(C)c(Cl)c1",0,,0, +"CCCCNC(=S)NCCCC",0,0,0,0 +"CCCCNC(=O)NCCCC",0,0,0,0 +"CC(C)N(c1ccccc1)C(C)C",0,0,0,0 +"CCc1cccc(C)c1",0,0,0,0 +"CC(C)CCCCCCCOP(OCCCCCCCC(C)C)Oc1ccccc1",0,0,,0 +"CCCCCCCC/C=C\CCCCCCCC(=O)OC(CO)CO",0,0,0,0 +"CCCCCCCCCCC=CC1CC(=O)OC1=O",0,0,1,0 +"CC(C)C(Nc1ccc(C(F)(F)F)cc1Cl)C(=O)OC(C#N)c1cccc(Oc2ccccc2)c1",,0,0,1 +"CS(=O)(=O)NC(=O)c1cc(Oc2ccc(C(F)(F)F)cc2Cl)ccc1[N+](=O)[O-]",0,0,0,0 +"CCOP(=S)(CC)Sc1ccccc1",0,,0,0 +"CC/C=C\CCCCO",0,0,0,0 +"Nc1ccccc1C(=O)Oc1ccc2ccccc2c1",1,1,1,1 +"C=C[C@H]1CN2CCC1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12",1,0,,0 +"CC(=O)CCC(C)=O",0,0,0,0 +"N#CCCNCCC#N",0,0,0,0 +"CCOc1ccc(N=Nc2ccc(C=Cc3ccc(N=Nc4ccc(OCC)cc4)cc3S(=O)(=O)[O-])c(S(=O)(=O)[O-])c2)cc1",0,0,, +"O=C1c2ccccc2C(=O)C1c1ccc2cc(S(=O)(=O)[O-])cc(S(=O)(=O)[O-])c2n1",,0,1, +"O=C(Nc1ccc2c(O)c(N=Nc3ccc(N=Nc4ccc(S(=O)(=O)[O-])cc4)cc3)c(S(=O)(=O)[O-])cc2c1)c1ccccc1",0,,1, +"CSc1ccc2c(c1)C(N1CCN(C)CC1)Cc1ccccc1S2",0,0,0, +"COCCCC/C(=N\OCCN)c1ccc(C(F)(F)F)cc1",0,0,, +"Cc1ccccc1CCO",0,0,0,0 +"Cc1nc(C)c(C)nc1C",0,0,0,0 +"CC1=CC(O)CC(C)(C)C1",0,0,0,0 +"Cc1cnc(C)c(C)n1",0,0,0,0 +"CC(C)COC(=O)c1ccccc1",0,,0,0 +"C=C(C)[C@@H]1CC=C(C)CC1",0,,0,0 +"O=[N+]([O-])[O-].O=[N+]([O-])[O-].[Ca+2]",0,0,0,0 +"Nc1ccc(N)c([N+](=O)[O-])c1",1,,1,1 +"CC1COc2ccccc2N1",1,0,0, +"O=C(O)c1cc(Cl)cc(Cl)c1O",0,0,0, +"CCCCCCCCCCCC(=O)NCCCN(C)C",0,,, +"CC(C)CCCCCOC(=O)CCS",0,0,0,0 +"O=[N+]([O-])c1cc([As](=O)(O)O)ccc1O",0,0,0,0 +"CCC(COC(=O)CCS)(COC(=O)CCS)COC(=O)CCS",0,0,0,0 +"C=CCOc1c(Br)cc(Br)cc1Br",1,0,0, +"F[B-](F)(F)F.[H+]",0,0,,0 +"CC(C)[C@H]1CC[C@H](C)C[C@@H]1O",0,0,0,0 +"C(=C/c1ccccc1)\c1ccccc1",1,1,1, +"Cc1ccc2c(ccc3ccccc32)c1",,,0, +"Cn1c(=O)c2c(ncn2CC2OCCO2)n(C)c1=O",0,0,0,0 +"C[C@H]1O[C@@H](n2cc(F)c(=O)[nH]c2=O)[C@H](O)[C@@H]1O",0,1,,0 +"CN1CCc2cccc3c2[C@H]1Cc1ccc(O)c(O)c1-3",1,,1, +"COC(=O)C1=CCCN(C)C1",0,0,0,0 +"COc1ccc(C2C(=O)c3ccccc3C2=O)cc1",,,0, +"Cc1ccc(C(=O)O)cc1[N+](=O)[O-]",0,0,0,0 +"Cc1cc(C(=O)O)ccc1[N+](=O)[O-]",0,0,0,0 +"CCCC(CCC)C(=O)O[C@@H]1C[C@@H]2CC[C@H](C1)[N+]2(C)C",0,0,,0 +"CCCCCC(C)O",0,0,0,0 +"O=C([O-])c1ccccc1O",0,0,0, +"NC(=O)c1ccccc1",0,0,0,0 
+"CCN1CCc2nc(N)oc2CC1",,,0, +"CC(C)(C)[C@]1(O)CCN2C[C@H]3c4ccccc4CCc4cccc(c43)[C@@H]2C1",0,,, +"O=C1C(N(CO)C(=O)NCO)N(CO)C(=O)N1CO",0,0,0,0 +"O=c1[nH]c(=O)n([C@H]2C[C@H](O)[C@@H](CO)O2)cc1/C=C/Br",0,0,,0 +"OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc32)CC1",0,0,0, +"CC(C)NC[C@@H](O)COc1ccc(CC(N)=O)cc1",0,0,0,0 +"CCNC(=O)N1CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1",0,,1,0 +"Nc1ccc([N+](=O)[O-])cc1N",1,1,1,1 +"[I-].[K+]",0,0,0,0 +"O=C(C=Cc1ccc(O)c(O)c1)O[C@@H]1C[C@](O)(C(=O)O)C[C@@H](O)[C@H]1O",0,0,0,0 +"Oc1nc(Cl)c(Cl)cc1Cl",1,0,1,1 +"C/C=C/C=C/C=O",0,0,1,0 +"O=[N+]([O-])c1cc(C(F)(F)F)cc([N+](=O)[O-])c1Cl",0,,1, +"C[Si](C)(C)N[Si](C)(C)C",0,0,0,0 +"C=CC(=O)OCCCl",0,0,,0 +"COCC(C)N(C(=O)CCl)c1c(C)csc1C",0,0,1, +"CN(C)CCn1nnnc1SCC1=C(C(=O)O)N2C(=O)[C@@H](NC(=O)Cc3csc(N)n3)[C@H]2SC1",0,0,,0 +"C/C(=N\NC(=O)Nc1cc(F)cc(F)c1)c1ncccc1C(=O)[O-]",,0,0,0 +"CC1COC(Cn2cncn2)(c2ccc(Oc3ccc(Cl)cc3)cc2Cl)O1",0,,1, +"CCN(CC)CCOC(=O)C(Cc1cccc2ccccc12)CC1CCCO1",0,0,0,0 +"CCN[C@H]1C[C@H](C)S(=O)(=O)c2sc(S(N)(=O)=O)cc21",0,0,0,0 +"CCN[C@H]1CN(CCCOC)S(=O)(=O)c2sc(S(N)(=O)=O)cc21",0,0,0,0 +"CC(C)N(CCC(C(N)=O)(c1ccccc1)c1ccccn1)C(C)C",0,0,0,0 +"CC[C@H]1[C@@H]2C[C@H]3[C@@H]4N(C)c5ccccc5[C@]45C[C@@H](C2[C@H]5O)N3[C@@H]1O",0,0,, +"CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)C=C[C@]5(C)[C@H]4[C@@H](O)C[C@]3(C)[C@]2(C(=O)CO)O1",0,0,0, +"CSC(=O)c1c(C(F)F)nc(C(F)(F)F)c(C(=O)SC)c1CC(C)C",0,0,0, +"O=C(O)/C=C(\CC(=O)O)C(=O)O",0,,0,0 +"CCCCCCCCCCCCCCCC(=O)O[C@@H]1CC(C)=C(/C=C/C(C)=C/C=C/C(C)=C/C=C\C=C(C)\C=C\C=C(C)\C=C\C2=C(C)C[C@@H](OC(=O)CCCCCCCCCCCCCCC)CC2(C)C)C(C)(C)C1",0,0,,0 +"O=C(CO)[C@@H](O)[C@H](O)[C@@H](O)CO",,,0, +"CC1=C(C(=O)Nc2ccccc2)S(=O)(=O)CCO1",0,,1, +"CNCC(=O)c1ccc(O)c(O)c1",,,0, +"CC(C)(C)C1CCC(=O)CC1",0,0,0,0 +"CN(C)[C@@H]1C(O)=C(C(=O)NCN2CCCC2)C(=O)[C@@]2(O)C(O)=C3C(=O)c4c(O)cccc4[C@@](C)(O)C3C[C@@H]12",0,0,0,0 +"CN1CCN=C(c2ccccc2)c2cc(Cl)ccc21",,0,,0 +"CN(C)CCc1c[nH]c2ccc(Cn3cncn3)cc12",0,0,,0 +"CCCCC(=O)[O-]",0,0,,0 +"CCCCCCCCCCCCCC(=O)OC",0,0,0,0 +"Nc1ccncc1N",,,0, +"CCCCCCC(C)O",0,0,0,0 +"CCC[Si](OC)(OC)OC",0,0,0,0 +"CC1=C(CC=O)C(C)(C)CCC1",0,0,0,0 +"NCCNCCNCCN",0,,0,0 +"C[C@]12CC[C@H]3[C@@H](CC[C@@]45O[C@@H]4C(O)=C(C#N)C[C@]35C)[C@@H]1CC[C@@H]2O",0,1,1, +"CCCC1COC(Cn2cncn2)(c2ccc(Cl)cc2Cl)O1",1,,1,0 +"Cc1ccc(N)c(N)c1",1,1,1, +"CCCCCNCCCCC",0,0,0,0 +"COCC(C)O",0,1,0,0 +"c1ccc2c(c1)Oc1ccccc1S2",0,1,0,1 +"CC1CN1",0,0,0,0 +"CCc1cnc(C2=NC(C)(C(C)C)C(=O)N2)c(C(=O)O)c1",0,0,0,0 +"NCC(=O)CCC(=O)O",,,, +"Clc1ccc(C(Cn2ccnc2)OCc2c(Cl)cccc2Cl)c(Cl)c1",,0,,0 +"Clc1cnc(Oc2ccc(Oc3ncc(Cl)cc3Cl)cc2)c(Cl)c1",0,0,0,1 +"COc1ccccc1OCCNCC(O)COc1cccc2[nH]c3ccccc3c12",1,,0,1 +"ClCOCCl",0,1,0,0 +"CC(O)CNCC(C)O",0,0,0,0 +"C[C@H](CCC(=O)[O-])[C@H]1CC[C@H]2[C@H]3[C@H](C[C@H](O)[C@@]21C)[C@@]1(C)CC[C@@H](O)C[C@H]1C[C@H]3O",,0,0,0 +"CC(=O)[C@H]1[C@H](C#N)C[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@@]21C",0,,1,0 +"O=[N+]([O-])c1ccc([As](=O)(O)O)cc1",0,0,,0 +"CCOC(=O)C1OC1c1ccccc1",0,0,,0 +"ONc1ccccc1",1,1,, +"O=CC(=O)c1ccccc1",0,0,0,0 +"[Cu]I",0,,,0 +"CCCCC(CC)CCC(CC(C)C)OS(=O)(=O)[O-]",0,0,0,0 +"ClCc1ccc(Cl)cc1Cl",0,0,, +"O=C(O)CCCCCCCCC(=O)O",0,0,0,0 +"CCCCCCCC(=O)OC",0,0,0,0 +"CC(O)COCC(C)O",0,0,0,0 +"Cc1ccc(C(=O)C(=O)[O-])cc1C",,,0, +"O=C([O-])COc1nn(Cc2ccccc2)c2ccccc12",0,0,0,0 +"Cc1ncc[nH]1",1,0,0,0 +"COc1ccc2sc(C(=O)Nc3nnn[n-]3)c(OC(C)C)c2c1",0,,, +"Oc1ccc2c(c1)OC[C@@H](N1CCC(O)(c3ccc(F)cc3)CC1)[C@H]2O",1,1,0, +"O=C(O)CCN(C1(C(=O)NO)CCCC1)S(=O)(=O)c1ccc(Oc2ccc(F)cc2)cc1",0,0,, +"O=C(NO)C1(NS(=O)(=O)c2ccc(Oc3ccc(F)cc3)cc2)CCOCC1",0,,,0 +"Cc1nc(C)nc(N2C[C@H](C)N(c3ccnc([C@@H](C)O)n3)[C@H](C)C2)n1",0,0,,0 
+"CC[C@H](C)[C@@H](C(=O)O)n1sc2ccccc2c1=O",0,0,1, +"Cc1cc(SC2=C(O)C[C@@](CCc3ccc(N)cc3)(C(C)C)OC2=O)c(C(C)(C)C)cc1CO",0,0,0,0 +"CCn1nc(C)c2c1C(=O)NCC(c1ccc(O)cc1)=N2",,0,,0 +"C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)COP(=O)([O-])[O-]",0,1,,0 +"CN1C[C@H]2c3ccccc3Oc3ccc(Cl)cc3[C@@H]2C1",,,0, +"CO[C@H]1C[C@H](O[C@@H]2[C@@H](C)C(=O)O[C@H](C)[C@H](C)[C@H](OC(C)=O)[C@@H](C)C(=O)[C@@]3(CO3)C[C@H](C)[C@H](O[C@@H]3O[C@H](C)C[C@H](N(C)C)[C@H]3OC(C)=O)[C@H]2C)O[C@@H](C)[C@@H]1OC(C)=O",0,0,,0 +"CO[Si](C)(C)OC",0,0,0,0 +"CC(O)(c1ccc(Cl)cc1)c1ccc(Cl)cc1",0,0,,1 +"CN(C)C(=O)Nc1ccc(Oc2ccc(Cl)cc2)cc1",1,,1,1 +"CC(=O)c1ccccc1O",0,0,0,0 +"O=C(O)Cc1c(Cl)ccc(Cl)c1Cl",0,0,0,0 +"O=C(O)c1cccc(Cl)n1",0,0,0,0 +"CCCCCCCCCC=O",0,,0,0 +"Cc1ccc(C(C)(C)C)cc1",0,0,0,0 +"BrCBr",0,0,0,0 +"Nc1cc(Cl)cc(Cl)c1",0,0,0,0 +"CCCCCCCCCC(=O)O",0,0,0, +"CC(C)(C)c1cc([N+](=O)[O-])cc(C(C)(C)C)c1O",0,,,1 +"O.O.O.O.O.O.O=[N+]([O-])[O-].O=[N+]([O-])[O-].[Mg+2]",0,0,0,0 +"CCCCCCCCCOS(=O)(=O)[O-]",0,1,0,0 +"O=Cc1ccc(C(=O)O)cc1",0,0,1,0 +"CCC(Cl)CCl",0,0,0,0 +"CC(C)(O)c1ccccc1",0,0,0, +"O=C1CCCN1",0,1,0,0 +"ClCc1ccccc1Cl",0,0,0, +"Cc1ccc([N+](=O)[O-])c([N+](=O)[O-])c1",,0,1, +"N#CC1(N=NC2(C#N)CCCCC2)CCCCC1",0,0,0,0 +"C=CC(=O)OCCOC(=O)C=C",0,1,1,0 +"CCCC[P+](CCCC)(CCCC)CCCC",0,0,0,0 +"N#CCc1cccc(C(F)(F)F)c1",0,0,0,0 +"COc1cccc(Br)c1",0,0,0,0 +"CCCCCCCCNC",0,0,0,0 +"CCC1OCC(COc2ccc(Oc3ccccc3)cc2)O1",1,1,, +"CC1=C(C(=O)Nc2ccccc2)SCCO1",1,1,0,0 +"CCCCN(CCCC)SN(C)C(=O)Oc1cccc2c1OC(C)(C)C2",1,1,0, +"Cc1cc(OC(=O)N(C)C)nn1C(=O)N(C)C",0,0,0,0 +"Cc1ccc2c(Br)cc(Br)c(O)c2n1",1,,,1 +"O=c1c(O)c(-c2ccc(O)cc2)oc2cc(O)cc(O)c12",1,1,0,1 +"CC(O)COc1ccccc1",0,0,0,0 +"O=P1(NCCCl)OCCCN1CCCl",0,0,0,0 +"C=CC(=C)C",,,0,0 +"CC(C)O",0,0,0,0 +"CC(C)OC(=O)Nc1cccc(Cl)c1",,0,0, +"CC(C)OC(=O)Nc1ccccc1",0,1,0,0 +"CC=Cc1ccc2c(c1)OCO2",0,0,0,0 +"CCCC(CCC)C(=O)[O-]",0,0,0,0 +"CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23",0,0,0,0 +"CC(=O)Oc1cc(C)c(OC(C)=O)c2ccccc12",1,,, +"CCN(Cc1ccc(Cl)nc1)/C(=C/[N+](=O)[O-])NC",0,0,0,0 +"CC1CCC(C(C)C)C(OC(=O)c2ccccc2N)C1",0,1,0,1 +"O=C(c1ccccc1)c1cc(Cl)ccc1O",,,,1 +"OC[C@]1(O)OC[C@@H](O)[C@H](O)[C@@H]1O",0,0,0,0 +"Cc1ccc(C=C2C(=O)C3CCC2C3(C)C)cc1",0,0,0,1 +"CC(C)C[P+](C)(CC(C)C)CC(C)C",0,0,0,0 +"C=C1C[C@]23CC[C@@H]4[C@](C)(C(=O)O[C@@H]5O[C@H](CO)[C@@H](O)[C@H](O)[C@H]5O)CCC[C@]4(C)[C@@H]2C[C@@H](O[C@@H]2O[C@H](CO)[C@@H](O)[C@H](O[C@@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@H]4O)[C@H]2OC2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@@H]1C3",0,0,0,0 +"CCCCCCCC[P+](CCCCCCCC)(CCCCCCCC)CCCCCCCC",,,,1 +"CCCCC(CC)COC(=O)c1ccc(C(=O)OCC(CC)CCCC)c(C(=O)OCC(CC)CCCC)c1",0,1,0,0 +"O=c1n(CCO)c(=O)n(CCO)c(=O)n1CCO",0,0,0,0 +"Cc1cc(C)cc(OP(=O)(Oc2cc(C)cc(C)c2)Oc2cc(C)cc(C)c2)c1",1,0,, +"O=P(OC(CCl)CCl)(OC(CCl)CCl)OC(CCl)CCl",0,0,0, +"O=c1n(CC2CO2)c(=O)n(CC2CO2)c(=O)n1CC1CO1",0,0,1,0 +"Cc1cc(-c2ccc(N=Nc3c(S(=O)(=O)[O-])cc4cc(S(=O)(=O)[O-])cc(N)c4c3O)c(C)c2)ccc1N=Nc1c(S(=O)(=O)[O-])cc2cc(S(=O)(=O)[O-])cc(N)c2c1O",,,, +"O=C(O)c1ccc(O)cc1O",0,0,0,0 +"O=C1c2c(O)ccc([N+](=O)[O-])c2C(=O)c2c([N+](=O)[O-])ccc(O)c21",,,1,1 +"CC1=CC(C)(C)Nc2ccccc21",0,0,0,1 +"Cc1cc(=O)oc2cc(O)cc(O)c12",1,,,1 +"CC(C)CNCC(C)C",0,0,0,0 +"CNC1(c2ccccc2Cl)CCCCC1=O",0,0,,0 +"Cc1ccccc1OCC(O)CNCCOc1ccc(C(N)=O)cc1",0,0,,0 +"O=c1oc2cc(O)ccc2c2oc3cc(O)ccc3c12",1,1,,1 +"COc1ccc(-c2coc3cc(O)cc(O)c3c2=O)cc1",1,1,1, +"O=c1cc(-c2ccccc2)oc2cc(O)cc(O)c12",1,1,1,1 +"O=c1cc(-c2ccc(O)cc2)oc2cc(O)cc(O)c12",1,1,1,1 +"O=C(CCc1ccc(O)cc1)c1c(O)cc(O)cc1O",,1,1,1 +"CCC(=O)N(c1ccccc1)C1CCN(CCc2ccccc2)CC1",0,0,,0 +"O=C(O)CCC(=O)c1ccc(-c2ccccc2)cc1",,0,0, 
+"CCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1cccc(Cl)c1Cl",,0,1, +"NC(=O)OCC(COC(N)=O)c1ccccc1",0,0,0,0 +"CCNC(C)Cc1cccc(C(F)(F)F)c1",0,0,, +"COC(=O)c1ccc(C)cc1C1=NC(=O)C(C)(C(C)C)N1",0,0,0,0 +"CCOP(=S)(NC(C)C)Oc1ccccc1C(=O)OC(C)C",1,0,, +"CSc1nc(NC2CC2)nc(NC(C)(C)C)n1",1,0,,1 +"C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2C(=O)CO",0,1,,0 +"CN(C)[C@@H]1C(O)=C(C(N)=O)C(=O)[C@@]2(O)C(O)=C3C(=O)c4c(O)ccc(Cl)c4[C@@H](O)[C@H]3C[C@@H]12",,,0, +"CCC1(C)CC(=O)NC(=O)C1",0,0,,0 +"O=C1NCN(c2ccccc2)C12CCN(CCCOc1ccc(F)cc1)CC2",0,,1,1 +"NC(=S)NNC(N)=S",0,0,0,0 +"NC(=S)C(N)=S",0,0,,0 +"CC1CN1P(=O)(N1CC1C)N1CC1C",0,0,0,0 +"O=C(Oc1ccccc1)Oc1ccccc1",0,0,0,0 +"C[Hg]Cl",0,0,, +"S=c1[nH]cnc2[nH]cnc12",1,0,1,0 +"[Hg+2]",1,,, +"CCCCCCCCCCCCNC(=N)N",0,0,, +"CN(C)CCN(Cc1cccs1)c1ccccn1",0,0,0,0 +"COc1nn(CSP(=S)(OC)OC)c(=O)s1",0,0,0,0 +"NC1=NCC2c3ccccc3Cc3ccccc3N12",0,0,0,0 +"CC(=O)[C@H]1CC[C@H]2[C@@H]3CC[C@H]4C[C@](C)(O)CC[C@]4(C)[C@H]3CC[C@]12C",0,0,0,0 +"O=C([O-])CCC/C=C\C[C@H]1[C@@H](O)C[C@@H](O)[C@@H]1/C=C/[C@@H](O)COc1cccc(Cl)c1",0,0,, +"O=C(O)Cc1ccc(CCNS(=O)(=O)c2ccc(Cl)cc2)cc1",0,0,0,0 +"NC(=O)c1cn(Cc2c(F)cccc2F)nn1",0,0,0,0 +"COc1ccc(C=CC(=O)OCCC(C)C)cc1",,,,0 +"O=C(NC1CCN(CCc2c[nH]c3ccccc23)CC1)c1ccccc1",,0,0,0 +"CCn1cc[n+](C)c1C.O=S(=O)([O-])C(F)(F)F",0,1,0,0 +"Clc1ccc2cc3ccccc3cc2c1",1,,0, +"CCCCn1cc[n+](C)c1.F[B-](F)(F)F",0,0,1,0 +"F/C(COc1ccc2c(c1)[nH]c1ccccc12)=C1/CN2CCC1CC2",1,0,, +"CC(C)Cc1ccc([C@@H](C)C(=O)NS(C)(=O)=O)cc1",0,0,,0 +"CCCCN(CCCC)C(=S)SSC(=S)N(CCCC)CCCC",0,0,, +"CCC[n+]1ccn(C)c1C.O=S(=O)([N-]S(=O)(=O)C(F)(F)F)C(F)(F)F",0,0,0, +"Brc1c2ccccc2cc2ccccc12",0,1,, +"CCO/C=C1\N=C(c2ccccc2)OC1=O",,0,0,0 +"CNc1cc(OC)c(C(=O)N[C@H]2CCN(Cc3ccccc3)[C@H]2C)cc1Cl",0,0,,0 +"CCN1CCCC1CNC(=O)c1cc(S(=O)(=O)CC)c(N)cc1OC",0,0,0,0 +"COc1cc2c(cc1OC)C1CC(=O)C(CC(C)C)CN1CC2",0,0,0, +"Cc1cc(C)cc(C(=O)OC2C[C@@H]3CC[C@H](C2)N3C)c1",0,0,,0 +"CC[N+]1(C)CCCC1.O=S(=O)([O-])C(F)(F)F",0,0,0,0 +"COP(=O)(OC)SCn1c(=O)oc2cc(Cl)cnc21",1,0,1, +"CNC(=O)/C=C(\C)OP(=O)(OC)OC",0,0,0,0 +"Cc1occc1SSc1ccoc1C",0,0,,0 +"Cc1cc(C(F)(C(F)(F)F)C(F)(F)F)ccc1NC(=O)c1cccc(I)c1C(=O)NC(C)(C)CS(C)(=O)=O",0,,,1 +"CC=CC(=O)CC",0,0,0,0 +"CC1OCCC1=O",0,0,0,0 +"CC1CCCC(=O)C1=O",0,0,0,0 +"CC1=C(O)C(=O)OC1C",0,0,,0 +"CCCCCc1ccco1",0,0,,1 +"c1cnc2c(n1)CCCC2",0,0,0,0 +"CCCCc1ccc2cccc(S(=O)(=O)[O-])c2c1",0,0,0,0 +"Cc1cc(S(=O)(=O)[O-])ccc1/N=N/c1c(O)ccc2ccccc12",1,0,, +"Cc1ccc(N=Nc2c(O)ccc(N=Nc3ccc(S(=O)(=O)[O-])cc3)c2O)c(C)c1",1,0,0, +"Nc1cnn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1",0,0,0, +"CCNc1nc(Cl)nc(NC(C)(C)C)n1",,1,0, +"NS(=O)(=O)c1cc2c(cc1Cl)N=CNS2(=O)=O",0,0,0,0 +"Oc1c(Cl)cc(Cl)c2cccnc12",1,,,1 +"NC(=O)OCC(O)COc1ccc(Cl)cc1",,0,,0 +"CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21",0,,, +"CCCCCCCCCCCCCCn1cc[n+](C)c1",0,,1,1 +"O=[Cr](=O)([O-])O[Cr](=O)(=O)[O-]",0,0,1, +"O=P(Cl)(Cl)Cl",0,0,0,0 +"CCN(Cc1cccc(S(=O)(=O)[O-])c1)c1ccc(C(=C2C=CC(=[N+](CC)Cc3cccc(S(=O)(=O)[O-])c3)C=C2)c2ccccc2)cc1",0,0,1,0 +"CC(C)COC(=O)COc1cc(Cl)c(Cl)cc1Cl",0,0,, +"O=C(OC[C@H]1O[C@@H](OC(=O)c2cc(O)c(O)c(OC(=O)c3cc(O)c(O)c(O)c3)c2)[C@H](OC(=O)c2cc(O)c(O)c(OC(=O)c3cc(O)c(O)c(O)c3)c2)[C@@H](OC(=O)c2cc(O)c(O)c(OC(=O)c3cc(O)c(O)c(O)c3)c2)[C@@H]1OC(=O)c1cc(O)c(O)c(OC(=O)c2cc(O)c(O)c(O)c2)c1)c1cc(O)c(O)c(OC(=O)c2cc(O)c(O)c(O)c2)c1",1,0,1,1 +"CN(C)CCOC(c1ccccc1)c1ccccc1",0,0,0,0 +"COC(=O)c1ccc(C)cc1",0,0,0,0 +"CN(C)CCCN1c2ccccc2C(C)(C)c2ccccc21",0,0,, +"COc1ccc([C@@H]2Sc3ccccc3N(CCN(C)C)C(=O)[C@@H]2OC(C)=O)cc1",0,0,0,0 +"CC(=O)C=Cc1ccccc1",0,1,0,0 +"Cc1c[nH]c(=S)[nH]c1=O",0,0,0,0 +"COc1ccc2cc1Oc1cc3c(cc1OC)CC[N+](C)(C)[C@H]3Cc1ccc(cc1)Oc1c(OC)c(OC)cc3c1[C@@H](C2)[N+](C)(C)CC3",0,0,0,0 
+"CCCCC[C@H](O)/C=C/[C@H]1[C@H](O)CC(=O)[C@@H]1C/C=C\CCCC(=O)O",0,,0,0 +"CCCCC[C@H](O)/C=C/[C@H]1[C@H](O)C[C@H](O)[C@@H]1C/C=C\CCCC(=O)O",0,1,,0 +"CC12CCC(CC1)C(C)(C)O2",1,0,0,0 +"C=COCC1CCC(CO)CC1",0,0,0,0 +"CC(=O)/C=C/C1=C(C)CCCC1(C)C",0,0,0,0 +"CC(=O)NC1CCSC1=O",0,0,0,0 +"CC(C)(C)CC(C)(C)c1ccc(O)c(Cc2ccc(Cl)cc2Cl)c1",0,,0,1 +"CC1COc2ccccc2N1C(=O)C(Cl)Cl",1,0,1,0 +"CC(N)CN",0,0,0,0 +"CCC(C)O",0,,0,0 +"CCCCC(CC)CNC(=N)NC(=N)NCCCCCCNC(=N)NC(=N)NCC(CC)CCCC",,,, +"CC(O)CN",0,0,0,0 +"CO/N=C(\C(=O)N[C@@H]1C(=O)N2C(C(=O)[O-])=C(CSc3nc(=O)c([O-])nn3C)CS[C@H]12)c1csc(N)n1",,,0,0 +"O=c1oc2cc(O)ccc2s1",0,0,,0 +"C=CCc1ccc(O)c(OC)c1",0,0,0,0 +"COC(=O)[C@@H](N)CCCN/C(N)=N/[N+](=O)[O-]",0,0,0,0 +"CC1(S(=O)(=O)[O-])CC(=O)c2ccccc2C1=O",1,,, +"Cc1nnc2n1-c1sc(CCC(=O)N3CCOCC3)cc1C(c1ccccc1Cl)=NC2",0,0,0, +"C[C@H](N[C@H](CCc1ccccc1)C(=O)O)C(=O)N1CCC[C@H]1C(=O)O",0,0,0,0 +"CCC(C)[C@H]1O[C@]2(C=C[C@@H]1C)C[C@@H]1C[C@@H](CC=C(C)[C@@H](O[C@H]3C[C@H](OC)[C@@H](O[C@H]4C[C@H](OC)[C@H](NC(C)=O)[C@H](C)O4)[C@H](C)O3)[C@@H](C)C=CC=C3CO[C@@H]4[C@H](O)C(C)=C[C@@H](C(=O)O1)[C@]34O)O2",0,,,1 +"COc1c(Br)cc(Br)c(C)c1Br",0,0,0, +"C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12.C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12",1,1,,0 +"CN(C)CCc1c[nH]c2ccc(CS(=O)(=O)N3CCCC3)cc12",0,0,0,0 +"O=C1/C(=C2\Nc3ccc(S(=O)(=O)O)cc3C2=O)Nc2ccc(S(=O)(=O)O)cc21",0,1,,0 +"CC(=O)O[Cr](O)OC(C)=O.CC(=O)O[Cr](O)OC(C)=O.CC(=O)O[Cr](OC(C)=O)OC(C)=O",0,0,,0 +"CO[C@H]1CC(O[C@H]2C[C@H]([C@H]3O[C@](C)(O)[C@H](C)C[C@@H]3C)O[C@H]2[C@]2(C)CC[C@H]([C@]3(C)CC[C@]4(C[C@H](O)[C@@H](C)[C@@H]([C@@H](C)[C@@H]5O[C@](O)(CC(=O)[O-])[C@@H](C)[C@H](OC)[C@H]5OC)O4)O3)O2)O[C@@H](C)[C@@H]1OC",0,,, +"C=CC(=O)OCCn1c(=O)n(CCOC(=O)C=C)c(=O)n(CCOC(=O)C=C)c1=O",0,,,0 +"C=C(C)C(=O)OCCNC(C)(C)C",0,0,0,0 +"NS(=O)(=O)c1ccccc1OC(F)(F)F",0,0,0,0 +"O=C=NCC1CCCC(CN=C=O)C1",0,0,0,0 +"C=C[C@H]1CN2CCC1C[C@@H]2[C@@H](O)c1ccnc2ccccc12",,0,, +"Cc1cc(N)c2cc(NC(=O)Nc3ccc4nc(C)cc(N)c4c3)ccc2n1",,,, +"O=C(O)c1ccccc1O.Oc1cccc2cccnc12",0,1,0, +"C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CC[C@@H]2OP(=O)(O)O",0,1,,0 +"O=C(O)[C@@H](S)[C@H](S)C(=O)O",0,,1,0 +"CN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1",0,0,, +"Cc1ncc(CO)c(CN)c1O",,,0, +"NS(=O)(=O)c1ccc(C(=O)O)cc1",0,0,0,0 +"O=C(CCS)OCC(COC(=O)CCS)(COC(=O)CCS)COC(=O)CCS",0,0,, +"C[C@@H]1NC(=O)[C@@H](N)CNC(=O)[C@H]([C@@H]2CCNC(N)=N2)NC(=O)/C(=C/NC(N)=O)NC(=O)[C@H](CNC(=O)C[C@@H](N)CCCN)NC1=O",,,0, +"OCCOCCN1CCN(C2=Nc3ccccc3Sc3ccccc32)CC1",0,0,0, +"Cc1cc(/C=C/c2ccc3cc(N(C)C)ccc3[n+]2C)c(C)n1-c1ccccc1.Cc1cc(/C=C/c2ccc3cc(N(C)C)ccc3[n+]2C)c(C)n1-c1ccccc1.O=C([O-])c1cc2ccccc2c(Cc2c(O)c(C(=O)[O-])cc3ccccc23)c1O",,,,1 +"COc1ccc(CN(CCN(C)C)c2ccccn2)cc1",0,0,0,1 +"CN(C)C(=O)Oc1ccc[n+](C)c1",0,1,0,0 +"Cc1ncc(CO)c(CO)c1O",0,0,0,0 +"CCC1NC(=O)c2cc(S(N)(=O)=O)c(Cl)cc2N1",0,0,0, +"C=C[C@H]1CN2CCC1C[C@@H]2[C@@H](O)c1ccnc2ccc(OC)cc12",1,1,0, +"Brc1cc2ccccc2c2ccccc12",,0,0, +"CC(C)(N)CO",,0,0,0 +"CC(C)(CO)CO",0,0,0,0 +"O=S1(=O)CCCC1",0,0,0,0 +"O=[N+]([O-])C(CO)(CO)CO",0,0,1,0 +"OCC(CO)(CO)COCC(CO)(CO)CO",0,0,0, +"O=[N+]([O-])OCCN(CCO[N+](=O)[O-])CCO[N+](=O)[O-]",0,0,0,0 +"NC(CO)(CO)CO",0,0,0,0 +"O=C(Cl)c1cc(C(=O)Cl)cc(C(=O)Cl)c1",,0,,0 +"CO[Si](CCCS)(OC)OC",,0,0,0 +"COc1cc2c3cc1Oc1cc(ccc1O)C[C@@H]1c4c(cc(OC)c(O)c4Oc4ccc(cc4)C[C@@H]3N(C)CC2)CC[N+]1(C)C",0,0,0,0 +"O=C(O[C@@H]1C[C@@H]2CC[C@H](C1)[N+]21CCCC1)C(O)(c1ccccc1)c1ccccc1",0,0,0,0 +"COc1cc(C(=O)NC2CCCNC2)cc(OC)c1OC",0,0,0,0 +"C[N+](C)=CCl",0,0,0,0 +"CC(=O)c1cccnc1",0,0,0,0 +"O=S1(=O)OC(c2ccc([O-])cc2)(c2ccc(O)cc2)c2ccccc21",0,0,0, +"O=CN1CCOCC1",0,0,0,0 +"COC(=O)CCC(=O)O",0,0,0,0 
+"NCc1cccnc1",0,0,0,0 +"CCCCCCCCn1sc(Cl)c(Cl)c1=O",0,,, +"Cc1cc(O)cc(C)c1Cl",0,0,0,1 +"O=[Zr](Cl)Cl",0,0,,0 +"CSc1ccc2c(c1)N(CCC1CCCCN1C)c1ccccc1S2",0,,, +"c1ccc2cnncc2c1",0,0,0,0 +"COC(=O)c1ccc(CBr)cc1",1,1,0,0 +"CN1CCc2cc(Cl)c(O)cc2[C@H]2c3ccccc3CC[C@@H]21",0,0,0, +"O=P(O)(OCc1ccccc1)OCc1ccccc1",0,0,0,0 +"S=C=NCc1ccccc1",,0,,1 +"Oc1ccc(Cl)cc1Cc1ccccc1",0,0,1,1 +"ClCc1ccccc1",0,0,0,0 +"OCc1ccccc1",0,0,0,0 +"CC(=O)OCc1ccccc1",0,0,0,0 +"COCCc1ccc(OCC(O)CNC(C)C)cc1.COCCc1ccc(OCC(O)CNC(C)C)cc1",0,0,0,0 +"CN(C)C(=N)NC(=N)N",0,0,0,0 +"CCCC[Sn](CCCC)(OC(C)=O)OC(C)=O",,,1,1 +"C[NH+](C)CCC(c1ccccc1)c1cccc[nH+]1",0,1,0,0 +"CCOc1ccc(N)cc1",1,0,,0 +"CC(C)=CCC[C@H](C)CCO",0,0,0,0 +"CCOc1cccc(N)c1",,0,0,0 +"Nc1ccccc1C(=O)OCCc1ccccc1",1,1,0, +"CC(C)CC(O)CC(C)C",0,0,0,0 +"C=C(C)C(=O)OCCOP(=O)(O)OCCOC(=O)C(=C)C",0,0,0,0 +"CC(C)OS(C)(=O)=O",0,0,0, +"c1ccc2c(c1)OCC(CN1CCCCC1)O2",0,0,,0 +"C=CCN1CCCC1CNC(=O)c1cc(S(N)(=O)=O)cc(OC)c1OC",0,0,,0 +"C=C(C)OC(C)=O",0,0,0,0 +"Cc1cc(O)cc2c1O[C@](C)(CCC[C@H](C)CCC[C@H](C)CCCC(C)C)CC2",0,0,0,0 +"Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1",0,0,,0 +"NC(=O)[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O",0,0,,0 +"O=C1CC[C@@H](C(=O)O)N1",0,0,0,0 +"CN1C(=S)CN=C(c2ccccc2)c2cc(Cl)ccc21",0,,0, +"CC(C)(C)OC(=O)c1ncn2c1[C@@H]1CCCN1C(=O)c1c(Br)cccc1-2",,0,,0 +"CCC(Cc1c(I)cc(I)c(O)c1I)C(=O)O",0,,, +"CCOc1cc(NC(C)=O)ccc1C(=O)OC",,0,0,0 +"CC(O)C#CC(C)O",0,0,, +"COc1ccc(N)cc1N",1,0,1,1 +"CC1(C)[C@@H](O[C@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O[C@@H]2O[C@H](C(=O)[O-])[C@@H](O)[C@H](O)[C@H]2O)CC[C@@]2(C)[C@H]1CC[C@]1(C)[C@@H]2C(=O)C=C2[C@@H]3C[C@@](C)(C(=O)O)CC[C@]3(C)CC[C@]21C",0,0,0,0 +"O=C1NC(=O)C(=O)C(=O)N1",0,0,0,0 +"CC(C)(C)c1cc(/C=C2\SC(=N)NC2=O)cc(C(C)(C)C)c1O",0,,1,1 +"OCCCC1CCCCC1",0,0,0,0 +"Cc1cc2c3c(c1)C(c1ccccc1)=N[C@@H](NC(=O)c1ccncc1)C(=O)N3CC2",0,0,,0 +"CCc1cc(C2=C(C(=O)[O-])N(c3ccccc3C(F)(F)F)S(=O)(=O)c3ccccc32)cc2c1OCO2",0,0,,1 +"O=S(=O)([O-])c1ccc2c(/N=N\c3ccc(S(=O)(=O)[O-])c4ccccc34)c(O)c(S(=O)(=O)[O-])cc2c1",0,0,0,0 +"O=C=Nc1ccc(Cl)cc1",,,,1 +"CC(C)OC(=O)c1ccccc1C(=O)OC(C)C",0,0,0,0 +"CO[C@H]1[C@H]([C@@]2(C)O[C@@H]2CC=C(C)C)[C@]2(CC[C@H]1OC(=O)/C=C/C=C/C=C/C=C/C(=O)O)CO2",0,0,1,0 +"C1CCC2(CCCCO2)OC1",0,0,0,0 +"CCCCC(CC)COC(=O)c1ccccc1O",,,0, +"C[C@H]1O[C@H](O[C@@H]2[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)[C@H]2O)[C@@H](N)C[C@@H]1NC(=N)C(=O)O",0,0,0,0 +"CN(C)CCCN1c2ccccc2CCc2ccccc21",0,,, +"CCCCOCCO",0,0,0,0 +"[O-][n+]1ccccc1[S-]",0,,,1 +"CCCN(CCC)C(=O)SCC",0,0,0,0 +"O=S(=O)([O-])c1cccc2ccccc12",0,0,0,0 +"CC(C)C1=CC2=CC[C@H]3[C@](C)(C(=O)[O-])CCC[C@]3(C)[C@H]2CC1",1,0,, +"CN(C)c1ccc(C(=O)c2ccc(N(C)C)cc2)cc1",1,,0,1 +"N#CCCC#N",0,0,0,0 +"Cc1ncc([N+](=O)[O-])n1CCO",0,0,0,0 +"Nc1c(CC(=O)[O-])cccc1C(=O)c1ccccc1",0,1,,0 +"C[N+]1(C)[C@H]2CC[C@@H]1C[C@H](OC(=O)C(CO)c1ccccc1)C2",0,1,0,0 +"CC(Cl)(Cl)C(=O)O",0,0,0,0 +"CN[C@@H]1[C@H](O[C@H]2[C@H](O[C@@H]3[C@@H](NC(=N)N)[C@H](O)[C@@H](NC(=N)N)[C@H](O)[C@H]3O)O[C@@H](C)[C@]2(O)C=O)O[C@@H](CO)[C@H](O)[C@H]1O.CN[C@@H]1[C@H](O[C@H]2[C@H](O[C@@H]3[C@@H](NC(=N)N)[C@H](O)[C@@H](NC(=N)N)[C@H](O)[C@H]3O)O[C@@H](C)[C@]2(O)C=O)O[C@@H](CO)[C@H](O)[C@H]1O",0,0,,0 +"O=C(CCl)CCl",0,0,, +"CC(=O)C(Cl)Cl",0,0,0,0 +"CC(C)(c1ccccc1)c1ccc(O)cc1",0,1,1,1 +"Cc1cc(O)c2c(O)c3c(O)cccc3cc2c1",1,0,1,1 +"CCC(=O)[N-]S(=O)(=O)c1ccc(-c2c(-c3ccccc3)noc2C)cc1",0,0,,0 +"Cc1ccccc1N1CCN(CCc2nnc3n2CCCC3)CC1",0,1,0,0 +"C=Cc1ccc(S(=O)(=O)[O-])cc1",0,0,1, +"C[C@]12CC[C@@H]3c4ccc(OC(=O)N(CCCl)CCCl)cc4CC[C@H]3[C@@H]1CC[C@@H]2OP(=O)(O)O",0,1,,0 +"CC1Cc2ccccc2N1NC(=O)c1ccc(Cl)c(S(N)(=O)=O)c1",0,0,0,0 
+"CC(=O)OC(C)C(=O)OCC[N+](C)(C)C.CC(=O)OC(C)C(=O)OCC[N+](C)(C)C.O=S(=O)([O-])c1cccc2c(S(=O)(=O)[O-])cccc12",0,1,,0 +"CC(=O)CC(=O)Nc1ccc2[nH]c(=O)[nH]c2c1",0,0,0,0 +"CCO[Si](C)(CCCOCC1CO1)OCC",0,0,0,0 +"O=[N+]([O-])c1cc(C(F)(F)F)c(Cl)c([N+](=O)[O-])c1Cl",0,,, +"CCCCOCCOCCOCCO",0,0,0,0 +"CCCCCCCC/C=C/C(=O)[O-].CCCCCCCC/C=C/C(=O)[O-]",,,0, +"Nc1cc(C(F)(F)F)ccc1S",,,, +"Cc1cccc(Cc2c[nH]cn2)c1C",,0,0,0 +"CCOC(=O)CC(=O)OCC",,0,0,0 +"COc1ccc(CNCC(O)COc2ccc3[nH]c(=O)ccc3c2)cc1OC",0,0,,0 +"COC(=O)C1=C(C)NC(COC(N)=O)=C(C(=O)OC(C)C)C1c1cccc(Cl)c1Cl",,0,1, +"CCNC(=O)NCCCOc1cccc(CN2CCCCC2)c1",0,0,0,0 +"CC(=O)SCC(CC(=O)c1ccc(C)cc1)C(=O)O",0,0,0,0 +"CCOC(=O)Cn1cccc1-c1nc(-c2ccc(OC)cc2)c(-c2ccc(OC)cc2)s1",0,0,1,0 +"O=C(CCCN1CCN(c2ccc(F)cc2)CC1)NC1c2ccccc2CSc2ccccc21",0,0,, +"CC(C)(C)NC[C@H](O)c1ccc(O)cc1Cl",0,0,0,0 +"CCCCC/C=C\C/C=C\CCCCCCCC(=O)NC(C)c1ccccc1",0,0,1,0 +"CC(NN)c1ccccc1",,,0,0 +"O=Cc1ccc(Cl)cc1",0,0,0, +"CCN(C)C(=O)Oc1cccc([C@H](C)N(C)C)c1",0,0,0,0 +"CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1",0,,,1 +"CCN(CC)C(C)C(=O)c1ccccc1",,0,0,0 +"CCN1CC(CCN2CCOCC2)C(c2ccccc2)(c2ccccc2)C1=O",0,0,,0 +"Cc1cccc(C(=O)O)c1[N+](=O)[O-]",0,0,0,0 +"CCN(CC)CCOC(=O)C1(c2ccccc2)CCCC1.CCN(CC)CCOC(=O)C1(c2ccccc2)CCCC1.O=S(=O)(O)CCS(=O)(=O)O",0,0,0,0 +"Clc1ccccn1",0,0,0,0 +"CCC(=O)/C=C/C1C(C)=CCCC1(C)C",0,0,0,0 +"CC1CC(OC(=O)c2ccccc2O)CC(C)(C)C1",0,0,0,0 +"CCCCCCCCCO",0,0,0,0 +"CCCCCCNCCCCCC",0,0,0,0 +"CCN(CC)c1ccc(N)cc1",1,,1,1 +"ClCCCCl",0,0,0,0 +"CCCCCCOC(C)=O",0,0,0,0 +"CCCCC(CC)COC(=O)CCCCCCCCC(=O)OCC(CC)CCCC",0,,0,0 +"CCOC(C)=O",0,0,0, +"NCCNCCN",0,0,0,0 +"CCOP(=O)(CC)OCC",0,0,0, +"Cc1c2oc3c(C)ccc(C(=O)N[C@@H]4C(=O)N[C@H](C(C)C)C(=O)N5CCC[C@H]5C(=O)N(C)CC(=O)N(C)[C@@H](C(C)C)C(=O)O[C@@H]4C)c3nc-2c(C(=O)N[C@@H]2C(=O)N[C@H](C(C)C)C(=O)N3CCC[C@H]3C(=O)N(C)CC(=O)N(C)[C@@H](C(C)C)C(=O)O[C@@H]2C)c(N)c1=O",0,,, +"NC(=O)CCCCC(N)=O",0,0,0,0 +"CNC(=O)ON=CC(C)(C)SC",0,0,0,0 +"C=CCOc1ccc(CC(=O)O)cc1Cl",0,1,,0 +"NN",1,0,0,0 +"N[C@@H](Cc1cnc[nH]1)C(=O)O",,,,0 +"NNc1nc(-c2ccccc2)cs1",1,0,1,1 +"NNc1nc(-c2ccc(N)cc2)cs1",1,0,1,1 +"Cc1ccccc1CO[C@H]1C[C@]2(C(C)C)CC[C@@]1(C)O2",,0,0,0 +"NNc1ccc(C(=O)O)cc1",,,0,0 +"CCCCCCOc1ccccc1C(=O)O",0,,0,0 +"O=C(OCc1ccccc1)C(=O)OCc1ccccc1",0,0,0,0 +"CCCSc1ccc2[nH]c(NC(=O)OC)nc2c1",1,1,0,1 diff --git a/chemprop/tests/data/classification/mol_multiclass.csv b/chemprop/tests/data/classification/mol_multiclass.csv new file mode 100644 index 0000000000000000000000000000000000000000..bc14f7fd74c37ebffa5eefab411859e46efe99ac --- /dev/null +++ b/chemprop/tests/data/classification/mol_multiclass.csv @@ -0,0 +1,500 @@ +smiles,activity +CCC1=[O+][Cu-3]2([O+]=C(CC)C1)[O+]=C(CC)CC(CC)=[O+]2,0 +C(=Cc1ccccc1)C1=[O+][Cu-3]2([O+]=C(C=Cc3ccccc3)CC(c3ccccc3)=[O+]2)[O+]=C(c2ccccc2)C1,0 +CC(=O)N1c2ccccc2Sc2c1ccc1ccccc21,0 +Nc1ccc(C=Cc2ccc(N)cc2S(=O)(=O)O)c(S(=O)(=O)O)c1,0 +O=S(=O)(O)CCS(=O)(=O)O,0 +CCOP(=O)(Nc1cccc(Cl)c1)OCC,0 +O=C(O)c1ccccc1O,0 +CC1=C2C(=COC(C)C2C)C(O)=C(C(=O)O)C1=O,0 +O=[N+]([O-])c1ccc(SSc2ccc([N+](=O)[O-])cc2[N+](=O)[O-])c([N+](=O)[O-])c1,0 +O=[N+]([O-])c1ccccc1SSc1ccccc1[N+](=O)[O-],0 +CC(C)(CCC(=O)O)CCC(=O)O,0 +O=C(O)Cc1ccc(SSc2ccc(CC(=O)O)cc2)cc1,1 +O=C(O)c1ccccc1SSc1ccccc1C(=O)O,0 +CCCCCCCCCCCC(=O)Nc1ccc(SSc2ccc(NC(=O)CCCCCCCCCCC)cc2)cc1,0 +Sc1cccc2c(S)cccc12,0 +CCOP(N)(=O)c1ccccc1,0 +NNP(=S)(NN)c1ccccc1,1 +O=P(Nc1ccccc1)(Nc1ccccc1)Nc1ccccc1,0 +O=C1C(O)=C(CCCc2ccc(Oc3ccccc3)cc2)C(=O)c2ccccc21,0 +CC(C)N(C(C)C)P(=O)(OP(=O)(c1ccc([N+](=O)[O-])cc1)N(C(C)C)C(C)C)c1ccc([N+](=O)[O-])cc1,0 +c1ccc2c(c1)Sc1ccccc1S2,0 +CC(C)CCS(=O)(=O)O,0 +Cc1ccccc1NC(=N)Nc1ccccc1C,0 
+CCCNP(=S)(NCCC)NCCC,0 +CCCCCCCCCCCCNP(=S)(NCCCCCCCCCCCC)NCCCCCCCCCCCC,0 +O=C1OC(=O)c2c1ccc1ccccc21,0 +S=P(NC1CCCCC1)(NC1CCCCC1)NC1CCCCC1,0 +Clc1ccnc2c1ccc1c(Cl)ccnc12,0 +O=C(OOC(=O)c1ccccc1)c1ccccc1,0 +c1ccc2nsnc2c1,0 +S=C1NCCS1,0 +CN(C)C1=[S+][Zn-2]2(S1)SC(N(C)C)=[S+]2,0 +CN(Cc1cnc2nc(N)nc(N)c2n1)c1ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc1,0 +[N-]=[N+]=CC(=O)OCC(N)C(=O)O,0 +Nc1nc(O)c2nn[nH]c2n1,0 +CS(=O)(=O)OCCCCOS(C)(=O)=O,0 +Nc1nc(S)c2nc[nH]c2n1,0 +Sc1ncnc2[nH]cnc12,0 +COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1C(NC(C)=O)CC2,0 +CN(CCCl)CCCl,0 +CS(C)=O,0 +CCCCOB(OCCCC)OCCCC,0 +CCCCCOB(OCCCCC)OCCCCC,0 +CC1CC(C)(C)OB(OC(C)CC(C)(C)OB2OC(C)CC(C)(C)O2)O1,0 +c1ccn2nnnc2c1,0 +c1ccn2nncc2c1,0 +Clc1ccc(Cl)c(SSc2cc(Cl)ccc2Cl)c1,0 +CN(C)c1ccc(SSc2ccc(N(C)C)cc2)cc1,0 +Brc1ccc(SSc2ccc(Br)cc2)cc1,0 +Cc1ccc(SSc2ccc(C)cc2)cc1,0 +COc1ccc(SSc2ccc(OC)cc2)cc1,0 +NC1(C(=O)O)CCCC1,0 +CC(C)(Br)C(=O)C(Br)Br,0 +CCOC(=S)SCCSC,0 +CCOCC(C)(CO)CC(C)CO,0 +O=C(O)C1CC1,0 +O=C(O)C1(O)CC(O)C(O)C(O)C1,0 +Nc1c(Cl)cc(Cl)cc1C(=O)O,0 +CCCCCOC(=S)S,0 +O=C(O)c1ccc([N+](=O)[O-])cc1S(=O)(=O)O,0 +NC(=O)c1cc(O)c(O)c(O)c1,0 +C1C[S+]2CC[S+]1CC2,0 +Nc1cc(Cl)c(S(=O)(=O)O)cc1Cl,0 +CC12CCC(C(Br)C1=O)C2(C)CS(=O)(=O)O,0 +CCC(C)(C(=O)O)C(=O)O,0 +CC(C)C(C(=O)O)C(=O)O,0 +CCOC(=O)C(=O)C1CCCCC1=O,0 +CCOC(=O)CNS(=O)(=O)c1ccccc1,0 +CCN(CC)C(C)(O)CN,0 +Cc1cccc2c(=O)c3ccccc3oc12,0 +CCCCCCCCCCCC(=O)OCCOCCOCCOCCOCCOCCOCCOCCOCCO,0 +C1CN[Co-4]23(N1)(NCCN2)NCCN3,0 +CC(C)OC(=S)SSC(=S)OC(C)C,0 +O=C(Nc1ccccc1)OCC1OCOC(COC(=O)Nc2ccccc2)C1OC(=O)Nc1ccccc1,0 +OCC1OCOC2COCOC12,0 +CC(=O)OC1C(OC(C)=O)C(OC(C)=O)C2(CO2)C(OC(C)=O)C1OC(C)=O,0 +CCN(CC)C(=O)N1CCN(C)CC1,0 +CC(=O)OC1COC(c2ccccc2)OC1C1OC(c2ccccc2)OCC1OC(C)=O,0 +Oc1ncnc2[nH]ncc12,0 +O=C1O[Cu-5]2(O)(O)(OC1=O)OC(=O)C(=O)O2,0 +O=Nc1ccc(O)c(N=O)c1O,1 +Oc1ccc(Nc2ccccc2)cc1,0 +CCCCCCc1ccc(O)cc1O,0 +CCCCCCCC[N+]12CN3CN(CN(C3)C1)C2,0 +CC(C)(O)O.CC1(O)C(O)C(O)C1(O)CO,0 +OC1COCOC1C(O)C1OCOCC1O,0 +CN(C)C(=S)SSC(=S)N(C)C,0 +O=[N+]([O-])c1ccc(C=Cc2ccc([N+](=O)[O-])cc2S(=O)(=O)O)c(S(=O)(=O)O)c1,0 +CCc1cc[n+]([Mn](SC#N)(SC#N)([n+]2ccc(CC)cc2)([n+]2ccc(CC)cc2)[n+]2ccc(CC)cc2)cc1,0 +N=c1[nH][nH]c(=N)[nH]1,0 +O=S(=O)(O)CCO,0 +O=C1CSC(=S)N1,0 +C1CCNCC1.S=C(S)N1CCCCC1,0 +C1SCSCS1,0 +CCC(CC)(C(=O)O)C(=O)O,0 +N#CC(=Cc1ccccc1)c1ccccc1,0 +N#CNC(=N)N,0 +O=C1C(O)=C(CCCC2CCC3CCCCC3C2)C(=O)c2ccccc21,0 +O=[N+]([O-])c1cc([As](=O)(O)O)ccc1O,0 +O=C(O)c1ccccc1S,0 +CCOC(=O)C(C(=O)OCC)C(C(=O)OCC)C(=O)OCC,0 +C=C(C)CS(=O)(=O)O,0 +CCSc1ccc(N=[N+]([O-])c2ccc(SCC)c(Cl)c2)cc1Cl,0 +COC1C(OC(N)=O)C(O)C(Oc2ccc3c(O)c(NC(=O)c4ccc(O)c(CC=C(C)C)c4)c(=O)oc3c2C)OC1(C)C,0 +O=C1C(=Cc2ccccc2)CCCC1=Cc1ccccc1,0 +CCCCCCC(O)CCCCCCCCCCC(=O)OCC(COC(=O)CCCCCCCCCCC(O)CCCCCC)OC(=O)CCCCCCCCCCC(O)CCCCCC,0 +O=S(=O)(O)CO,0 +CCN(CC)CCCCCCNc1cc(OC)cc2c(C)ccnc12,0 +CCCC(O)CNCc1ccc(N(C)C)cc1,0 +N=C1NC(=O)C(c2ccccc2)S1,0 +Cc1cc(SCC(=O)c2ccccc2[N+](=O)[O-])cc(C)[o+]1,0 +CC12CCC(CC1)C(C)(C)NC(=N)S2,0 +CCSC(SCC)C(O)C(O)C(O)C(O)C(O)C(O)CO,0 +O=C(O)C(O)C(O)C(O)C(O)C(O)C(O)CO,0 +Nc1ccc(S(=O)(=O)Nc2ccccc2)cc1,0 +ClP1(Cl)=NP(Cl)(Cl)=NP(Cl)(Cl)=N1,0 +CCN(CC)CC.O=C(Nc1ccc(Cl)cc1)P(=O)(O)c1ccccc1,0 +O=S(=O)(O)CCCCBr,0 +c1ccc(SSc2ccccc2)cc1,0 +O=S(=O)(O)CC(S(=O)(=O)O)S(=O)(=O)O,0 +O=S1(=O)CCCc2c1ccc1ccccc21,0 +CCOC(=O)C(CCCCS(=O)(=O)O)C(=O)OCC,0 +O=S1(=O)CCc2ccccc2C(Br)C1,0 +CCCCCCCCS(=O)(=O)O,0 +CCOCCCCS(=O)(=O)O,0 +CCOC(C)CCCS(=O)(=O)O,0 +O=S(=O)(O)CCCCCO,0 +O=C(O)CN(CCN(CC(=O)O)CC(=O)O)CC(=O)O,0 +CC(=O)N1C(=O)C(=O)c2ccccc21,0 +O=C1NC2NC(=O)NC2N1,0 +O=C1c2ccccc2C(=O)N1Cc1ccccc1,0 +CN(C)c1ccc(N=O)cc1,0 +O=S1(=O)CS(=O)(=O)CS(=O)(=O)C1,0 +ClC(Cl)(Cl)C1OCOC(C(Cl)(Cl)Cl)OCO1,0 
+O=C1c2c(O)cc(O)cc2OC(c2ccc(O)c(O)c2)C1O,0 +O=C(CCc1ccc(O)cc1)c1c(O)cc(O)cc1OC1OC(CO)C(O)C(O)C1O,0 +N=C1NC(=O)CS1,0 +O=C1C[N+]23CC[N+]45CC(=O)O[Ni-4]24(O1)(OC(=O)C3)OC(=O)C5,0 +CC(=O)c1cc2c(cc1C(C)C)CCC1C(C)(C#N)CCCC21C,0 +COC(=O)C1(C)CCCC2(C)c3cc(Br)c(C(C)C)cc3CCC12,0 +COC(=O)C1(C)CCCC2(C)c3ccccc3C(OO)CC12,0 +COC(=O)C1(C)CCCC2(C)c3ccc(C(C)C)cc3C(=O)CC12,0 +C=C(C)c1ccc2c(c1)C(=O)CC1C(C)(C#N)CCCC21C,0 +CC(C)c1ccc2c(c1)CCC1C2(C)CCCC1(C)C(O)c1ccccc1,0 +CC(=O)OC(C)(C)c1ccc2c(c1)C(=O)CC1C(C)(C#N)CCCC21C,0 +CC(O)c1ccc2c(c1)C(O)CC1C(C)(CO)CCCC21C,0 +CC(C)C1(Cl)CCC2C3(C)CCCC(C)(C(=O)O)C3CC(Cl)C2(Cl)C1Cl,0 +COC(=O)C1(C)CCCC2(C)c3ccc(C(C)C)cc3C(=O)C(Br)C12,0 +CNC=O,0 +Cc1c2oc3c(C)ccc(C(=O)NC4C(=O)NC(C(C)C)C(=O)N5CCCC5C(=O)N(C)CC(=O)N(C)C(C(C)C)C(=O)OC4C)c3nc-2c(C(=O)NC2C(=O)NC(C(C)C)C(=O)N3CCCC3C(=O)N(C)CC(=O)N(C)C(C(C)C)C(=O)OC2C)c(N)c1=O,0 +COc1ccc(CC(N)C(=O)NC2C(CO)OC(n3cnc4c(N(C)C)ncnc43)C2O)cc1,0 +O=[As]O,0 +CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1,0 +CCc1nc(N)nc(N)c1-c1ccc(Cl)c(Cl)c1,0 +O=C(NC(CO)C(O)c1ccc([N+](=O)[O-])cc1)C(Cl)Cl,0 +CC1(C)NC(=N)NC(=N)N1c1ccc(Cl)c(Cl)c1,0 +CC(C)(CO)C(O)C(=O)NCCS(=O)(=O)O,0 +O=C(O)CCCc1ccc(N(CCCl)CCCl)cc1,0 +CN(C)c1ccc(C(=C2C=CC(=[N+](C)C)C=C2)c2ccc(N(C)C)cc2)cc1,0 +CC[N+](C)(C)c1ccc(C(=C2C=CC(=[N+](C)C)C=C2)c2ccc(N(C)C)cc2)cc1,0 +CNC1CCc2cc(OC)c(OC)c(OC)c2-c2ccc(OC)c(=O)cc21,0 +Nc1cc([As]=[As]c2ccc(O)c(N)c2)ccc1O,0 +Oc1nc(O)c2nnoc2n1,0 +N=C1NCCS1,0 +O=Nc1ccc(O)cc1,0 +NNC(=O)C(CC(C(=O)NN)C(=O)NN)C(=O)NN,0 +O=C(O)Cc1cc(=O)[nH]n(-c2ccccc2)c1=O,0 +NC(=O)C(=O)NN=Cc1ccc([N+](=O)[O-])o1,0 +C[N+]1=Cc2ccccc2O[Cu-3]12Oc1ccccc1C=[N+]2C,0 +O=S1OCCO1,0 +CCCCS(=O)(=O)O,0 +COc1ccc2c(ccc(=O)n2C)c1,0 +CC(CN1CCCCC1)SSC(C)CN1CCCCC1,0 +Cn1c(SSc2ccc(-c3cccnc3)n2C)ccc1-c1cccnc1,0 +O=C1N(CO)C2C(N1CO)N(CO)C(=O)N2CO,0 +CC(=O)OC1CCC2(C)C3=CCC4(C)C(C(C)=O)CCC4C34C=CC2(C1)C1C(=O)OC(=O)C14,0 +CNC(=O)C(C)C1C(=O)C(=C(O)C=CC(C)=CC(C)C2OC3(C)OC(C=CC34CO4)C2C)C(=O)N1C1CCC(O)C(C)O1,0 +CCCCCC(O)C1C(=O)OC(C)C(O)C=CC=CC=CC=CC=C(C)C(O)CC(O)CC(O)CC(O)CC(O)CC(O)CC1O,0 +CCC(C(=O)O)c1ccc([N+](=O)[O-])cc1.COc1cc2c(cc1OC)C13CCN4CC5=CCOC6CC(=O)N2C1C6C5CC43,0 +CCC(C)OC(=O)c1ccccc1C(=O)O.COc1cc2c(cc1OC)C13CCN4CC5=CCOC6CC(=O)N2C1C6C5C4C3,0 +O=c1ssc(=Nc2ccccc2)n1-c1ccccc1,0 +CCCN=c1ssc(=O)n1CCC,0 +CCCCN=c1ssc(=O)n1CCCC,0 +O=c1ssc(=Nc2ccc(Cl)cc2)n1-c1ccc(Cl)cc1,0 +CCCCCCCN=c1ssc(=O)n1CCCCCCC,0 +NS(=O)(=O)c1cc(O)nc(O)n1,0 +CS(=O)(=O)c1cc(O)nc(O)n1,0 +CCS(=O)(=O)c1cc(O)nc(O)n1,0 +O=S(=O)(Cc1ccccc1)c1cc(O)nc(O)n1,0 +Cc1oc(C)c2c1C(=O)c1ccccc1C2=O,0 +Nc1nc(O)c2c(n1)NCC(CNc1ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc1)N2C=O,0 +CCOC(=O)Nc1ccc(C(=O)C=Cc2ccc(N(CC)CC)cc2)cc1,0 +CC1NC(C)SC(C)S1,0 +N#CNC(=N)NC(=O)C=CC=Cc1ccc(Cl)cc1,0 +O=C(O)C=CC(=O)NNC(=O)C=CC(=O)O,0 +CC1C(=O)N2C(=O)C(C)C(C)C(=O)N2C(=O)C1C,0 +O=C(O)CCC(=O)NNC(=O)CCC(=O)O,0 +Nc1ccc(-c2ccc(N)c(S(=O)(=O)O)c2)cc1S(=O)(=O)O,0 +CCC(CS(=O)(=O)O)[N+](=O)[O-],0 +Oc1nnc(O)c2ncccc12,0 +Clc1nc(Cl)nc(Nc2ccccc2Cl)n1,0 +O=Nc1ccc(O)c2ncccc12,0 +Oc1ccc(Cl)cc1C(c1cc(Cl)ccc1O)C(Cl)(Cl)Cl,1 +CCCCCCCCCCCCCCCCCC(=O)OCC(O)CO,0 +c1ccc2c(c1)SC1=[S+][Cu-3]3([S+]=C4Sc5ccccc5N43)N12,0 +Oc1c(Cl)cc(Cl)c2cccnc12,0 +CC(=O)Nc1c2sscc-2n(C)c1=O,0 +[O-][N+]1=Cc2ccc[o+]2[Cu-3]12[N+]([O-])=Cc1ccc[o+]12,0 +O=C1O[Cu-3]2(Nc3ccccc31)Nc1ccccc1C(=O)O2,0 +O=C(Oc1cccc2cccnc12)c1ccccc1,0 +CCN(CC)C(=S)S[Se](SC(=S)N(CC)CC)(SC(=S)N(CC)CC)SC(=S)N(CC)CC,0 +C1COCCN1.S=C(S)N1CCOCC1,0 +CC1=NNC(=O)C1,0 +O=c1ccc2cc(Cl)ccc2o1,0 +CN(C)c1ccc(C=Cc2ccnc3ccccc23)cc1,0 +CN(C)c1ccc(C=Cc2ccc3ccccc3[n+]2C)cc1,0 +CN(C)c1ccc(C=Cc2cc[n+](C)c3ccccc23)cc1,0 +c1cnc2c(c1)ccc1cccnc12,0 +Cc1ccc2ccc3ccc(C)nc3c2n1,0 
+O=S(=O)(O)c1ccc2c(N=Nc3ccc(S(=O)(=O)O)c4ccccc34)c(O)c(S(=O)(=O)O)cc2c1,0 +O=C1OC(=O)C2C1C(c1ccccc1)N1C3C(=O)OC(=O)C3C(c3ccccc3)N21,0 +CCN(Cc1cccc(S(=O)(=O)O)c1)c1ccc(C(=C2C=CC(=[N+](CC)Cc3cccc(S(=O)(=O)O)c3)C=C2)c2ccccc2S(=O)(=O)O)cc1,0 +COc1ccc(C=CC(=O)O)cc1OC,0 +Cc1ccc(S(=O)(=O)Nc2cccc3c(O)ncnc23)cc1,0 +CCN(CCO)CCCC(C)Nc1ccnc2cc(Cl)ccc12.O=S(=O)(O)O,0 +c1ccc(C2=NC3(CCCCC3)N=C2c2ccccc2)cc1,0 +CSc1nc(N)nc(N)n1,0 +Nc1nc(N)nc(-c2ccccc2-c2nc(N)nc(N)n2)n1,0 +C1CN2CN1CN1CCN(C1)C2,0 +N=C(N)Nc1nnn[nH]1,0 +O=[N+]([O-])c1cc(S(=O)(=O)c2ccc(Cl)c([N+](=O)[O-])c2)ccc1Cl,0 +O=C1N(Cl)C2(c3ccccc3)N(Cl)C(=O)N(Cl)C2(c2ccccc2)N1Cl,0 +Nc1nc(N)nc(SCCOCCSc2nc(N)nc(N)n2)n1,0 +NNC(=O)c1ccccc1SSc1ccccc1C(=O)NN,1 +O=C(NN=Cc1ccc(Cl)cc1Cl)c1ccccc1SSc1ccccc1C(=O)NN=Cc1ccc(Cl)cc1Cl,1 +CCCC1CC=CC=CC=CC=CC(OC2OC(C)C(O)C(N)C2O)CC(O)C(C(=O)O)C(O)CC(=O)CC(O)CCCC(=O)CC(O)C(CC)C(=O)O1.O=S(=O)(O)O,0 +O=c1c2ccccc2oc2cc([N+](=O)[O-])cc([N+](=O)[O-])c12,0 +CC12CCC(C(=O)OC1=O)C2(C)C,0 +C[Si](C)(CCC(=O)O)O[Si](C)(C)CCC(=O)O,0 +O=[N+]([O-])c1ccc(SSc2ccc([N+](=O)[O-])cc2)cc1,0 +COc1ccc(C2c3cc(OC)c(OC)cc3CC3C(=O)OCC32)cc1OC,0 +O=S(=O)(O)C(Br)(Br)Br,0 +O=C(O)c1ccc(S(=O)(=O)O)o1,0 +S=c1[nH][nH]c(=S)s1,1 +OCC(S)CS,0 +O=C(O)C(O)(O)C(O)(O)C(=O)O,0 +CC1=[O+][V-]2(=O)([O+]=C(C)C1)[O+]=C(C)CC(C)=[O+]2,0 +CC1=[O+][Zr]234([O+]=C(C)C1)([O+]=C(C)CC(C)=[O+]2)([O+]=C(C)CC(C)=[O+]3)[O+]=C(C)CC(C)=[O+]4,0 +CC(=O)Oc1ccc2c(c1)Oc1cc(OC(C)=O)ccc1C21OC(=O)c2ccccc21,0 +N=c1[nH]ncs1,0 +CN1CSC(=S)N(C)C1,0 +CCCCC(CC)COS(=O)(=O)O,0 +O=C1CSC(=O)N1c1ccccc1,0 +O=C(O)C1=NN(c2ccc(S(=O)(=O)O)cc2)C(=O)C1N=Nc1ccc(S(=O)(=O)O)cc1,0 +Oc1ccc(O)c([PH](c2ccccc2)(c2ccccc2)c2ccccc2)c1,0 +CN(C)C(=S)SC(=S)N(C)C,0 +O=Nc1ccc(N=O)cc1,0 +O=c1oc(=O)c2cc3c(=O)oc(=O)c3cc12,0 +S=C(SSSSC(=S)N1CCCCC1)N1CCCCC1,0 +CC(C)S(=O)(=O)O,0 +CCN(CC)C(=S)S,0 +O=C1NS(=O)(=O)c2ccccc21,0 +O=C1c2ccc(O)c(O)c2C(=O)c2c(O)ccc(O)c21,0 +OCC1OC(n2cnc3c(Cl)ncnc32)C(O)C1O,0 +Oc1ncc(S)c(O)n1,0 +Cc1nc(N)c2cnn(C)c2n1,0 +Cc1n[nH]c2c(N(C)C)ncnc12,0 +Cc1cc(O)cc(C)c1Cl,0 +CC(C)=C1C=C2CCC3C(C)(C(=O)O)CCCC3(C)C2CC1,0 +CCN(CC)c1ccc(C(=C2C=CC(=[N+](CC)CC)C=C2)c2ccccc2)cc1.O=S(=O)(O)O,0 +Cc1cc(-c2ccc(N=Nc3cc(S(=O)(=O)O)c4ccccc4c3N)c(C)c2)ccc1N=Nc1cc(S(=O)(=O)O)c2ccccc2c1N,2 +O=Nc1ccc2ccccc2c1O,0 +c1ccc(N2N=C3N(c4ccccc4)C2N3c2ccccc2)cc1,0 +CN(C)c1ccc(C=C2SC(=S)NC2=O)cc1,0 +Cc1ccc2nc(-c3ccc(N=NNc4ccc(-c5nc6ccc(C)c(S(=O)(=O)O)c6s5)cc4)cc3)sc2c1S(=O)(=O)O,0 +O=C(N=Nc1ccccc1)NNc1ccccc1.O=C(NNc1ccccc1)NNc1ccccc1,0 +Cc1cc(O)cc(O)c1N=Nc1ccc([N+](=O)[O-])cc1,0 +Nc1ccc(N=Nc2ccc(C=Cc3ccc(N=Nc4ccc(N)c5ccccc45)cc3S(=O)(=O)O)c(S(=O)(=O)O)c2)c2ccccc12,0 +Nc1ccc2ccccc2c1N=Nc1ccc(C=Cc2ccc(N=Nc3c(N)ccc4ccccc34)cc2S(=O)(=O)O)c(S(=O)(=O)O)c1,1 +O=C(O)CC1OCC=C2CN3CCC45C6=CC(=O)C(=O)C([N+](=O)[O-])=C6NC4C1C2CC35,0 +O=[N+]([O-])c1ccc([As](=O)(O)O)cc1,0 +CC(C)c1cccc(C(C)C)c1O,0 +CCCCCCCCCCCCCCC(C(=O)O)S(=O)(=O)O,0 +O=c1c(-c2ccc(O)cc2)coc2cc(OC3OC(CO)C(O)C(O)C3O)cc(O)c12,0 +Cc1c(O)ccc2c(O)c(NC(=O)c3ccc4c(c3)CCC(C)(C)O4)c(=O)oc12,0 +C1CN2CCOB(O1)OCC2,0 +CCO[Si](C)(OCC)OCC,0 +CCCCOC(=S)SSC(=S)OCCCC,0 +O=c1c2ccccc2c2ccc3c4ccc5c(=O)c6ccccc6c6ccc(c7ccc1c2c73)c4c56,0 +CC[Sn](Cl)(CC)CC,0 +Cc1cc(=O)oc2cc(O)cc(O)c12,0 +COC(=O)c1ccccc1SSc1ccccc1C(=O)OC,0 +COc1ccc2c(c1OC)C(=O)OC2C1c2c(cc3c(c2OC)OCO3)CCN1C,0 +O=[N+]([O-])c1ccc(N=Nc2ccc(O)c3c(O)cc(S(=O)(=O)O)cc23)cc1S(=O)(=O)O,0 +CCCC(C)SP(=S)(SC(C)CCC)SC(C)CCC,0 +CCN(CC)CCCC(C)Nc1cc(-c2ccccc2)nc2ccc(OC)cc12.O=P(O)(O)O,0 +CCCCCCCCCCCCCCCCCCN(C)C,0 +CC1OC(OC2C(COc3cc(O)c4c(c3)OC(c3ccc(O)cc3)CC4=O)OOC(CO)C2O)C(O)C(O)C1O,0 +O=Nc1ccc(O)cc1O,0 +CCCCCCOP(OCCCCCC)OCCCCCC,0 
+CC(=NN=C(C)C1CC(CC(=O)O)C1(C)C)C1CC(CC(=O)O)C1(C)C,0 +CCCCCCCCCCCCCCCC[N+](C)(C)CCN(Cc1ccc(OC)cc1)c1ncccn1,0 +CCCCCCCCCCCCCCCCC(C(=O)O)S(=O)(=O)O,0 +Nc1ccc2c(c1)N[Cu-3]1(Nc3cc(N)ccc3O1)O2,0 +C1CCc2nnnn2CC1,0 +CC1=[O+][Cu-3]2([O+]=C(C)CC(Nc3ccccc3)=[O+]2)[O+]=C(Nc2ccccc2)C1,0 +O=[N+]([O-])c1ccc(S(=O)(=O)NN2CCOCC2)cc1,0 +O=[N+]([O-])c1ccc(S(=O)(=O)NN=Cc2ccco2)cc1,0 +O=[N+]([O-])c1ccc(S(=O)(=O)NN=C2CCCCC2)cc1,0 +CC(=O)Nc1ccc(S(=O)(=O)NN=Cc2ccco2)cc1,0 +CC(C=Cc1ccccc1)=NNS(=O)(=O)c1ccc([N+](=O)[O-])cc1,0 +CC1CCCN1c1ccnc2cc(Cl)ccc12,0 +CN(C)C(=O)N1CC[N+](C)([O-])CC1,0 +Cc1ccc2oc(=O)ccc2c1,0 +COc1cc(C)cc2c1OC(c1ccccc1)CC2=O,0 +CC(=O)c1c(O)c(C)c(O)c2c1OC1=CC(=O)C(C(C)=O)C(=O)C12C,0 +C=C1C(=O)OC(CCCCCCCCCCCCC)C1C(=O)O,0 +CC(CCC(C)C(=O)O)C(=O)O,0 +O=C(Nc1ccccc1)Nc1ccccn1,0 +CC1CCCC2(C1)OCC(O)CO2,0 +O=c1c2cc3c(=O)n(O)c(=O)c3cc2c(=O)n1O,0 +Nc1ccc(S(=O)(=O)c2ccc(N)cc2)cc1,0 +CC(=NNc1ccc([N+](=O)[O-])cc1[N+](=O)[O-])c1ccccc1,0 +CCCCC(CC)CO[Si](OCC(CC)CCCC)(OCC(CC)CCCC)OCC(CC)CCCC,0 +O=C(O)C1C2OC3C(OC(=O)C31)C2Br,0 +Cc1c(C(=O)O)c(O)cc2c1C(=O)c1c(O)c(OC3OC(CO)C(O)C(O)C3O)c(O)c(O)c1C2=O,1 +CC1=CC(=C(c2cc(C)c(O)c(Br)c2)c2ccccc2S(=O)(=O)O)C=C(Br)C1=O,0 +O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1,0 +O=C(O)c1cc(I)cc(I)c1O,0 +NC(CO)(CO)CO,0 +C=CCc1cc(-c2cc(CC=C)c(O)c(CN(CC)CC)c2)cc(CN(CC)CC)c1O,0 +S=P(N1CC1)(N1CC1)N1CC1,0 +CC(CCN1CCN(CCC(C)CC(C)(C)C)S1(=O)=O)CC(C)(C)C,0 +O=C1NCCN1N=Cc1ccc([N+](=O)[O-])o1,0 +CCCSP(SCCC)SCCC,0 +CCCCOP(=S)(OCCCC)OCCCC,0 +COP(=O)(OC)OC=C(Cl)Cl,0 +CC(O)=C[PH](c1ccccc1)(c1ccccc1)c1ccccc1,0 +CC(=O)C[PH](c1ccccc1)(c1ccccc1)c1ccccc1,0 +OC(=C[PH](c1ccccc1)(c1ccccc1)c1ccccc1)c1ccccc1,0 +O=C(C[PH](c1ccccc1)(c1ccccc1)c1ccccc1)c1ccccc1,0 +O=C1CSC(=O)N1,0 +S=C1SCCS1,0 +OCC1(CO)CCCC(CO)(CO)C1O,0 +CCCCCCC(=O)C(O)C(O)C(=O)CCCCCCCC(=O)O,0 +Nc1cc(Cl)ccc1S,0 +O=C1c2ccccc2C(=O)c2c1ccc(O)c2O,0 +CCOc1ccc(N=Nc2ccc(N)cc2N)cc1,0 +O=[N+]([O-])c1ccc2c(N=Nc3ccc4ccccc4c3O)c(O)cc(S(=O)(=O)O)c2c1,0 +Cc1cc(C2(c3ccc(O)c(C)c3)OS(=O)(=O)c3ccccc32)ccc1O,0 +O=C1c2ccccc2C(=O)c2c(Nc3cccc4c3C(=O)c3ccccc3C4=O)cccc21,0 +O=S(=O)(O)c1cc2ccc1ccc1ccc(cc1S(=O)(=O)O)nnc1ccc(ccc3ccc(cc3S(=O)(=O)O)nn2)c(S(=O)(=O)O)c1,2 +Nc1c(N=Nc2ccc(-c3ccc(N=Nc4cc(S(=O)(=O)O)c5ccccc5c4O)cc3)cc2)cc(S(=O)(=O)O)c2ccccc12,2 +N=C(N)N.O=S(=O)(O)O,0 +CC[N+]12CN3CN(CN(C3)C1)C2,0 +O=C1C=C2CC3(O)COc4c(ccc(O)c4O)C3=C2C=C1O,0 +O=C(O)CN(CCN(CC(=O)O)CC(=O)O)CCN(CC(=O)O)CC(=O)O,0 +O=C1C[N+]23CC[N+]45CC(=O)O[Cu-5]24(O1)(OC(=O)C3)OC(=O)C5,0 +OCCCN(CCO)CCN(CCCO)CCCO,0 +Cc1nc(N)nc(N)c1-c1ccc(Cl)c(Cl)c1,0 +O=S(c1ccccc1O)S(=O)c1ccccc1O,1 +COc1c2c(cc3c1OCO3)C13C=CC(OC)CC1N(CC3)C2.[O-][Cl+3]([O-])([O-])O,0 +CC1=C2C(=O)C3C(CC=C4CC(O)CCC43C)C2CCC12OC1CC(C)CNC1C2C,0 +CC1OC(OC2C=C3CCC4C(CCC5(C)C(c6ccc(=O)oc6)CCC45O)C3(C)CC2)C(O)C(O)C1O,0 +COC1CC(OC2CCC3(C=O)C4CCC5(C)C(C6=CC(=O)OC6)CCC5(O)C4CCC3(O)C2)OC(C)C1O,0 +CC1OC(OC2C=C3CCC4C(CCC5(C)C(c6ccc(=O)oc6)CCC45O)C3(C)CC2)C(O)C(OC2OC(CO)C(O)C(O)C2O)C1O,0 +CCC(C)C(=O)OC1C(O)C2C(CN3CC(C)CCC3C2(C)O)C2CC34OC5(O)C(OC(=O)C(C)(O)CC)CCC3(C)C5C(OC(C)=O)C(OC(C)=O)C4C21O,0 +Cc1cc(O)n2cnnc2n1,0 +Nc1c2ccccc2nc2ccccc12,0 +O=C1C2=C(C(=O)c3ccccc31)C(O)(S(=O)(=O)O)C(O)(S(=O)(=O)O)C1N=CC=CC21,0 +CC(=O)C1C(=O)OC2C3C=CC(C3)C2C1=O,0 +CC(C)(O)S(=O)(=O)O,0 +CC(=O)Nc1ccccc1,0 +[O-][N+]1=c2c([nH]c3ccccc23)=[O+][Cu-3]12[O+]=c1[nH]c3ccccc3c1=[N+]2[O-],0 +C[n+]1ccccc1C=NO,0 +COc1ccc(C(=O)c2ccccc2)c(O)c1,0 +CCCCCCCCOC(=O)CC(C(=O)OCCCCCCCC)S(=O)(=O)O,0 +CCCCCCCCCCCCCOC(=O)CC(C(=O)OCCCCCCCCCCCCC)S(=O)(=O)O,0 +O=C1c2ccccc2C(=O)c2c1cc(S(=O)(=O)O)c(O)c2O,0 +O=C(O)c1cc(N=Nc2cccc([N+](=O)[O-])c2)ccc1O,0 +O=C1C=CC(=C(c2ccc(O)cc2)c2ccc(O)cc2)C=C1,0 
+O=S(=O)(O)c1ccc(N=Nc2cc(S(=O)(=O)O)c3ccccc3c2O)c2ccccc12,0 +O=S(=O)(O)c1ccc(N=Nc2ccc(N=Nc3cccc4ccccc34)c(S(=O)(=O)O)c2)cc1,0 +O=S1(=O)OC(c2cc(Cl)c(O)c(Br)c2)(c2cc(Cl)c(O)c(Br)c2)c2ccccc21,1 +Cc1c(C2(c3cc(Br)c(O)c(Br)c3C)OS(=O)(=O)c3ccccc32)cc(Br)c(O)c1Br,0 +O=S1(=O)OC(c2cc(Br)c(O)c(Br)c2)(c2cc(Br)c(O)c(Br)c2)c2ccccc21,0 +Cc1c(C2(c3cc(C(C)C)c(O)c(Br)c3C)OS(=O)(=O)c3ccccc32)cc(C(C)C)c(O)c1Br,1 +Nc1c(N=Nc2ccc([N+](=O)[O-])cc2)c(S(=O)(=O)O)cc2cc(S(=O)(=O)O)c(N=Nc3ccccc3)c(O)c12,0 +O=S1(=O)OC(c2ccc(O)c(Cl)c2)(c2ccc(O)c(Cl)c2)c2ccccc21,0 +O=S(=O)(O)c1cccc2c(N=Nc3ccc(N=Nc4ccc(Nc5ccccc5)c5c(S(=O)(=O)O)cccc45)c4ccccc34)cccc12,0 +O=S(=O)(O)c1cc(S(=O)(=O)O)c2c(N=Nc3ccc(N=Nc4ccccc4)cc3)c(O)ccc2c1,0 +N=C(N)NN.O=C(O)O,0 +N=C(N)NCCCC(N)C(=O)O,0 +CN(C)P(=O)(N(C)C)N(C)C,0 +O=C1C=C2C(=CCOC2O)O1,0 +O=C(O)C=CCCCCCCCCC(=O)O,0 +CC(=O)C(C)(CCC(=O)O)CCC(=O)O,0 +N#CNC(=N)NC#N,0 +N#CN(CS(=O)(=O)O)C(=N)N,0 +N=c1nc2[nH][nH]c(=N)n2c(=N)[nH]1,0 +Nc1ccccc1SSc1ccccc1N,0 +CC(C)(C)CC(C)(C)SSc1n[nH]c(=S)s1,0 +CC(CN(C)C)Sc1nnc(SC(C)CN(C)C)s1,0 +C=Cc1ccc(CC)cn1,0 +O=C(O)c1cc(O)ccc1O,0 +Oc1ccccc1C1SC(c2ccccc2O)SC(c2ccccc2O)S1,0 +O=S(=O)(O)C(C(I)I)S(=O)(=O)O,0 +O=S(=O)(O)CS(=O)(=O)O,0 +O=C1C=C(O)C(=O)c2ccccc21,0 +O=C1C(=C2Nc3ccc(S(=O)(=O)O)cc3C2=O)Nc2ccc(S(=O)(=O)O)cc21,0 +Oc1ccc2c(c1)OCC1(O)Cc3cc(O)c(O)cc3C21,0 +Oc1cc2c(cc1O)C1c3ccc(O)c(O)c3OCC1(O)C2,0 +CC[n+]1c2ccc(C)cc2nc2c3ccccc3c(N)cc21,0 +Cc1cc(C(=C2C=CC(=[N+](C)C)C=C2)c2ccc([N+](C)(C)C)cc2)ccc1N(C)C,0 +CCN(CC)c1ccc(C(=C2C=CC(=[N+](CC)CC)C=C2)c2ccc(N(CC)CC)cc2)cc1,0 +O=C1NC(=O)C(c2ccccc2)(c2ccccc2)N1,0 +O=c1ccc2ccccc2o1,0 +CN(C)P(=O)(OP(=O)(N(C)C)N(C)C)N(C)C,0 +CCOP(=S)(OCC)Oc1cc(C)nc(C(C)C)n1,0 +CCOP(=S)(OCC)Oc1ccc2c(C)c(Cl)c(=O)oc2c1,0 +C=CS(=O)(=O)O,0 +Ic1nc(-c2ccccc2)[nH]c1I,0 +CC1(C)SSC(C)(C)SS1,0 +CCCC(C(=O)OCC(C)N1CCCC1(C)C)C1CCCC1,0 +O=C(OCCN1CCCC12CCCCC2)C(c1ccccc1)C1CCCC1,0 +CC(=O)C1CCC2C1(C)CC1OC13C21C=CC2(CC(O)CCC23C)C2C(=O)OC(=O)C21,0 +CC12CCC(=O)C=C1CCC1C2C(O)CC2(C)C1CCC2(O)C(=O)COC(=O)CCC(=O)O,0 +C[N+]1(CCC(C(N)=O)(c2ccccc2)c2ccccc2)CCCC12CCCCC2,0 +COC1C(OC(=O)C=CC=CC=CC=CC(=O)O)CCC2(CO2)C1C1(C)OC1CC=C(C)C,2 +CN(C)C1C(O)=C(C(N)=O)C(=O)C2(O)C(O)=C3C(=O)c4c(O)cccc4C(C)(O)C3C(O)C12,0 +O=c1c(O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12,0 +CC1OC(OCC2OC(Oc3c(-c4ccc(O)c(O)c4)oc4cc(O)cc(O)c4c3=O)C(O)C(O)C2O)C(O)C(O)C1O,0 +CC1OC(Oc2c(-c3ccc(O)c(O)c3)oc3cc(O)cc(O)c3c2=O)C(O)C(O)C1O,0 +CC1OC(OCC2OC(Oc3c(-c4ccc(O)cc4)oc4cc(OC5OC(C)C(O)C(O)C5O)cc(O)c4c3=O)C(O)C(O)C2O)C(O)C(O)C1O,1 +CN(C)c1ccc2nc3c(ccc4ccccc43)[o+]c2c1,0 +CCN(CC)CCCOP(=O)(OCCCN(CC)CC)OCCCN(CC)CC,0 +Oc1nnc(O)c2c1[nH]c(=S)n2-c1ccccc1,0 +Cc1cc(=O)oc2cc(O)ccc12,0 +Cc1ncc2c(n1)NC(=O)NC2,0 +O=C(O)CCCCCCCCCCCC(=O)O,0 +O=S(=O)(O)c1ccc(NN(c2c(O)ccc3ccccc23)S(=O)(=O)O)cc1,0 +Cc1cc(O)ccc1C1(c2ccc(O)cc2C)OS(=O)(=O)c2ccccc21,0 +COc1cc(-c2ccc(N=Nc3ccc4c(S(=O)(=O)O)cc(S(=O)(=O)O)c(N)c4c3O)c(OC)c2)ccc1N=Nc1ccc2c(S(=O)(=O)O)cc(S(=O)(=O)O)c(N)c2c1O,2 +CCN(Cc1cccc(S(=O)(=O)O)c1)c1ccc(C(=C2C=CC(=[N+](CC)Cc3cccc(S(=O)(=O)O)c3)C=C2)c2ccc(S(=O)(=O)O)cc2)cc1,0 +NNC(=O)c1ccncc1,0 +COC1C(O)CCC2(CO2)C1C1(C)OC1CC=C(C)C,0 +COc1ccc2c(c1)CCCN2CCCCCCCCCCN(CCC(C)C)CCC(C)C,0 +CC12CCC(=O)C=C1CCC1C2C(=O)CC2(C)C1CCC2(O)C(=O)CO,0 +CC(=O)C1CCC2C3CCC4=CC(=O)CCC4(C)C3CCC12C,0 +C1CN1c1nc(N2CC2)nc(N2CC2)n1,0 +CC(=O)C1CCC2C1(C)CC=C1C23C=CC2(CC(O)CCC12C)C(C(=O)O)C3C(=O)O,0 +Cc1cc(S(=O)(=O)O)ccc1N=Nc1ccc(N=Nc2cc(S(=O)(=O)O)c3ccccc3c2O)c(C)c1,0 +CC12CCC3C(CC=C4CC(O)CCC43C)C1CCC2=O,0 +CC(=O)OCC[N+](C)(C)C.CCCCC(CC)COC(=O)CC(C(=O)OCC(CC)CCCC)S(=O)(=O)O,0 +Oc1nnc(O)c2[nH]cnc12,0 +COc1ccc2nc(C)cc(NCCCCCCCCCNc3cc(C)nc4ccc(OC)cc34)c2c1,0 +CCC(N)CS(=O)(=O)O,0 
+Cc1cc(N=Nc2ccc([N+](=O)[O-])cc2)c(NCC(O)C(O)C(O)CO)cc1C,0 +N#CC1(c2ccccc2)CCOCC1,0 +CC1=CC(C)=Nc2ccccc2N1,0 +COc1cc2c(cc1OC)NC(C)=CC(C)=N2,0 +COc1ccc2c(c1OC)CC1COCC21,0 +COc1cc([N+](=O)[O-])c([N+](=O)[O-])c([N+](=O)[O-])c1OC,0 +O=C1CCc2ccccc2O1,0 +CC1=CC(C)=Nc2cc(Cl)ccc2N1,0 +CCOC=C1C(=O)N(C(C)=O)c2cc(OC)c(OC)cc21,0 +C1=C(c2ccccc2)Nc2ccccc2N=C1c1ccccc1,0 +COc1ccc2c(c1OC)C(=O)OC2c1c2c(c(OC)c3c1OCO3)C(C1OC(=O)c3c1ccc(OC)c3OC)N(C)CC2,0 +CN1BN(C)BN(C)B1,0 +O=c1oc2ccc([N+](=O)[O-])cc2cc1Br,0 +Cc1ccc(C=C2CCOC2=O)cc1,0 +S=P(N1CCOCC1)(N1CC1)N1CC1,0 +O=S(=O)(O)c1ccc(N=Nc2ccc(O)cc2O)cc1,0 +Cc1ccc(N=Nc2c(O)c(S(=O)(=O)O)cc3cc(S(=O)(=O)O)ccc23)c(C)c1,0 +O=S1(=O)OC(c2ccc(O)cc2)(c2ccc(O)cc2)c2ccccc21,0 +N=C1C=CC(=C(c2ccc(N)cc2)c2ccc(N)cc2)C=C1,0 +Cc1ccc(N=Nc2ccc(O)c(N=Nc3ccc(S(=O)(=O)O)cc3)c2O)c(C)c1,0 +Cc1cc(C2(c3cc(C)c(O)cc3C)OS(=O)(=O)c3ccccc32)c(C)cc1O,0 +CCCCCCCCCCCCCCCCCC(=O)O.CCN(CC)c1ccc2c(-c3ccccc3C(=O)O)c3ccc(N(CC)CC)cc3[o+]c2c1,0 +CNCCS(=O)(=O)O,0 +CSP(=S)(SC)SC(C)c1ccc(C)cc1,0 +CSSC(SC)SC,0 +OC1CN=C2C=CC=CN2C1,0 +CC(=O)Oc1ccc2c(oc(=O)c3nc(C)oc32)c1C,0 +N#CCCN1C(=O)CCC2(CCC(=O)N(CCC#N)C2=O)C1=O,0 +CSc1ccc(C=NC(=N)SN)cc1C,0 +O=c1oc2c(ccc3c(O)cccc32)c2c1CCCC2,0 +Nc1nc(N)nc(Nc2ccc([As](=O)(O)O)cc2)n1,0 +CC(=O)C(CCC(=O)O)(CCC(=O)O)C(C)C,0 +Cc1cnc2cc(Cl)ccc2c1NCCN(CC(C)C)CC(C)C.O=P(O)(O)O,0 +CCCCN(CCCC)CCCCNc1c(C)cnc2cc(Cl)ccc12.O=P(O)(O)O,0 +Cc1ccc2oc3ccccc3c(=O)c2c1,0 +O=C1OS(=O)(=O)c2ccccc21,0 +Cc1cc2nc3ccc(N(C)C)cc3[s+]c2cc1N,0 +O=S1(=O)OC(c2cc(Br)c(O)c(Br)c2)(c2cc(Br)c(O)c(Br)c2)c2c(Br)c(Br)c(Br)c(Br)c21,0 +Cc1cc(O)c(C(C)C)cc1C1(c2cc(C(C)C)c(O)cc2C)OS(=O)(=O)c2ccccc21,0 +CC1(C)Nc2cccc3c(N=Nc4ccc(N=Nc5ccccc5)c5ccccc45)ccc(c23)N1,0 +Nc1c(S(=O)(=O)O)cc2cc(S(=O)(=O)O)ccc2c1N=Nc1ccc(-c2ccc(N=Nc3c(N)c(S(=O)(=O)O)cc4cc(S(=O)(=O)O)ccc34)c(S(=O)(=O)O)c2)cc1,1 +Cc1cc(-c2ccc(N=Nc3c(S(=O)(=O)O)cc4cc(S(=O)(=O)O)cc(N)c4c3O)c(C)c2)ccc1N=Nc1c(S(=O)(=O)O)cc2cc(S(=O)(=O)O)cc(N)c2c1O,1 diff --git a/chemprop/tests/data/classification/test.npz b/chemprop/tests/data/classification/test.npz new file mode 100644 index 0000000000000000000000000000000000000000..2388779431d95786f12c0d54a13424568b9ffcd7 --- /dev/null +++ b/chemprop/tests/data/classification/test.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a55cb4edcb2d9ddfda244d363ff0adaf6eed4b39f1212b43ee58ba4c47866b6a +size 7368 diff --git a/chemprop/tests/data/classification/test_true.csv b/chemprop/tests/data/classification/test_true.csv new file mode 100644 index 0000000000000000000000000000000000000000..521510a318ab22fb477fb9d305fbba248227d954 --- /dev/null +++ b/chemprop/tests/data/classification/test_true.csv @@ -0,0 +1,11 @@ +smiles,NR-AR,NR-AR-LBD,NR-AhR,NR-Aromatase,NR-ER,NR-ER-LBD,NR-PPAR-gamma,SR-ARE,SR-ATAD5,SR-HSE,SR-MMP,SR-p53 +CCc1cccc(C)c1N(C(=O)CCl)[C@@H](C)COC,0,0,0,,0,0,,1,0,0,,0 +O=C(O)c1ccccc1C(=O)Nc1cccc2ccccc12,0,0,1,,0,0,0,1,0,0,,0 +CCC(=O)OC1(c2ccccc2)CCN(C)CC1,0,,0,0,0,0,0,0,0,0,0,0 +COc1cc(-c2ccc(=O)[nH]n2)ccc1OC(F)F,0,0,,0,,0,0,0,1,0,0,0 +CCOc1ccc([N+](=O)[O-])cc1,0,0,0,,1,1,,,0,,1, +CCCCN(CCCC)CCCOC(=O)c1ccc(N)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +CCCCOc1cc(C(=O)NCCN(CC)CC)c2ccccc2n1,0,0,0,0,0,0,0,0,0,0,,0 +CC(C)(c1cc(Br)c(OCC(Br)CBr)c(Br)c1)c1cc(Br)c(OCC(Br)CBr)c(Br)c1,0,0,0,0,0,0,0,0,0,0,0,0 +CC(C)c1ccc(C(C)C)cc1,0,0,0,0,0,0,0,0,0,0,0,0 +COc1c2occc2cc2ccc(=O)oc12,0,0,1,0,0,1,0,0,0,0,0,0 diff --git a/chemprop/tests/data/example_model_v1_4.pt b/chemprop/tests/data/example_model_v1_4.pt new file mode 100644 index 0000000000000000000000000000000000000000..75d086a86bb59f04ba495624133dcc0e1c4cf694 --- /dev/null +++ 
b/chemprop/tests/data/example_model_v1_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff25a9e3bb73ef6e659fd030f9c025050ae19a073daaa2cf5d5b50bcdb1bd05a +size 44070866 diff --git a/chemprop/tests/data/example_model_v1_regression_mol.pt b/chemprop/tests/data/example_model_v1_regression_mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a95ed0312789058ceb78af262ee29eac84a5911 --- /dev/null +++ b/chemprop/tests/data/example_model_v1_regression_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dad80688a828c46861df5c8b1100c4d4ea030eec28aab906577c8dd70f1fb4b +size 1428085 diff --git a/chemprop/tests/data/example_model_v1_regression_mol_prediction.csv b/chemprop/tests/data/example_model_v1_regression_mol_prediction.csv new file mode 100644 index 0000000000000000000000000000000000000000..78a3c6358385125640d9fd6f79d0b77b6754b57f --- /dev/null +++ b/chemprop/tests/data/example_model_v1_regression_mol_prediction.csv @@ -0,0 +1,51 @@ +smiles,logSolubility +C/C1CCC(\C)CC1,-2.4685160026205875 +Cc1ccc(OP(=O)(Oc2cccc(C)c2)Oc3ccccc3C)cc1,-3.690997102347433 +c1c(Br)ccc2ccccc12,-4.400622334163299 +CCOc1ccc(cc1)C(C)(C)COCc3cccc(Oc2ccccc2)c3,-3.648697421001764 +CCC1(C(=O)NC(=O)NC1=O)C2=CCCCC2,-3.3428182733716643 +C(Cc1ccccc1)c2ccccc2,-3.503486854056 +Cc1cccc(N)c1,-3.5574339504454997 +CCN(CC(C)=C)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O,-3.516950400153098 +CC(C)O,-1.7148572271406979 +CCCCCCCCO,-2.239583430830765 +CN(C)C(=O)SCCCCOc1ccccc1,-2.915142669267167 +CC2Nc1cc(Cl)c(cc1C(=O)N2c3ccccc3C)S(N)(=O)=O,-4.213283027865243 +CCCCCCC#C,-2.273294986429117 +COP(=S)(OC)Oc1cc(Cl)c(I)cc1Cl,-3.607098153202899 +CC(C)CCOC(=O)C,-2.0786930741810843 +CCN(CC)c1ccccc1,-3.343242983008425 +O=N(=O)c1cc(Cl)c(Cl)cc1,-3.7967865273571073 +ClC1=C(Cl)C2(Cl)C3C4CC(C=C4)C3C1(Cl)C2(Cl)Cl,-4.632335644884678 +CC(=O)Nc1ccc(F)cc1,-3.5089342880842556 +CC1(C)C(C=C(Br)Br)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2,-3.640108243303718 +O=C1NC(=O)NC(=O)C1(CC)c1ccccc1,-3.764417879682843 +c1ccncc1,-2.8943703242072205 +OC1CCCCCC1,-2.25571404330129 +CCCCCCCCCCCCCCCCO,-2.35840188440722 +COC(=O)C1=C(C)NC(=C(C1c2ccccc2N(=O)=O)C(=O)OC)C,-3.6143835926716297 +Nc1nccs1,-3.0722337202225756 +CCCC(C)C,-2.022391309844789 +Cc1cccc(C)c1,-3.5775675448351674 +Clc2ccc(Oc1ccc(cc1)N(=O)=O)c(Cl)c2,-3.9504066050044386 +O=C1NC(=O)C(N1)(c2ccccc2)c3ccccc3,-4.051156505350688 +Oc1ccc(Cl)cc1C(=O)Nc2ccc(cc2Cl)N(=O)=O,-4.043370213576292 +CC(C)(C)Cc1ccccc1,-3.1206711627922026 +CCOC(=O)c1cncn1C(C)c2ccccc2,-3.6367735594944337 +OCC1OC(CO)(OC2OC(COC3OC(CO)C(O)C(O)C3O)C(O)C(O)C2O)C(O)C1O,-2.397844343815426 +ClCC#N,-2.244219687088453 +CCCCCCCCC(=O)C,-2.2648864967385514 +CCCOC(=O)C,-2.071270881117651 +OC3N=C(c1ccccc1)c2cc(Cl)ccc2NC3=O,-4.090765668009631 +CCOc2ccc1nc(sc1c2)S(N)(=O)=O,-3.7640808841730014 +CC(C)C(C)O,-1.9086673917669121 +Oc2ccc1ncccc1c2,-4.176121940189839 +Cc3nnc4CN=C(c1ccccc1Cl)c2cc(Cl)ccc2n34,-4.591402014379124 +COc1ccc(cc1)C(c2ccc(OC)cc2)C(Cl)(Cl)Cl,-3.806374501332118 +CCSCc1ccccc1OC(=O)NC,-3.2378846102796097 +Clc1ccc(c(Cl)c1)c2c(Cl)ccc(Cl)c2Cl,-4.711614117032452 +Clc1ccc(cc1)c2ccc(Cl)cc2,-4.262654293284488 +CCCC1CCCC1,-2.47728326938902 +CCCC(O)CC,-2.0445744136508104 +CCCCCCCC#C,-2.291813588932748 +ClC1=C(Cl)C(Cl)(C(=C1Cl)Cl)C2(Cl)C(=C(Cl)C(=C2Cl)Cl)Cl,-4.421838323973119 diff --git a/chemprop/tests/data/example_model_v2_classification_dirichlet_mol.pt b/chemprop/tests/data/example_model_v2_classification_dirichlet_mol.pt new file mode 100644 index 
0000000000000000000000000000000000000000..3dbf9fd9446d785d9964d062cecb8e8b3563e22e --- /dev/null +++ b/chemprop/tests/data/example_model_v2_classification_dirichlet_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff5fab340eabe94c131880eb3292dc8737f4a2de0ebf7c492b43bf07aa935fc +size 1287822 diff --git a/chemprop/tests/data/example_model_v2_classification_mol.pt b/chemprop/tests/data/example_model_v2_classification_mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..56135039a2a0cf411e318c667d7a2ea2dcf75ec6 --- /dev/null +++ b/chemprop/tests/data/example_model_v2_classification_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6d13a6094d197f543d32e502991fc5af4b37d92dfe03c8b6eef11c9b5c61ed +size 1290660 diff --git a/chemprop/tests/data/example_model_v2_classification_mol_multiclass.pt b/chemprop/tests/data/example_model_v2_classification_mol_multiclass.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcc416f7dde670e4f64775d819d07da32c03b32c --- /dev/null +++ b/chemprop/tests/data/example_model_v2_classification_mol_multiclass.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:845c7960c3c0245b8f7de946c2f90d4e7704860a53001cc31be3d031cfeb70af +size 1289770 diff --git a/chemprop/tests/data/example_model_v2_classification_mol_with_metrics.ckpt b/chemprop/tests/data/example_model_v2_classification_mol_with_metrics.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..33b7c505a5772490792be48426958184668f2c03 --- /dev/null +++ b/chemprop/tests/data/example_model_v2_classification_mol_with_metrics.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ad46d783130249352d3da372a250ba2baca8a655f69c47ea7d6c9f183af8c43 +size 3858271 diff --git a/chemprop/tests/data/example_model_v2_multiclass_dirichlet_mol.pt b/chemprop/tests/data/example_model_v2_multiclass_dirichlet_mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..de7e938459c5b6e38f1607e3563f4aec95b86576 --- /dev/null +++ b/chemprop/tests/data/example_model_v2_multiclass_dirichlet_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2301dd7918851f19022938716c15501c9397e0b2f629753c0b7c03349851b21b +size 1281870 diff --git a/chemprop/tests/data/example_model_v2_regression_evidential_mol.pt b/chemprop/tests/data/example_model_v2_regression_evidential_mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc414e4ccb58c8953d276d03e1d1ebb82eb01b2f --- /dev/null +++ b/chemprop/tests/data/example_model_v2_regression_evidential_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c056822e6e83801b7f4c89693df5404123a105142ece80737096be19a065f06 +size 1291106 diff --git a/chemprop/tests/data/example_model_v2_regression_mol+mol.ckpt b/chemprop/tests/data/example_model_v2_regression_mol+mol.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..d5e0d81f7e695b1e2f8a9a1e6259e141a2d54004 --- /dev/null +++ b/chemprop/tests/data/example_model_v2_regression_mol+mol.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0314c3a1c8971ee55e6d5cf7fb90b867b861ea1950087701d7d1761a7a96314 +size 7680454 diff --git a/chemprop/tests/data/example_model_v2_regression_mol+mol.pt b/chemprop/tests/data/example_model_v2_regression_mol+mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..c300fa6d16acafce7ea16bcda008ee8c22967d84 --- /dev/null +++ 
b/chemprop/tests/data/example_model_v2_regression_mol+mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8a0a992770e486982f5ff7b24425886a6b7ffba92d975fc0840185d97b03da4 +size 2565818 diff --git a/chemprop/tests/data/example_model_v2_regression_mol.ckpt b/chemprop/tests/data/example_model_v2_regression_mol.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..09d85938196dcc30fb6beb0b21fd6faaa79343cc --- /dev/null +++ b/chemprop/tests/data/example_model_v2_regression_mol.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec996c623cea37b6690f13c6e41560937eb1768c8b180a41e97b03526b12fd1b +size 3852134 diff --git a/chemprop/tests/data/example_model_v2_regression_mol.pt b/chemprop/tests/data/example_model_v2_regression_mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..037a89f604600f1e706f698d14ece71b9a107054 --- /dev/null +++ b/chemprop/tests/data/example_model_v2_regression_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a222b920c9152d67f6e1970881221079ab8a321d2db57759921da51b5b4a825 +size 1287750 diff --git a/chemprop/tests/data/example_model_v2_regression_mol_multitask.pt b/chemprop/tests/data/example_model_v2_regression_mol_multitask.pt new file mode 100644 index 0000000000000000000000000000000000000000..78054c01c3c344e490cb7132765412e1e6a41d0b --- /dev/null +++ b/chemprop/tests/data/example_model_v2_regression_mol_multitask.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d053982d9a9e524d9b30aa2df76f38f07d0752663515fc997d4824305b5b332 +size 1301582 diff --git a/chemprop/tests/data/example_model_v2_regression_mol_with_metrics.ckpt b/chemprop/tests/data/example_model_v2_regression_mol_with_metrics.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..ebc80590f655051f92fee02cc88af098c0ed9924 --- /dev/null +++ b/chemprop/tests/data/example_model_v2_regression_mol_with_metrics.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18ac4e7f54c39128cbde9d3f04312af57db6b1ffe2e9f6154aceeca42964c81d +size 3852975 diff --git a/chemprop/tests/data/example_model_v2_regression_mve_mol.pt b/chemprop/tests/data/example_model_v2_regression_mve_mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c33dd3511e45334955dacb55b88284d04781cc9 --- /dev/null +++ b/chemprop/tests/data/example_model_v2_regression_mve_mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b95ee7e06ff51b29868badb2f548bb24d458f351a979a0393321d003fa1ad15 +size 1288470 diff --git a/chemprop/tests/data/example_model_v2_regression_rxn+mol.pt b/chemprop/tests/data/example_model_v2_regression_rxn+mol.pt new file mode 100644 index 0000000000000000000000000000000000000000..172524ba7e3d88c3590a3171cb01b436a305a7ee --- /dev/null +++ b/chemprop/tests/data/example_model_v2_regression_rxn+mol.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10908249c6ed837dab533ddb9b1613a229198f6210bdf8dc9b0ad9dbba4bce33 +size 2664186 diff --git a/chemprop/tests/data/example_model_v2_regression_rxn.ckpt b/chemprop/tests/data/example_model_v2_regression_rxn.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..5af92b627ef62d17bf6922e0d3cc078bf1f4d772 --- /dev/null +++ b/chemprop/tests/data/example_model_v2_regression_rxn.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63289026b497e50517ba7b89aeb892bd0f6a0a2aaeea8053fd2a23c5c7eec892 +size 4147238 diff 
--git a/chemprop/tests/data/example_model_v2_regression_rxn.pt b/chemprop/tests/data/example_model_v2_regression_rxn.pt new file mode 100644 index 0000000000000000000000000000000000000000..13fdc0051ec9e03f9b21faeccb6e068ec9474eb9 --- /dev/null +++ b/chemprop/tests/data/example_model_v2_regression_rxn.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa792943e8575599b1d3af31667bcd8ec8961a04a6deb11d9ffe0e9cbcce278 +size 1386118 diff --git a/chemprop/tests/data/example_model_v2_trained_on_cuda.pt b/chemprop/tests/data/example_model_v2_trained_on_cuda.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0bcc7be413683e858d6eaf9b07c4345ba2291bf --- /dev/null +++ b/chemprop/tests/data/example_model_v2_trained_on_cuda.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1db40c0b5fae241ec8697cd066f0e3fa7384086db8f207f144229fb6be44241 +size 1280828 diff --git a/chemprop/tests/data/regression.csv b/chemprop/tests/data/regression.csv new file mode 100644 index 0000000000000000000000000000000000000000..1231d3626b5f6d59b5a61d715cb793c58e2e8a45 --- /dev/null +++ b/chemprop/tests/data/regression.csv @@ -0,0 +1,501 @@ +smiles,logSolubility +OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O,-0.77 +Cc1occc1C(=O)Nc2ccccc2,-3.3 +CC(C)=CCCC(C)=CC(=O),-2.06 +c1ccc2c(c1)ccc3c2ccc4c5ccccc5ccc43,-7.87 +c1ccsc1,-1.33 +c2ccc1scnc1c2,-1.5 +Clc1cc(Cl)c(c(Cl)c1)c2c(Cl)cccc2Cl,-7.32 +CC12CCC3C(CCc4cc(O)ccc34)C2CCC1O,-5.03 +ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl,-6.29 +COc5cc4OCC3Oc2c1CC(Oc1ccc2C(=O)C3c4cc5OC)C(C)=C,-4.42 +O=C1CCCN1,1.07 +Clc1ccc2ccccc2c1,-4.14 +CCCC=C,-2.68 +CCC1(C(=O)NCNC1=O)c2ccccc2,-2.64 +CCCCCCCCCCCCCC,-7.96 +CC(C)Cl,-1.41 +CCC(C)CO,-0.47 +N#Cc1ccccc1,-1 +CCOP(=S)(OCC)Oc1cc(C)nc(n1)C(C)C,-3.64 +CCCCCCCCCC(C)O,-2.94 +Clc1ccc(c(Cl)c1)c2c(Cl)ccc(Cl)c2Cl,-7.43 +O=c2[nH]c1CCCc1c(=O)n2C3CCCCC3,-4.593999999999999 +CCOP(=S)(OCC)SCSCC,-4.11 +CCOc1ccc(NC(=O)C)cc1,-2.35 +CCN(CC)c1c(cc(c(N)c1N(=O)=O)C(F)(F)F)N(=O)=O,-5.47 +CCCCCCCO,-1.81 +Cn1c(=O)n(C)c2nc[nH]c2c1=O,-1.39 +CCCCC1(CC)C(=O)NC(=O)NC1=O,-1.661 +ClC(Cl)=C(c1ccc(Cl)cc1)c2ccc(Cl)cc2,-6.9 +CCCCCCCC(=O)OC,-3.17 +CCc1ccc(CC)cc1,-3.75 +CCOP(=S)(OCC)SCSC(C)(C)C,-4.755 +COC(=O)Nc1cccc(OC(=O)Nc2cccc(C)c2)c1,-4.805 +ClC(=C)Cl,-1.64 +Cc1cccc2c1Cc3ccccc32,-5.22 +CCCCC=O,-0.85 +N(c1ccccc1)c2ccccc2,-3.5039999999999996 +CN(C)C(=O)SCCCCOc1ccccc1,-3.927 +CCCOP(=S)(OCCC)SCC(=O)N1CCCCC1C,-4.15 +CCCCCCCI,-4.81 +c1c(Cl)cccc1c2ccccc2,-4.88 +OCCCC=C,-0.15 +O=C2NC(=O)C1(CCC1)C(=O)N2,-1.655 +CC(C)C1CCC(C)CC1O,-2.53 +CC(C)OC=O,-0.63 +CCCCCC(C)O,-1.55 +CC(=O)Nc1ccc(Br)cc1,-3.083 +c1ccccc1n2ncc(N)c(Br)c2(=O),-3.127 +COC(=O)C1=C(C)NC(=C(C1c2ccccc2N(=O)=O)C(=O)OC)C,-4.76 +c2c(C)cc1nc(C)ccc1c2,-1.94 +CCCCCCC#C,-3.66 +CCC1(C(=O)NC(=O)NC1=O)C2=CCCCC2,-2.17 +c1ccc2c(c1)ccc3c4ccccc4ccc23,-8.057 +CCC(C)n1c(=O)[nH]c(C)c(Br)c1=O,-2.523 +Clc1cccc(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl,-8.6 +Cc1ccccc1O,-0.62 +CC(C)CCC(C)(C)C,-5.05 +Cc1ccc(C)c2ccccc12,-4.14 +Cc1cc2c3ccccc3ccc2c4ccccc14,-6.57 +CCCC(=O)C,-0.19 +Clc1cc(Cl)c(Cl)c(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl,-9.15 +CCCOC(=O)CC,-0.82 +CC34CC(O)C1(F)C(CCC2=CC(=O)C=CC12C)C3CC(O)C4(O)C(=O)CO,-3.68 +Nc1ccc(O)cc1,-0.8 +O=C(Cn1ccnc1N(=O)=O)NCc2ccccc2,-2.81 +OC4=C(C1CCC(CC1)c2ccc(Cl)cc2)C(=O)c3ccccc3C4=O,-5.931 +CCNc1nc(Cl)nc(n1)N(CC)CC,-4.06 +NC(=O)c1cnccn1,-0.667 +CCC(Br)(CC)C(=O)NC(N)=O,-2.68 +Clc1ccccc1c2ccccc2Cl,-5.27 +O=C2CN(N=Cc1ccc(o1)N(=O)=O)C(=O)N2,-3.38 +Clc2ccc(Oc1ccc(cc1)N(=O)=O)c(Cl)c2,-5.46 +CC1(C)C2CCC1(C)C(=O)C2,-1.96 +O=C1NC(=O)NC(=O)C1(CC=C)c1ccccc1,-2.369 
+CCCCC(=O)OCC,-2.25 +CC(C)CCOC(=O)C,-1.92 +O=C1N(COC(=O)CCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-5.886 +Clc1cccc(c1)c2cc(Cl)ccc2Cl,-6.01 +CCCBr,-1.73 +CCCC1COC(Cn2cncn2)(O1)c3ccc(Cl)cc3Cl,-3.4930000000000003 +COP(=S)(OC)SCC(=O)N(C)C=O,-1.995 +Cc1ncnc2nccnc12,-0.466 +NC(=S)N,0.32 +Cc1ccc(C)cc1,-2.77 +CCc1ccccc1CC,-3.28 +ClC(Cl)(Cl)C(Cl)(Cl)Cl,-3.67 +CC(C)C(C(=O)OC(C#N)c1cccc(Oc2ccccc2)c1)c3ccc(OC(F)F)cc3,-6.876 +CCCN(=O)=O,-0.8 +CC(C)C1CCC(C)CC1=O,-2.35 +CCN2c1cc(Cl)ccc1NC(=O)c3cccnc23,-5.36 +O=N(=O)c1c(Cl)c(Cl)ccc1,-3.48 +CCCC(C)C1(CC=C)C(=O)NC(=S)NC1=O,-3.46 +c1ccc2c(c1)c3cccc4cccc2c34,-6 +CCCOC(C)C,-1.34 +Cc1cc(C)c2ccccc2c1,-4.29 +CCC(=C(CC)c1ccc(O)cc1)c2ccc(O)cc2,-4.07 +c1(C#N)c(Cl)c(C#N)c(Cl)c(Cl)c(Cl)1,-5.64 +Clc1ccc(Cl)c(c1)c2ccc(Cl)c(Cl)c2,-7.25 +C1OC1c2ccccc2,-1.6 +CC(C)c1ccccc1,-3.27 +CC12CCC3C(CCC4=CC(=O)CCC34C)C2CCC1C(=O)CO,-3.45 +c2(Cl)c(Cl)c(Cl)c1nccnc1c2(Cl),-5.43 +C1OC(O)C(O)C(O)C1O,0.39 +ClCCl,-0.63 +CCc1cccc2ccccc12,-4.17 +COC=O,0.58 +Oc1ccccc1N(=O)=O,-1.74 +Cc1c[nH]c(=O)[nH]c1=O,-1.506 +CC(C)C,-2.55 +OCC1OC(C(O)C1O)n2cnc3c(O)ncnc23,-1.23 +Oc1c(I)cc(C#N)cc1I,-3.61 +Oc1ccc(Cl)cc1C(=O)Nc2ccc(cc2Cl)N(=O)=O,-4.7 +CCCCC,-3.18 +c1ccccc1O,0 +Nc3ccc2cc1ccccc1cc2c3,-5.17 +Cn1cnc2n(C)c(=O)[nH]c(=O)c12,-2.523 +c1ccc2cnccc2c1,-1.45 +COP(=S)(OC)SCC(=O)N(C(C)C)c1ccc(Cl)cc1,-4.4319999999999995 +CCCCCCc1ccccc1,-5.21 +Clc1ccccc1c2ccccc2,-4.54 +CCCC(=C)C,-3.03 +CC(C)C(C)C(C)C,-4.8 +Clc1cc(Cl)c(Cl)c(Cl)c1Cl,-5.65 +Oc1cccc(c1)N(=O)=O,-1.01 +CCCCCCCCC=C,-5.51 +CC(=O)OCC(COC(=O)C)OC(=O)C,-0.6 +CCCCc1c(C)nc(nc1O)N(C)C,-2.24 +CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2ccc(F)c(Oc3ccccc3)c2,-7.337000000000001 +c1ccncc1,0.76 +CCCCCCCBr,-4.43 +Cc1ccncc1C,0.36 +CC34CC(O)C1(F)C(CCC2=CC(=O)CCC12C)C3CCC4(O)C(=O)CO,-3.43 +CCSCc1ccccc1OC(=O)NC,-2.09 +CCOC(=O)CC(=O)OCC,-0.82 +CC1=CCC(CC1)C(C)=C,-4.26 +C1Cc2ccccc2C1,-3.04 +CC(C)(C)c1ccc(O)cc1,-2.41 +O=C2NC(=O)C1(CC1)C(=O)N2,-1.886 +Clc1cccc(I)c1,-3.55 +Brc1cccc2ccccc12,-4.35 +CC/C=C/C,-2.54 +Cc1cccc(C)n1,0.45 +ClC=C(Cl)Cl,-1.96 +Nc1cccc2ccccc12,-1.92 +Cc1cccc(C)c1,-2.82 +Oc2ncc1nccnc1n2,-1.9469999999999998 +CO,1.57 +CCC1(CCC(C)C)C(=O)NC(=O)NC1=O,-2.468 +CCC(=O)C,0.52 +Fc1c[nH]c(=O)[nH]c1=O,-1.077 +Nc1ncnc2n(ccc12)C3OC(CO)C(O)C3O,-1.95 +Oc1cccc(O)c1,0.81 +CCCCCCO,-1.24 +CCCCCCl,-2.73 +C=CC=C,-1.87 +CCCOC(=O)C,-0.72 +Oc2ccc1CCCCc1c2,-1.99 +NC(=O)CCl,-0.02 +COP(=S)(OC)Oc1cc(Cl)c(I)cc1Cl,-6.62 +Cc1ccc(Cl)cc1,-3.08 +CSc1nnc(c(=O)n1N)C(C)(C)C,-2.253 +Cc1ccc(OP(=O)(Oc2cccc(C)c2)Oc3ccccc3C)cc1,-6.01 +CCCCCC=O,-1.3 +CCCCOC(=O)c1ccc(N)cc1,-3.082 +O2c1cc(C)ccc1N(C)C(=O)c3cc(N)cnc23,-3.043 +CC(C)=CCC/C(C)=C\CO,-2.46 +Clc1ccc(cc1)c2ccccc2Cl,-5.28 +O=C1N(COC(=O)CCCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-6.523 +CCN(=O)=O,-0.22 +CCN(CC(C)=C)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O,-6.124 +Clc1ccc(Cl)c(Cl)c1Cl,-4.57 +CCCC(C)(COC(N)=O)COC(N)=O,-1.807 +CC(=O)C3CCC4C2CC=C1CC(O)CCC1(C)C2CCC34C,-4.65 +CI,-1 +CC1CC(C)C(=O)C(C1)C(O)CC2CC(=O)NC(=O)C2,-1.13 +O=C1N(COC(=O)CCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-6.301 +CC1=CC(=O)CC(C)(C)C1,-1.06 +O=C1NC(=O)NC(=O)C1(CC)C(C)CC,-2.39 +CCCCC(=O)CCCC,-2.58 +CCC1(CCC(=O)NC1=O)c2ccccc2,-2.3369999999999997 +CCC(C)CC,-3.68 +CCOc1ccc(cc1)C(C)(C)COCc3cccc(Oc2ccccc2)c3,-8.6 +Cc1ccccc1n3c(C)nc2ccccc2c3=O,-2.925 +ClCC#N,-0.092 +CCOP(=S)(CC)Oc1cc(Cl)c(Cl)cc1Cl,-5.752000000000001 +CC12CCC(=O)C=C1CCC3C2CCC4(C)C3CCC4(O)C#C,-5.66 +c1ccnnc1,1.1 +Clc1cc(Cl)c(Cl)c(Cl)c1,-4.63 +C1C(O)CCC2(C)CC3CCC4(C)C5(C)CC6OCC(C)CC6OC5CC4C3C=C21,-7.32 +Nc1ccccc1O,-0.72 +CCCCCCCCC(=O)OCC,-3.8 +COCC(=O)N(C(C)C(=O)OC)c1c(C)cccc1C,-1.601 +CNC(=O)Oc1ccccc1OC(C)C,-2.05 +CCC(C)Cl,-1.96 
+Oc1ccc2ccccc2c1,-2.28 +CC(C)Oc1cc(c(Cl)cc1Cl)n2nc(oc2=O)C(C)(C)C,-5.696000000000001 +CCCCC#C,-2.36 +CCCCCCCC#C,-4.24 +Cc1ccccc1Cl,-3.52 +CC(C)OC(C)C,-1.1 +Nc1ccc(cc1)S(=O)(=O)c2ccc(N)cc2,-3.094 +CNN,1.34 +CC#C,-0.41 +CCOP(=S)(OCC)ON=C(C#N)c1ccccc1,-4.862 +CCNP(=S)(OC)OC(=CC(=O)OC(C)C)C,-3.408 +C=CC=O,0.57 +O=c1[nH]cnc2nc[nH]c12,-2.296 +Oc2ccc1ncccc1c2,-2.16 +Fc1ccccc1,-1.8 +CCCCl,-1.47 +CCOC(=O)C,-0.04 +CCCC(C)(C)C,-4.36 +Cc1cc(C)c(C)c(C)c1C,-4 +CC12CCC(CC1)C(C)(C)O2,-1.64 +CCCCOC(=O)CCCCCCCCC(=O)OCCCC,-3.8960000000000004 +Clc1ccc(cc1)c2ccc(Cl)cc2,-6.56 +Cc1cccnc1C,0.38 +CC(=C)C1CC=C(C)C(=O)C1,-2.06 +CCOP(=S)(OCC)SCSc1ccc(Cl)cc1,-5.736000000000001 +COc1cc(cc(OC)c1O)C6C2C(COC2=O)C(OC4OC3COC(C)OC3C(O)C4O)c7cc5OCOc5cc67,-3.571 +c1cc2cccc3c4cccc5cccc(c(c1)c23)c54,-8.804 +Cc1ccc(cc1N(=O)=O)N(=O)=O,-2.82 +c1c(Br)ccc2ccccc12,-4.4 +CNC(=O)Oc1cccc(N=CN(C)C)c1,-2.34 +COc2cnc1ncncc1n2,-1.139 +Cc3ccnc4N(C1CC1)c2ncccc2C(=O)Nc34,-3.19 +CCOP(=S)(OCC)Oc1nc(Cl)n(n1)C(C)C,-3.658 +CC(=C)C=C,-2.03 +CC(C)=CCCC(O)(C)C=C,-1.99 +COP(=S)(OC)Oc1ccc(SC)c(C)c1,-4.57 +OC1CCCCC1,-0.44 +O=C1NC(=O)NC(=O)C1(C)CC=C,-1.16 +CC34CCC1C(CCC2CC(O)CCC12C)C3CCC4=O,-4.16 +OCC(O)C(O)C(O)C(O)CO,0.06 +Cc1ccc(cc1)c2ccccc2,-4.62 +CCNc1nc(Cl)nc(NC(C)C)n1,-3.85 +NC(=S)Nc1ccccc1,-1.77 +CCCC(=O)CCC,-1.3 +CC(=O)C(C)(C)C,-0.72 +Oc1ccc(Cl)cc1,-0.7 +O=C1CCCCC1,-0.6 +Cc1cccc(N)c1,-0.85 +ClC(Cl)(Cl)C#N,-2.168 +CNc2cnn(c1cccc(c1)C(F)(F)F)c(=O)c2Cl,-4.046 +CCCCCCCCC(=O)C,-3.3 +CCN(CC)c1nc(Cl)nc(NC(C)C)n1,-3.785 +CCOC(=O)c1ccc(N)cc1,-2.616 +Clc1ccc(Cl)c(Cl)c1,-3.59 +Cc3nnc4CN=C(c1ccccc1Cl)c2cc(Cl)ccc2n34,-4.09 +Oc1ccccc1O,0.62 +CCN2c1ncccc1N(C)C(=O)c3cccnc23,-2.62 +CSC,-0.45 +Cc1ccccc1Br,-2.23 +CCOC(=O)N,0.85 +CC(=O)OC3(CCC4C2C=C(C)C1=CC(=O)CCC1(C)C2CCC34C)C(C)=O,-5.35 +CC(C)C(O)C(C)C,-1.22 +c1ccc2ccccc2c1,-3.6 +CCNc1ccccc1,-1.7 +O=C1NC(=O)C(N1)(c2ccccc2)c3ccccc3,-4.0969999999999995 +Cc1c2ccccc2c(C)c3ccc4ccccc4c13,-7.02 +CCOP(=S)(OCC)SC(CCl)N1C(=O)c2ccccc2C1=O,-6.34 +COc1ccc(cc1)C(c2ccc(OC)cc2)C(Cl)(Cl)Cl,-6.89 +Fc1cccc(F)c1C(=O)NC(=O)Nc2cc(Cl)c(F)c(Cl)c2F,-7.28 +O=C1N(COC(=O)CCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-4.678 +CN(C)C(=O)Nc1ccc(Cl)cc1,-2.89 +OC(Cn1cncn1)(c2ccc(F)cc2)c3ccccc3F,-3.37 +CC(=O)OCC(=O)C3(O)C(CC4C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC34C)OC(C)=O,-4.13 +CCCCBr,-2.37 +Brc1cc(Br)c(Br)cc1Br,-6.98 +CC(C)CC(=O)C,-0.74 +CCSC(=O)N(CC)C1CCCCC1,-3.4 +COc1ccc(Cl)cc1,-2.78 +CC1(C)C(C=C(Br)Br)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2,-8.402000000000001 +CCC(C)C1(CC=C)C(=O)NC(=O)NC1=O,-2.016 +COP(=S)(OC)Oc1ccc(N(=O)=O)c(C)c1,-4.04 +Ic1cccc2ccccc12,-4.55 +OCC(O)C(O)C(O)C(O)CO,1.09 +CCS,-0.6 +ClCC(Cl)Cl,-1.48 +CN(C)C(=O)Oc1cc(C)nn1c2ccccc2,-2.09 +NC(=O)c1ccccc1O,-1.82 +Cc1ccccc1N(=O)=O,-2.33 +O=C1NC(=O)NC(=O)C1(C(C)C)C(C)C,-2.766 +CCc1ccccc1C,-3.21 +CCCCCCCCl,-4 +O=C1NC(=O)NC(=O)C1(CC)CC,-2.4 +C(Cc1ccccc1)c2ccccc2,-4.62 +ClC(Cl)C(Cl)Cl,-1.74 +CCN2c1cc(OC)cc(C)c1NC(=O)c3cccnc23,-5.153 +Cc1ccc2c(ccc3ccccc32)c1,-5.84 +CCCCOC(=O)c1ccccc1C(=O)OCCCC,-4.4 +COc1c(O)c(Cl)c(Cl)c(Cl)c1Cl,-4.02 +CCN(CC)C(=O)C(=CCOP(=O)(OC)OC)Cl,0.523 +CC34CCC1C(=CCc2cc(O)ccc12)C3CCC4=O,-5.282 +CCOC(=O)c1ccccc1S(=O)(=O)NN(C=O)c2nc(Cl)cc(OC)n2,-4.5760000000000005 +COc1ccc(cc1)N(=O)=O,-2.41 +CCCCCCCl,-3.12 +Clc1cc(c(Cl)c(Cl)c1Cl)c2cc(Cl)c(Cl)c(Cl)c2Cl,-9.16 +OCC1OC(CO)(OC2OC(COC3OC(CO)C(O)C(O)C3O)C(O)C(O)C2O)C(O)C1O,-0.41 +CCCCCCCCCCCCCCCCCCCCCCCCCC,-8.334 +CCN2c1ccccc1N(C)C(=O)c3cccnc23,-3.324 +CC(Cl)Cl,-1.29 +Nc1ccc(cc1)S(N)(=O)=O,-1.34 +CCCN(CCC)c1c(cc(cc1N(=O)=O)C(C)C)N(=O)=O,-6.49 +ClC1C(Cl)C(Cl)C(Cl)C(Cl)C1Cl,-4.64 +CCOP(=S)(NC(C)C)Oc1ccccc1C(=O)OC(C)C,-4.194 +Clc1cccc(Cl)c1Cl,-4 
+ClC(Cl)(Cl)Cl,-2.31 +O=N(=O)c1cc(Cl)c(Cl)cc1,-3.2 +OC1CCCCCCC1,-1.29 +CC1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C,-3.9989999999999997 +CCOc1ccc(NC(N)=O)cc1,-2.17 +C/C1CCC(\C)CC1,-4.47 +c1cnc2c(c1)ccc3ncccc23,-2.68 +COC(C)(C)C,-0.24 +COc1ccc(C=CC)cc1,-3.13 +CCCCCCCCCCCCCCCCO,-7 +O=c1cc[nH]c(=O)[nH]1,-1.4880000000000002 +Nc1ncnc2nc[nH]c12,-2.12 +Clc1cc(Cl)c(cc1Cl)c2cccc(Cl)c2Cl,-7.21 +COc1ccc(cc1)C(O)(C2CC2)c3cncnc3,-2.596 +c1ccc2c(c1)c3cccc4c3c2cc5ccccc54,-8.23 +O=C(Nc1ccccc1)Nc2ccccc2,-3.15 +CCC1(C(=O)NC(=O)NC1=O)c2ccccc2,-2.322 +Clc1ccc(cc1)c2cccc(Cl)c2Cl,-6.29 +CC(C)c1ccc(NC(=O)N(C)C)cc1,-3.536 +CCN(CC)C(=O)CSc1ccc(Cl)nn1,-1.716 +CCC(C)(C)CO,-1.04 +CCCOC(=O)CCC,-1.75 +Cc1c(cc(cc1N(=O)=O)N(=O)=O)N(=O)=O,-3.22 +CC(C)OP(=S)(OC(C)C)SCCNS(=O)(=O)c1ccccc1,-4.2 +C1CCCCCC1,-3.51 +CCCOC=O,-0.49 +CC(C)c1ccccc1C,-3.76 +Nc1cccc(Cl)c1,-1.37 +CC(C)CC(C)C,-4.26 +o1c2ccccc2c3ccccc13,-4.6 +CCOC2Oc1ccc(OS(C)(=O)=O)cc1C2(C)C,-3.42 +CN(C)C(=O)Nc1cccc(c1)C(F)(F)F,-3.43 +c3ccc2nc1ccccc1cc2c3,-3.67 +CC12CC(=O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO,-3.11 +OCC1OC(O)C(O)C(O)C1O,0.74 +Cc1cccc(O)c1,-0.68 +CC2Cc1ccccc1N2NC(=O)c3ccc(Cl)c(c3)S(N)(=O)=O,-3.5860000000000003 +CCC(C)C(=O)OC2CC(C)C=C3C=CC(C)C(CCC1CC(O)CC(=O)O1)C23,-6.005 +O=N(=O)c1ccc(cc1)N(=O)=O,-3.39 +CCC1(C(=O)NC(=O)NC1=O)C2=CCC3CCC2C3,-2.696 +CCCCCCCCCC(=O)OCC,-4.1 +CN(C)C(=O)Nc1ccccc1,-1.6 +CCCOCC,-0.66 +CC(C)O,0.43 +Cc1ccc2ccccc2c1,-3.77 +ClC(Br)Br,-1.9 +CCC(C(CC)c1ccc(O)cc1)c2ccc(O)cc2,-4.43 +CCOC(=O)CC(SP(=S)(OC)OC)C(=O)OCC,-3.37 +ClCc1ccccc1,-2.39 +C/C=C/C=O,0.32 +CON(C)C(=O)Nc1ccc(Br)c(Cl)c1,-3.924 +Cc1c2ccccc2c(C)c3ccccc13,-6.57 +CCCCCC(=O)OC,-1.87 +CN(C)C(=O)Nc1ccc(c(Cl)c1)n2nc(oc2=O)C(C)(C)C,-4.328 +CC(=O)Nc1ccc(F)cc1,-1.78 +CCc1cccc(CC)c1N(COC)C(=O)CCl,-3.26 +C1CCC=CC1,-2.59 +CC12CC(O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO,-3.09 +c1cncnc1,1.1 +Clc1ccc(cc1)N(=O)=O,-2.92 +CCC(=O)OC,-0.14 +Clc1ccccc1N(=O)=O,-2.55 +CCCCN(C)C(=O)Nc1ccc(Cl)c(Cl)c1,-4.77 +CN1CC(O)N(C1=O)c2nnc(s2)C(C)(C)C,-1.8769999999999998 +O=N(=O)c1ccccc1,-1.8 +Ic1ccccc1,-3.01 +CC2Nc1cc(Cl)c(cc1C(=O)N2c3ccccc3C)S(N)(=O)=O,-3.78 +COc1ccccc1OCC(O)COC(N)=O,-0.985 +CCCCOCN(C(=O)CCl)c1c(CC)cccc1CC,-4.19 +Oc1cccc(Cl)c1Cl,-1.3 +CCCC(=O)OC,-1.92 +CCC(=O)Nc1ccc(Cl)c(Cl)c1,-3 +Nc3nc(N)c2nc(c1ccccc1)c(N)nc2n3,-2.404 +CCCCCC(=O)OCC,-2.35 +OCC(O)C2OC1OC(OC1C2O)C(Cl)(Cl)Cl,-1.84 +CN(C=Nc1ccc(C)cc1C)C=Nc2ccc(C)cc2C,-5.47 +COc1nc(NC(C)C)nc(NC(C)C)n1,-2.478 +CCCCCCC=C,-4.44 +Cc1ccc(N)cc1,-1.21 +Nc1nccs1,-0.36 +c1ccccc1(OC(=O)NC),-1.8030000000000002 +CCCC(O)CC,-0.8 +c3ccc2c(O)c1ccccc1cc2c3,-4.73 +Cc1ccc2cc3ccccc3cc2c1,-6.96 +Cc1cccc(C)c1C,-3.2 +CNC(=O)Oc1ccc(N(C)C)c(C)c1,-2.36 +CCCCCCCC(C)O,-2.74 +CN(C(=O)NC(C)(C)c1ccccc1)c2ccccc2,-3.35 +CCCC(=O)CC,-0.83 +Oc1c(Br)cc(C#N)cc1Br,-3.33 +Clc1ccc(cc1Cl)c2ccccc2,-6.39 +CN(C(=O)COc1nc2ccccc2s1)c3ccccc3,-4.873 +Oc1cccc2ncccc12,-2.54 +CC1=C(SCCO1)C(=O)Nc2ccccc2,-3.14 +CCOc2ccc1nc(sc1c2)S(N)(=O)=O,-3.81 +Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl,-4.28 +ClCBr,-0.89 +CCC1(CC)C(=O)NC(=O)N(C)C1=O,-2.23 +CC(=O)OCC(=O)C3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C,-4.63 +NC(=O)NCc1ccccc1,-0.95 +CN(C)C(=O)Nc1ccc(C)c(Cl)c1,-3.483 +CON(C)C(=O)Nc1ccc(Cl)c(Cl)c1,-3.592 +OC1CCCCCC1,-0.88 +CS(=O)(=O)c1ccc(cc1)C(O)C(CO)NC(=O)C(Cl)Cl,-2.154 +CCCC(C)C1(CC)C(=O)NC(=S)NC1=O,-3.36 +CC(=O)Nc1nnc(s1)S(N)(=O)=O,-2.36 +Oc1ccc(cc1)N(=O)=O,-0.74 +ClC1=C(Cl)C2(Cl)C3C4CC(C=C4)C3C1(Cl)C2(Cl)Cl,-6.307 +C1CCOC1,0.49 +Nc1ccccc1N(=O)=O,-1.96 +Clc1cccc(c1Cl)c2cccc(Cl)c2Cl,-7.28 +CCCCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3,-3.81 +Cc1c(cccc1N(=O)=O)N(=O)=O,-3 +CC(=O)C1CCC2C3CCC4=CC(=O)CCC4(C)C3CCC12C,-4.42 
+CCN(CC)c1nc(Cl)nc(n1)N(CC)CC,-4.4110000000000005 +ClC(Cl)C(Cl)(Cl)SN2C(=O)C1CC=CCC1C2=O,-5.4 +c1(Br)c(Br)cc(Br)cc1,-4.5 +OC3N=C(c1ccccc1)c2cc(Cl)ccc2NC3=O,-3.952 +O=C1NC(=O)NC(=O)C1(C(C)CCC)CC=C,-2.356 +c1(O)c(C)ccc(C(C)C)c1,-2.08 +C1SC(=S)NC1(=O),-1.77 +Oc1ccc(c(O)c1)c3oc2cc(O)cc(O)c2c(=O)c3O,-3.083 +ClC1(C(=O)C2(Cl)C3(Cl)C14Cl)C5(Cl)C2(Cl)C3(Cl)C(Cl)(Cl)C45Cl,-5.2589999999999995 +CCN(CC)C(=S)SSC(=S)N(CC)CC,-4.86 +C1CCCCC1,-3.1 +ClC1=C(Cl)C(Cl)(C(=C1Cl)Cl)C2(Cl)C(=C(Cl)C(=C2Cl)Cl)Cl,-7.278 +CN(C)C=Nc1ccc(Cl)cc1C,-2.86 +CC34CCc1c(ccc2cc(O)ccc12)C3CCC4=O,-5.24 +CCCCCCCCO,-2.39 +CCSCC,-1.34 +ClCCCl,-1.06 +CCC(C)(C)Cl,-2.51 +ClCCBr,-1.32 +Nc1ccc(cc1)N(=O)=O,-2.37 +OCC1OC(OC2C(O)C(O)C(O)OC2CO)C(O)C(O)C1O,-0.244 +CCN2c1ncccc1N(CC)C(=O)c3cccnc23,-2.86 +Clc1ccccc1,-2.38 +CCCCCCCC=C,-5.05 +Brc1ccc(I)cc1,-4.56 +CCC(C)(O)CC,-0.36 +CCCCCc1ccccc1,-4.64 +NC(=O)NC1NC(=O)NC1=O,-1.6 +OCC(O)COC(=O)c1ccccc1Nc2ccnc3cc(Cl)ccc23,-4.571000000000001 +ClC(Cl)C(c1ccc(Cl)cc1)c2ccc(Cl)cc2,-7.2 +CC(=O)OC3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C,-5.184 +Clc1cccc2ccccc12,-3.93 +CCN2c1ccccc1N(C)C(=O)c3ccccc23,-4.749 +CCCCC(C)O,-0.89 +CCCC1CCCC1,-4.74 +CCOC(=O)c1cncn1C(C)c2ccccc2,-4.735 +Oc1ccc(Cl)c(Cl)c1,-1.25 +CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2,-8.017000000000001 +c2ccc1ocnc1c2,-1.16 +CCCCCO,-0.6 +CCN(CC)c1ccccc1,-3.03 +Fc1cccc(F)c1,-2 +ClCCC#N,-0.29 +CC(C)(C)Cc1ccccc1,-4.15 +O=C1NC(=O)NC(=O)C1(CC)c1ccccc1,-2.322 +Clc1ccccc1I,-3.54 +c2ccc1[nH]nnc1c2,-0.78 +CNC(=O)Oc1cccc2CC(C)(C)Oc12,-2.8 +Cc1cccc(C)c1O,-1.29 +CC(C)C(C)O,-0.18 +c1ccccc1C(O)c2ccccc2,-2.55 +CCCCCCCCCC(=O)OC,-4.69 +COP(=S)(OC)Oc1ccc(cc1Cl)N(=O)=O,-4.31 +CC(C)CBr,-2.43 +CCI,-1.6 +CN(C)C(=O)Oc1nc(nc(C)c1C)N(C)C,-1.95 +CCCCCCBr,-3.81 +CCCC(C)C,-3.74 +Cc1c(F)c(F)c(COC(=O)C2C(C=C(Cl)C(F)(F)F)C2(C)C)c(F)c1F,-7.321000000000001 +CCc1cccc(C)c1N(C(C)COC)C(=O)CCl,-2.73 +ON=Cc1ccc(o1)N(=O)=O,-2.19 +CC(C)C(Nc1ccc(cc1Cl)C(F)(F)F)C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2,-8.003 +Nc1nc[nH]n1,0.522 +BrC(Br)Br,-1.91 +COP(=O)(OC)C(O)C(Cl)(Cl)Cl,-0.22 +CCOP(=S)(OCC)SCn1c(=O)oc2cc(Cl)ccc12,-5.233 +OCc1ccccc1,-0.4 +O=c2c(C3CCCc4ccccc43)c(O)c1ccccc1o2,-2.84 +Oc1ccc(Br)cc1,-1.09 +CC(C)Br,-1.59 +CC(C)CC(C)(C)C,-4.74 +O=N(=O)c1cc(cc(c1)N(=O)=O)N(=O)=O,-2.89 +CN2C(=O)CN=C(c1ccccc1)c3cc(ccc23)N(=O)=O,-3.7960000000000003 +CCC,-1.94 +Nc1cc(nc(N)n1=O)N2CCCCC2,-1.989 +Nc2cccc3nc1ccccc1cc23,-4.22 +c1ccc2cc3c4cccc5cccc(c3cc2c1)c45,-8.49 +OC(c1ccc(Cl)cc1)(c2ccc(Cl)cc2)C(Cl)(Cl)Cl,-5.666 +C1Cc2cccc3cccc1c23,-4.63 diff --git a/chemprop/tests/data/regression.npz b/chemprop/tests/data/regression.npz new file mode 100644 index 0000000000000000000000000000000000000000..b14954faf3380e2530cd4de2d5a0866fd4c2e515 --- /dev/null +++ b/chemprop/tests/data/regression.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2640ae20ad2e8797f4f4458789473a2f6cad41f92284c340e40fea8b51dc15cc +size 242650 diff --git a/chemprop/tests/data/regression/bounded.csv b/chemprop/tests/data/regression/bounded.csv new file mode 100644 index 0000000000000000000000000000000000000000..5625e11da19b90d76116c5b0671abf427e333a25 --- /dev/null +++ b/chemprop/tests/data/regression/bounded.csv @@ -0,0 +1,501 @@ +"smiles","logSolubility" +"OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O","<-0.77" +"Cc1occc1C(=O)Nc2ccccc2","<-3.3" +"CC(C)=CCCC(C)=CC(=O)","<-2.06" +"c1ccc2c(c1)ccc3c2ccc4c5ccccc5ccc43","<-7.87" +"c1ccsc1","<-1.33" +"c2ccc1scnc1c2","<-1.5" +"Clc1cc(Cl)c(c(Cl)c1)c2c(Cl)cccc2Cl","<-7.32" +"CC12CCC3C(CCc4cc(O)ccc34)C2CCC1O","<-5.03" +"ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl","<-6.29" 
+"COc5cc4OCC3Oc2c1CC(Oc1ccc2C(=O)C3c4cc5OC)C(C)=C","<-4.42" +"O=C1CCCN1","<1.07" +"Clc1ccc2ccccc2c1","<-4.14" +"CCCC=C","<-2.68" +"CCC1(C(=O)NCNC1=O)c2ccccc2","<-2.64" +"CCCCCCCCCCCCCC","<-7.96" +"CC(C)Cl","<-1.41" +"CCC(C)CO","<-0.47" +"N#Cc1ccccc1","<-1" +"CCOP(=S)(OCC)Oc1cc(C)nc(n1)C(C)C","<-3.64" +"CCCCCCCCCC(C)O","<-2.94" +"Clc1ccc(c(Cl)c1)c2c(Cl)ccc(Cl)c2Cl","<-7.43" +"O=c2[nH]c1CCCc1c(=O)n2C3CCCCC3","<-4.594" +"CCOP(=S)(OCC)SCSCC","<-4.11" +"CCOc1ccc(NC(=O)C)cc1","<-2.35" +"CCN(CC)c1c(cc(c(N)c1N(=O)=O)C(F)(F)F)N(=O)=O","<-5.47" +"CCCCCCCO","<-1.81" +"Cn1c(=O)n(C)c2nc[nH]c2c1=O","<-1.39" +"CCCCC1(CC)C(=O)NC(=O)NC1=O","<-1.661" +"ClC(Cl)=C(c1ccc(Cl)cc1)c2ccc(Cl)cc2","<-6.9" +"CCCCCCCC(=O)OC","<-3.17" +"CCc1ccc(CC)cc1","<-3.75" +"CCOP(=S)(OCC)SCSC(C)(C)C","<-4.755" +"COC(=O)Nc1cccc(OC(=O)Nc2cccc(C)c2)c1","<-4.805" +"ClC(=C)Cl","<-1.64" +"Cc1cccc2c1Cc3ccccc32","<-5.22" +"CCCCC=O","<-0.85" +"N(c1ccccc1)c2ccccc2","<-3.504" +"CN(C)C(=O)SCCCCOc1ccccc1","<-3.927" +"CCCOP(=S)(OCCC)SCC(=O)N1CCCCC1C","<-4.15" +"CCCCCCCI","<-4.81" +"c1c(Cl)cccc1c2ccccc2","<-4.88" +"OCCCC=C","<-0.15" +"O=C2NC(=O)C1(CCC1)C(=O)N2","<-1.655" +"CC(C)C1CCC(C)CC1O","<-2.53" +"CC(C)OC=O","<-0.63" +"CCCCCC(C)O","<-1.55" +"CC(=O)Nc1ccc(Br)cc1","<-3.083" +"c1ccccc1n2ncc(N)c(Br)c2(=O)","<-3.127" +"COC(=O)C1=C(C)NC(=C(C1c2ccccc2N(=O)=O)C(=O)OC)C","<-4.76" +"c2c(C)cc1nc(C)ccc1c2","<-1.94" +"CCCCCCC#C",">-3.66" +"CCC1(C(=O)NC(=O)NC1=O)C2=CCCCC2",">-2.17" +"c1ccc2c(c1)ccc3c4ccccc4ccc23",">-8.057" +"CCC(C)n1c(=O)[nH]c(C)c(Br)c1=O",">-2.523" +"Clc1cccc(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl",">-8.6" +"Cc1ccccc1O",">-0.62" +"CC(C)CCC(C)(C)C",">-5.05" +"Cc1ccc(C)c2ccccc12",">-4.14" +"Cc1cc2c3ccccc3ccc2c4ccccc14",">-6.57" +"CCCC(=O)C",">-0.19" +"Clc1cc(Cl)c(Cl)c(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl",">-9.15" +"CCCOC(=O)CC",">-0.82" +"CC34CC(O)C1(F)C(CCC2=CC(=O)C=CC12C)C3CC(O)C4(O)C(=O)CO",">-3.68" +"Nc1ccc(O)cc1",">-0.8" +"O=C(Cn1ccnc1N(=O)=O)NCc2ccccc2",">-2.81" +"OC4=C(C1CCC(CC1)c2ccc(Cl)cc2)C(=O)c3ccccc3C4=O",">-5.931" +"CCNc1nc(Cl)nc(n1)N(CC)CC",">-4.06" +"NC(=O)c1cnccn1",">-0.667" +"CCC(Br)(CC)C(=O)NC(N)=O",">-2.68" +"Clc1ccccc1c2ccccc2Cl",">-5.27" +"O=C2CN(N=Cc1ccc(o1)N(=O)=O)C(=O)N2",">-3.38" +"Clc2ccc(Oc1ccc(cc1)N(=O)=O)c(Cl)c2",">-5.46" +"CC1(C)C2CCC1(C)C(=O)C2",">-1.96" +"O=C1NC(=O)NC(=O)C1(CC=C)c1ccccc1",">-2.369" +"CCCCC(=O)OCC",">-2.25" +"CC(C)CCOC(=O)C",">-1.92" +"O=C1N(COC(=O)CCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3",">-5.886" +"Clc1cccc(c1)c2cc(Cl)ccc2Cl",">-6.01" +"CCCBr",">-1.73" +"CCCC1COC(Cn2cncn2)(O1)c3ccc(Cl)cc3Cl",">-3.493" +"COP(=S)(OC)SCC(=O)N(C)C=O",">-1.995" +"Cc1ncnc2nccnc12",">-0.466" +"NC(=S)N",">0.32" +"Cc1ccc(C)cc1",">-2.77" +"CCc1ccccc1CC",">-3.28" +"ClC(Cl)(Cl)C(Cl)(Cl)Cl",">-3.67" +"CC(C)C(C(=O)OC(C#N)c1cccc(Oc2ccccc2)c1)c3ccc(OC(F)F)cc3",">-6.876" +"CCCN(=O)=O",">-0.8" +"CC(C)C1CCC(C)CC1=O",">-2.35" +"CCN2c1cc(Cl)ccc1NC(=O)c3cccnc23",">-5.36" +"O=N(=O)c1c(Cl)c(Cl)ccc1",">-3.48" +"CCCC(C)C1(CC=C)C(=O)NC(=S)NC1=O",">-3.46" +"c1ccc2c(c1)c3cccc4cccc2c34",">-6" +"CCCOC(C)C",">-1.34" +"Cc1cc(C)c2ccccc2c1",">-4.29" +"CCC(=C(CC)c1ccc(O)cc1)c2ccc(O)cc2",">-4.07" +"c1(C#N)c(Cl)c(C#N)c(Cl)c(Cl)c(Cl)1",">-5.64" +"Clc1ccc(Cl)c(c1)c2ccc(Cl)c(Cl)c2",">-7.25" +"C1OC1c2ccccc2",">-1.6" +"CC(C)c1ccccc1",">-3.27" +"CC12CCC3C(CCC4=CC(=O)CCC34C)C2CCC1C(=O)CO",-3.45 +"c2(Cl)c(Cl)c(Cl)c1nccnc1c2(Cl)",-5.43 +"C1OC(O)C(O)C(O)C1O",0.39 +"ClCCl",-0.63 +"CCc1cccc2ccccc12",-4.17 +"COC=O",0.58 +"Oc1ccccc1N(=O)=O",-1.74 +"Cc1c[nH]c(=O)[nH]c1=O",-1.506 +"CC(C)C",-2.55 +"OCC1OC(C(O)C1O)n2cnc3c(O)ncnc23",-1.23 +"Oc1c(I)cc(C#N)cc1I",-3.61 
+"Oc1ccc(Cl)cc1C(=O)Nc2ccc(cc2Cl)N(=O)=O",-4.7 +"CCCCC",-3.18 +"c1ccccc1O",0 +"Nc3ccc2cc1ccccc1cc2c3",-5.17 +"Cn1cnc2n(C)c(=O)[nH]c(=O)c12",-2.523 +"c1ccc2cnccc2c1",-1.45 +"COP(=S)(OC)SCC(=O)N(C(C)C)c1ccc(Cl)cc1",-4.432 +"CCCCCCc1ccccc1",-5.21 +"Clc1ccccc1c2ccccc2",-4.54 +"CCCC(=C)C",-3.03 +"CC(C)C(C)C(C)C",-4.8 +"Clc1cc(Cl)c(Cl)c(Cl)c1Cl",-5.65 +"Oc1cccc(c1)N(=O)=O",-1.01 +"CCCCCCCCC=C",-5.51 +"CC(=O)OCC(COC(=O)C)OC(=O)C",-0.6 +"CCCCc1c(C)nc(nc1O)N(C)C",-2.24 +"CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2ccc(F)c(Oc3ccccc3)c2",-7.337 +"c1ccncc1",0.76 +"CCCCCCCBr",-4.43 +"Cc1ccncc1C",0.36 +"CC34CC(O)C1(F)C(CCC2=CC(=O)CCC12C)C3CCC4(O)C(=O)CO",-3.43 +"CCSCc1ccccc1OC(=O)NC",-2.09 +"CCOC(=O)CC(=O)OCC",-0.82 +"CC1=CCC(CC1)C(C)=C",-4.26 +"C1Cc2ccccc2C1",-3.04 +"CC(C)(C)c1ccc(O)cc1",-2.41 +"O=C2NC(=O)C1(CC1)C(=O)N2",-1.886 +"Clc1cccc(I)c1",-3.55 +"Brc1cccc2ccccc12",-4.35 +"CC/C=C/C",-2.54 +"Cc1cccc(C)n1",0.45 +"ClC=C(Cl)Cl",-1.96 +"Nc1cccc2ccccc12",-1.92 +"Cc1cccc(C)c1",-2.82 +"Oc2ncc1nccnc1n2",-1.947 +"CO",1.57 +"CCC1(CCC(C)C)C(=O)NC(=O)NC1=O",-2.468 +"CCC(=O)C",0.52 +"Fc1c[nH]c(=O)[nH]c1=O",-1.077 +"Nc1ncnc2n(ccc12)C3OC(CO)C(O)C3O",-1.95 +"Oc1cccc(O)c1",0.81 +"CCCCCCO",-1.24 +"CCCCCCl",-2.73 +"C=CC=C",-1.87 +"CCCOC(=O)C",-0.72 +"Oc2ccc1CCCCc1c2",-1.99 +"NC(=O)CCl",-0.02 +"COP(=S)(OC)Oc1cc(Cl)c(I)cc1Cl",-6.62 +"Cc1ccc(Cl)cc1",-3.08 +"CSc1nnc(c(=O)n1N)C(C)(C)C",-2.253 +"Cc1ccc(OP(=O)(Oc2cccc(C)c2)Oc3ccccc3C)cc1",-6.01 +"CCCCCC=O",-1.3 +"CCCCOC(=O)c1ccc(N)cc1",-3.082 +"O2c1cc(C)ccc1N(C)C(=O)c3cc(N)cnc23",-3.043 +"CC(C)=CCC/C(C)=C\CO",-2.46 +"Clc1ccc(cc1)c2ccccc2Cl",-5.28 +"O=C1N(COC(=O)CCCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3",-6.523 +"CCN(=O)=O",-0.22 +"CCN(CC(C)=C)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O",-6.124 +"Clc1ccc(Cl)c(Cl)c1Cl",-4.57 +"CCCC(C)(COC(N)=O)COC(N)=O",-1.807 +"CC(=O)C3CCC4C2CC=C1CC(O)CCC1(C)C2CCC34C",-4.65 +"CI",-1 +"CC1CC(C)C(=O)C(C1)C(O)CC2CC(=O)NC(=O)C2",-1.13 +"O=C1N(COC(=O)CCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3",-6.301 +"CC1=CC(=O)CC(C)(C)C1",-1.06 +"O=C1NC(=O)NC(=O)C1(CC)C(C)CC",-2.39 +"CCCCC(=O)CCCC",-2.58 +"CCC1(CCC(=O)NC1=O)c2ccccc2",-2.337 +"CCC(C)CC",-3.68 +"CCOc1ccc(cc1)C(C)(C)COCc3cccc(Oc2ccccc2)c3",-8.6 +"Cc1ccccc1n3c(C)nc2ccccc2c3=O",-2.925 +"ClCC#N",-0.092 +"CCOP(=S)(CC)Oc1cc(Cl)c(Cl)cc1Cl",-5.752 +"CC12CCC(=O)C=C1CCC3C2CCC4(C)C3CCC4(O)C#C",-5.66 +"c1ccnnc1",1.1 +"Clc1cc(Cl)c(Cl)c(Cl)c1",-4.63 +"C1C(O)CCC2(C)CC3CCC4(C)C5(C)CC6OCC(C)CC6OC5CC4C3C=C21",-7.32 +"Nc1ccccc1O",-0.72 +"CCCCCCCCC(=O)OCC",-3.8 +"COCC(=O)N(C(C)C(=O)OC)c1c(C)cccc1C",-1.601 +"CNC(=O)Oc1ccccc1OC(C)C",-2.05 +"CCC(C)Cl",-1.96 +"Oc1ccc2ccccc2c1",-2.28 +"CC(C)Oc1cc(c(Cl)cc1Cl)n2nc(oc2=O)C(C)(C)C",-5.696 +"CCCCC#C",-2.36 +"CCCCCCCC#C",-4.24 +"Cc1ccccc1Cl",-3.52 +"CC(C)OC(C)C",-1.1 +"Nc1ccc(cc1)S(=O)(=O)c2ccc(N)cc2",-3.094 +"CNN",1.34 +"CC#C",-0.41 +"CCOP(=S)(OCC)ON=C(C#N)c1ccccc1",-4.862 +"CCNP(=S)(OC)OC(=CC(=O)OC(C)C)C",-3.408 +"C=CC=O",0.57 +"O=c1[nH]cnc2nc[nH]c12",-2.296 +"Oc2ccc1ncccc1c2",-2.16 +"Fc1ccccc1",-1.8 +"CCCCl",-1.47 +"CCOC(=O)C",-0.04 +"CCCC(C)(C)C",-4.36 +"Cc1cc(C)c(C)c(C)c1C",-4 +"CC12CCC(CC1)C(C)(C)O2",-1.64 +"CCCCOC(=O)CCCCCCCCC(=O)OCCCC",-3.896 +"Clc1ccc(cc1)c2ccc(Cl)cc2",-6.56 +"Cc1cccnc1C",0.38 +"CC(=C)C1CC=C(C)C(=O)C1",-2.06 +"CCOP(=S)(OCC)SCSc1ccc(Cl)cc1",-5.736 +"COc1cc(cc(OC)c1O)C6C2C(COC2=O)C(OC4OC3COC(C)OC3C(O)C4O)c7cc5OCOc5cc67",-3.571 +"c1cc2cccc3c4cccc5cccc(c(c1)c23)c54",-8.804 +"Cc1ccc(cc1N(=O)=O)N(=O)=O",-2.82 +"c1c(Br)ccc2ccccc12",-4.4 +"CNC(=O)Oc1cccc(N=CN(C)C)c1",-2.34 +"COc2cnc1ncncc1n2",-1.139 +"Cc3ccnc4N(C1CC1)c2ncccc2C(=O)Nc34",-3.19 +"CCOP(=S)(OCC)Oc1nc(Cl)n(n1)C(C)C",-3.658 
+"CC(=C)C=C",-2.03 +"CC(C)=CCCC(O)(C)C=C",-1.99 +"COP(=S)(OC)Oc1ccc(SC)c(C)c1",-4.57 +"OC1CCCCC1",-0.44 +"O=C1NC(=O)NC(=O)C1(C)CC=C",-1.16 +"CC34CCC1C(CCC2CC(O)CCC12C)C3CCC4=O",-4.16 +"OCC(O)C(O)C(O)C(O)CO",0.06 +"Cc1ccc(cc1)c2ccccc2",-4.62 +"CCNc1nc(Cl)nc(NC(C)C)n1",-3.85 +"NC(=S)Nc1ccccc1",-1.77 +"CCCC(=O)CCC",-1.3 +"CC(=O)C(C)(C)C",-0.72 +"Oc1ccc(Cl)cc1",-0.7 +"O=C1CCCCC1",-0.6 +"Cc1cccc(N)c1",-0.85 +"ClC(Cl)(Cl)C#N",-2.168 +"CNc2cnn(c1cccc(c1)C(F)(F)F)c(=O)c2Cl",-4.046 +"CCCCCCCCC(=O)C",-3.3 +"CCN(CC)c1nc(Cl)nc(NC(C)C)n1",-3.785 +"CCOC(=O)c1ccc(N)cc1",-2.616 +"Clc1ccc(Cl)c(Cl)c1",-3.59 +"Cc3nnc4CN=C(c1ccccc1Cl)c2cc(Cl)ccc2n34",-4.09 +"Oc1ccccc1O",0.62 +"CCN2c1ncccc1N(C)C(=O)c3cccnc23",-2.62 +"CSC",-0.45 +"Cc1ccccc1Br",-2.23 +"CCOC(=O)N",0.85 +"CC(=O)OC3(CCC4C2C=C(C)C1=CC(=O)CCC1(C)C2CCC34C)C(C)=O",-5.35 +"CC(C)C(O)C(C)C",-1.22 +"c1ccc2ccccc2c1",-3.6 +"CCNc1ccccc1",-1.7 +"O=C1NC(=O)C(N1)(c2ccccc2)c3ccccc3",-4.097 +"Cc1c2ccccc2c(C)c3ccc4ccccc4c13",-7.02 +"CCOP(=S)(OCC)SC(CCl)N1C(=O)c2ccccc2C1=O",-6.34 +"COc1ccc(cc1)C(c2ccc(OC)cc2)C(Cl)(Cl)Cl",-6.89 +"Fc1cccc(F)c1C(=O)NC(=O)Nc2cc(Cl)c(F)c(Cl)c2F",-7.28 +"O=C1N(COC(=O)CCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3",-4.678 +"CN(C)C(=O)Nc1ccc(Cl)cc1",-2.89 +"OC(Cn1cncn1)(c2ccc(F)cc2)c3ccccc3F",-3.37 +"CC(=O)OCC(=O)C3(O)C(CC4C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC34C)OC(C)=O",-4.13 +"CCCCBr",-2.37 +"Brc1cc(Br)c(Br)cc1Br",-6.98 +"CC(C)CC(=O)C",-0.74 +"CCSC(=O)N(CC)C1CCCCC1",-3.4 +"COc1ccc(Cl)cc1",-2.78 +"CC1(C)C(C=C(Br)Br)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2",-8.402 +"CCC(C)C1(CC=C)C(=O)NC(=O)NC1=O",-2.016 +"COP(=S)(OC)Oc1ccc(N(=O)=O)c(C)c1",-4.04 +"Ic1cccc2ccccc12",-4.55 +"OCC(O)C(O)C(O)C(O)CO",1.09 +"CCS",-0.6 +"ClCC(Cl)Cl",-1.48 +"CN(C)C(=O)Oc1cc(C)nn1c2ccccc2",-2.09 +"NC(=O)c1ccccc1O",-1.82 +"Cc1ccccc1N(=O)=O",-2.33 +"O=C1NC(=O)NC(=O)C1(C(C)C)C(C)C",-2.766 +"CCc1ccccc1C",-3.21 +"CCCCCCCCl",-4 +"O=C1NC(=O)NC(=O)C1(CC)CC",-2.4 +"C(Cc1ccccc1)c2ccccc2",-4.62 +"ClC(Cl)C(Cl)Cl",-1.74 +"CCN2c1cc(OC)cc(C)c1NC(=O)c3cccnc23",-5.153 +"Cc1ccc2c(ccc3ccccc32)c1",-5.84 +"CCCCOC(=O)c1ccccc1C(=O)OCCCC",-4.4 +"COc1c(O)c(Cl)c(Cl)c(Cl)c1Cl",-4.02 +"CCN(CC)C(=O)C(=CCOP(=O)(OC)OC)Cl",0.523 +"CC34CCC1C(=CCc2cc(O)ccc12)C3CCC4=O",-5.282 +"CCOC(=O)c1ccccc1S(=O)(=O)NN(C=O)c2nc(Cl)cc(OC)n2",-4.576 +"COc1ccc(cc1)N(=O)=O",-2.41 +"CCCCCCCl",-3.12 +"Clc1cc(c(Cl)c(Cl)c1Cl)c2cc(Cl)c(Cl)c(Cl)c2Cl",-9.16 +"OCC1OC(CO)(OC2OC(COC3OC(CO)C(O)C(O)C3O)C(O)C(O)C2O)C(O)C1O",-0.41 +"CCCCCCCCCCCCCCCCCCCCCCCCCC",-8.334 +"CCN2c1ccccc1N(C)C(=O)c3cccnc23",-3.324 +"CC(Cl)Cl",-1.29 +"Nc1ccc(cc1)S(N)(=O)=O",-1.34 +"CCCN(CCC)c1c(cc(cc1N(=O)=O)C(C)C)N(=O)=O",-6.49 +"ClC1C(Cl)C(Cl)C(Cl)C(Cl)C1Cl",-4.64 +"CCOP(=S)(NC(C)C)Oc1ccccc1C(=O)OC(C)C",-4.194 +"Clc1cccc(Cl)c1Cl",-4 +"ClC(Cl)(Cl)Cl",-2.31 +"O=N(=O)c1cc(Cl)c(Cl)cc1",-3.2 +"OC1CCCCCCC1",-1.29 +"CC1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C",-3.999 +"CCOc1ccc(NC(N)=O)cc1",-2.17 +"C/C1CCC(\C)CC1",-4.47 +"c1cnc2c(c1)ccc3ncccc23",-2.68 +"COC(C)(C)C",-0.24 +"COc1ccc(C=CC)cc1",-3.13 +"CCCCCCCCCCCCCCCCO",-7 +"O=c1cc[nH]c(=O)[nH]1",-1.488 +"Nc1ncnc2nc[nH]c12",-2.12 +"Clc1cc(Cl)c(cc1Cl)c2cccc(Cl)c2Cl",-7.21 +"COc1ccc(cc1)C(O)(C2CC2)c3cncnc3",-2.596 +"c1ccc2c(c1)c3cccc4c3c2cc5ccccc54",-8.23 +"O=C(Nc1ccccc1)Nc2ccccc2",-3.15 +"CCC1(C(=O)NC(=O)NC1=O)c2ccccc2",-2.322 +"Clc1ccc(cc1)c2cccc(Cl)c2Cl",-6.29 +"CC(C)c1ccc(NC(=O)N(C)C)cc1",-3.536 +"CCN(CC)C(=O)CSc1ccc(Cl)nn1",-1.716 +"CCC(C)(C)CO",-1.04 +"CCCOC(=O)CCC",-1.75 +"Cc1c(cc(cc1N(=O)=O)N(=O)=O)N(=O)=O",-3.22 +"CC(C)OP(=S)(OC(C)C)SCCNS(=O)(=O)c1ccccc1",-4.2 +"C1CCCCCC1",-3.51 +"CCCOC=O",-0.49 +"CC(C)c1ccccc1C",-3.76 
+"Nc1cccc(Cl)c1",-1.37 +"CC(C)CC(C)C",-4.26 +"o1c2ccccc2c3ccccc13",-4.6 +"CCOC2Oc1ccc(OS(C)(=O)=O)cc1C2(C)C",-3.42 +"CN(C)C(=O)Nc1cccc(c1)C(F)(F)F",-3.43 +"c3ccc2nc1ccccc1cc2c3",-3.67 +"CC12CC(=O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO",-3.11 +"OCC1OC(O)C(O)C(O)C1O",0.74 +"Cc1cccc(O)c1",-0.68 +"CC2Cc1ccccc1N2NC(=O)c3ccc(Cl)c(c3)S(N)(=O)=O",-3.586 +"CCC(C)C(=O)OC2CC(C)C=C3C=CC(C)C(CCC1CC(O)CC(=O)O1)C23",-6.005 +"O=N(=O)c1ccc(cc1)N(=O)=O",-3.39 +"CCC1(C(=O)NC(=O)NC1=O)C2=CCC3CCC2C3",-2.696 +"CCCCCCCCCC(=O)OCC",-4.1 +"CN(C)C(=O)Nc1ccccc1",-1.6 +"CCCOCC",-0.66 +"CC(C)O",0.43 +"Cc1ccc2ccccc2c1",-3.77 +"ClC(Br)Br",-1.9 +"CCC(C(CC)c1ccc(O)cc1)c2ccc(O)cc2",-4.43 +"CCOC(=O)CC(SP(=S)(OC)OC)C(=O)OCC",-3.37 +"ClCc1ccccc1",-2.39 +"C/C=C/C=O",0.32 +"CON(C)C(=O)Nc1ccc(Br)c(Cl)c1",-3.924 +"Cc1c2ccccc2c(C)c3ccccc13",-6.57 +"CCCCCC(=O)OC",-1.87 +"CN(C)C(=O)Nc1ccc(c(Cl)c1)n2nc(oc2=O)C(C)(C)C",-4.328 +"CC(=O)Nc1ccc(F)cc1",-1.78 +"CCc1cccc(CC)c1N(COC)C(=O)CCl",-3.26 +"C1CCC=CC1",-2.59 +"CC12CC(O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO",-3.09 +"c1cncnc1",1.1 +"Clc1ccc(cc1)N(=O)=O",-2.92 +"CCC(=O)OC",-0.14 +"Clc1ccccc1N(=O)=O",-2.55 +"CCCCN(C)C(=O)Nc1ccc(Cl)c(Cl)c1",-4.77 +"CN1CC(O)N(C1=O)c2nnc(s2)C(C)(C)C",-1.877 +"O=N(=O)c1ccccc1",-1.8 +"Ic1ccccc1",-3.01 +"CC2Nc1cc(Cl)c(cc1C(=O)N2c3ccccc3C)S(N)(=O)=O",-3.78 +"COc1ccccc1OCC(O)COC(N)=O",-0.985 +"CCCCOCN(C(=O)CCl)c1c(CC)cccc1CC",-4.19 +"Oc1cccc(Cl)c1Cl",-1.3 +"CCCC(=O)OC",-1.92 +"CCC(=O)Nc1ccc(Cl)c(Cl)c1",-3 +"Nc3nc(N)c2nc(c1ccccc1)c(N)nc2n3",-2.404 +"CCCCCC(=O)OCC",-2.35 +"OCC(O)C2OC1OC(OC1C2O)C(Cl)(Cl)Cl",-1.84 +"CN(C=Nc1ccc(C)cc1C)C=Nc2ccc(C)cc2C",-5.47 +"COc1nc(NC(C)C)nc(NC(C)C)n1",-2.478 +"CCCCCCC=C",-4.44 +"Cc1ccc(N)cc1",-1.21 +"Nc1nccs1",-0.36 +"c1ccccc1(OC(=O)NC)",-1.803 +"CCCC(O)CC",-0.8 +"c3ccc2c(O)c1ccccc1cc2c3",-4.73 +"Cc1ccc2cc3ccccc3cc2c1",-6.96 +"Cc1cccc(C)c1C",-3.2 +"CNC(=O)Oc1ccc(N(C)C)c(C)c1",-2.36 +"CCCCCCCC(C)O",-2.74 +"CN(C(=O)NC(C)(C)c1ccccc1)c2ccccc2",-3.35 +"CCCC(=O)CC",-0.83 +"Oc1c(Br)cc(C#N)cc1Br",-3.33 +"Clc1ccc(cc1Cl)c2ccccc2",-6.39 +"CN(C(=O)COc1nc2ccccc2s1)c3ccccc3",-4.873 +"Oc1cccc2ncccc12",-2.54 +"CC1=C(SCCO1)C(=O)Nc2ccccc2",-3.14 +"CCOc2ccc1nc(sc1c2)S(N)(=O)=O",-3.81 +"Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl",-4.28 +"ClCBr",-0.89 +"CCC1(CC)C(=O)NC(=O)N(C)C1=O",-2.23 +"CC(=O)OCC(=O)C3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C",-4.63 +"NC(=O)NCc1ccccc1",-0.95 +"CN(C)C(=O)Nc1ccc(C)c(Cl)c1",-3.483 +"CON(C)C(=O)Nc1ccc(Cl)c(Cl)c1",-3.592 +"OC1CCCCCC1",-0.88 +"CS(=O)(=O)c1ccc(cc1)C(O)C(CO)NC(=O)C(Cl)Cl",-2.154 +"CCCC(C)C1(CC)C(=O)NC(=S)NC1=O",-3.36 +"CC(=O)Nc1nnc(s1)S(N)(=O)=O",-2.36 +"Oc1ccc(cc1)N(=O)=O",-0.74 +"ClC1=C(Cl)C2(Cl)C3C4CC(C=C4)C3C1(Cl)C2(Cl)Cl",-6.307 +"C1CCOC1",0.49 +"Nc1ccccc1N(=O)=O",-1.96 +"Clc1cccc(c1Cl)c2cccc(Cl)c2Cl",-7.28 +"CCCCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3",-3.81 +"Cc1c(cccc1N(=O)=O)N(=O)=O",-3 +"CC(=O)C1CCC2C3CCC4=CC(=O)CCC4(C)C3CCC12C",-4.42 +"CCN(CC)c1nc(Cl)nc(n1)N(CC)CC",-4.411 +"ClC(Cl)C(Cl)(Cl)SN2C(=O)C1CC=CCC1C2=O",-5.4 +"c1(Br)c(Br)cc(Br)cc1",-4.5 +"OC3N=C(c1ccccc1)c2cc(Cl)ccc2NC3=O",-3.952 +"O=C1NC(=O)NC(=O)C1(C(C)CCC)CC=C",-2.356 +"c1(O)c(C)ccc(C(C)C)c1",-2.08 +"C1SC(=S)NC1(=O)",-1.77 +"Oc1ccc(c(O)c1)c3oc2cc(O)cc(O)c2c(=O)c3O",-3.083 +"ClC1(C(=O)C2(Cl)C3(Cl)C14Cl)C5(Cl)C2(Cl)C3(Cl)C(Cl)(Cl)C45Cl",-5.259 +"CCN(CC)C(=S)SSC(=S)N(CC)CC",-4.86 +"C1CCCCC1",-3.1 +"ClC1=C(Cl)C(Cl)(C(=C1Cl)Cl)C2(Cl)C(=C(Cl)C(=C2Cl)Cl)Cl",-7.278 +"CN(C)C=Nc1ccc(Cl)cc1C",-2.86 +"CC34CCc1c(ccc2cc(O)ccc12)C3CCC4=O",-5.24 +"CCCCCCCCO",-2.39 +"CCSCC",-1.34 +"ClCCCl",-1.06 +"CCC(C)(C)Cl",-2.51 +"ClCCBr",-1.32 +"Nc1ccc(cc1)N(=O)=O",-2.37 
+"OCC1OC(OC2C(O)C(O)C(O)OC2CO)C(O)C(O)C1O",-0.244 +"CCN2c1ncccc1N(CC)C(=O)c3cccnc23",-2.86 +"Clc1ccccc1",-2.38 +"CCCCCCCC=C",-5.05 +"Brc1ccc(I)cc1",-4.56 +"CCC(C)(O)CC",-0.36 +"CCCCCc1ccccc1",-4.64 +"NC(=O)NC1NC(=O)NC1=O",-1.6 +"OCC(O)COC(=O)c1ccccc1Nc2ccnc3cc(Cl)ccc23",-4.571 +"ClC(Cl)C(c1ccc(Cl)cc1)c2ccc(Cl)cc2",-7.2 +"CC(=O)OC3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C",-5.184 +"Clc1cccc2ccccc12",-3.93 +"CCN2c1ccccc1N(C)C(=O)c3ccccc23",-4.749 +"CCCCC(C)O",-0.89 +"CCCC1CCCC1",-4.74 +"CCOC(=O)c1cncn1C(C)c2ccccc2",-4.735 +"Oc1ccc(Cl)c(Cl)c1",-1.25 +"CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2",-8.017 +"c2ccc1ocnc1c2",-1.16 +"CCCCCO",-0.6 +"CCN(CC)c1ccccc1",-3.03 +"Fc1cccc(F)c1",-2 +"ClCCC#N",-0.29 +"CC(C)(C)Cc1ccccc1",-4.15 +"O=C1NC(=O)NC(=O)C1(CC)c1ccccc1",-2.322 +"Clc1ccccc1I",-3.54 +"c2ccc1[nH]nnc1c2",-0.78 +"CNC(=O)Oc1cccc2CC(C)(C)Oc12",-2.8 +"Cc1cccc(C)c1O",-1.29 +"CC(C)C(C)O",-0.18 +"c1ccccc1C(O)c2ccccc2",-2.55 +"CCCCCCCCCC(=O)OC",-4.69 +"COP(=S)(OC)Oc1ccc(cc1Cl)N(=O)=O",-4.31 +"CC(C)CBr",-2.43 +"CCI",-1.6 +"CN(C)C(=O)Oc1nc(nc(C)c1C)N(C)C",-1.95 +"CCCCCCBr",-3.81 +"CCCC(C)C",-3.74 +"Cc1c(F)c(F)c(COC(=O)C2C(C=C(Cl)C(F)(F)F)C2(C)C)c(F)c1F",-7.321 +"CCc1cccc(C)c1N(C(C)COC)C(=O)CCl",-2.73 +"ON=Cc1ccc(o1)N(=O)=O",-2.19 +"CC(C)C(Nc1ccc(cc1Cl)C(F)(F)F)C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2",-8.003 +"Nc1nc[nH]n1",0.522 +"BrC(Br)Br",-1.91 +"COP(=O)(OC)C(O)C(Cl)(Cl)Cl",-0.22 +"CCOP(=S)(OCC)SCn1c(=O)oc2cc(Cl)ccc12",-5.233 +"OCc1ccccc1",-0.4 +"O=c2c(C3CCCc4ccccc43)c(O)c1ccccc1o2",-2.84 +"Oc1ccc(Br)cc1",-1.09 +"CC(C)Br",-1.59 +"CC(C)CC(C)(C)C",-4.74 +"O=N(=O)c1cc(cc(c1)N(=O)=O)N(=O)=O",-2.89 +"CN2C(=O)CN=C(c1ccccc1)c3cc(ccc23)N(=O)=O",-3.796 +"CCC",-1.94 +"Nc1cc(nc(N)n1=O)N2CCCCC2",-1.989 +"Nc2cccc3nc1ccccc1cc23",-4.22 +"c1ccc2cc3c4cccc5cccc(c3cc2c1)c45",-8.49 +"OC(c1ccc(Cl)cc1)(c2ccc(Cl)cc2)C(Cl)(Cl)Cl",-5.666 +"C1Cc2cccc3cccc1c23",-4.63 diff --git a/chemprop/tests/data/regression/mol+mol/atom_descriptors_1.npz b/chemprop/tests/data/regression/mol+mol/atom_descriptors_1.npz new file mode 100644 index 0000000000000000000000000000000000000000..c30f411f33ad2830bc3a1bacf6b4e9ee32b39aa5 --- /dev/null +++ b/chemprop/tests/data/regression/mol+mol/atom_descriptors_1.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be54ce6cb0cba7a8035fabeee8baff5d482f47e15de6889a69cbee56266e0da +size 35370 diff --git a/chemprop/tests/data/regression/mol+mol/atom_features_0.npz b/chemprop/tests/data/regression/mol+mol/atom_features_0.npz new file mode 100644 index 0000000000000000000000000000000000000000..08819b484c89426295d8200fd40ef28ceda2297e --- /dev/null +++ b/chemprop/tests/data/regression/mol+mol/atom_features_0.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b290b1337a1d08b64e160701ab479b049d5efe48eefea69f2a7722ae7d117edf +size 50426 diff --git a/chemprop/tests/data/regression/mol+mol/atom_features_1.npz b/chemprop/tests/data/regression/mol+mol/atom_features_1.npz new file mode 100644 index 0000000000000000000000000000000000000000..163cfe61377346eb5aa7120613f1b35e3e819c01 --- /dev/null +++ b/chemprop/tests/data/regression/mol+mol/atom_features_1.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44f018fca63691a64c103572d82315877524f2670ed08efdb20d26c511b3dc0e +size 28058 diff --git a/chemprop/tests/data/regression/mol+mol/bond_features_0.npz b/chemprop/tests/data/regression/mol+mol/bond_features_0.npz new file mode 100644 index 0000000000000000000000000000000000000000..4f1a32f61da6faf1b35229b97458d057db664955 --- /dev/null +++ 
b/chemprop/tests/data/regression/mol+mol/bond_features_0.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6450233e5dbf8eb2589b681e26a2f30ec31eade141d592cf72fc7d23521110de +size 82882 diff --git a/chemprop/tests/data/regression/mol+mol/descriptors.npz b/chemprop/tests/data/regression/mol+mol/descriptors.npz new file mode 100644 index 0000000000000000000000000000000000000000..71f6ecf3040f851d050c0debafa051d5faee28fd --- /dev/null +++ b/chemprop/tests/data/regression/mol+mol/descriptors.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee3183790461ef822b0a86439a7425cc9d8d304b5216a3b1008c02449e5fed67 +size 1864 diff --git a/chemprop/tests/data/regression/mol+mol/mol+mol.csv b/chemprop/tests/data/regression/mol+mol/mol+mol.csv new file mode 100644 index 0000000000000000000000000000000000000000..e6a63c99b36d27061727e6f05cc91df324e0915e --- /dev/null +++ b/chemprop/tests/data/regression/mol+mol/mol+mol.csv @@ -0,0 +1,101 @@ +smiles,solvent,peakwavs_max +CCCCN1C(=O)C(=C/C=C/C=C/C=C2N(CCCC)c3ccccc3N2CCCC)C(=O)N(CCCC)C1=S,ClCCl,642.0 +C(=C/c1cnccn1)\c1ccc(N(c2ccccc2)c2ccc(/C=C/c3cnccn3)cc2)cc1,ClCCl,420.0 +CN(C)c1ccc2c(-c3ccc(N)cc3C(=O)[O-])c3ccc(=[N+](C)C)cc-3oc2c1,O,544.0 +c1ccc2[nH]ccc2c1,O,290.0 +CCN(CC)c1ccc2c(c1)OC1=C(/C=C/C3=[N+](C)c4ccc5ccccc5c4C3(C)C)CCCC1=C2c1ccccc1C(=O)O,ClC(Cl)Cl,736.0 +CCN1/C(=C\C=C\C=C\C2=[N+](CC)c3ccc4ccccc4c3C2(C)C)C(C)(C)c2c1ccc1ccccc21,CC(C)=O,680.0 +O=C([O-])c1c(Cl)c(Cl)c(Cl)c(Cl)c1-c1c2cc(I)c(=O)c(I)c-2oc2c(I)c([O-])c(I)cc12,CC(C)O,561.0 +O=P(c1c2ccccc2cc2ccccc12)(c1c2ccccc2cc2ccccc12)c1c2ccccc2cc2ccccc12,C1CCOC1,411.0 +COc1ccc(/C=C/c2nc(-c3ccc(C)cc3)[nH]c2/C=C/c2ccc(OC)cc2)cc1,ClCCl,375.0 +CN1CCN(c2ccc3c4c(cccc24)C(=O)c2ccccc2-3)CC1,C1CCCCC1,428.2 +COc1ccc(/C=C/C2=CC(/C=C/c3ccc(OC)c(OC)c3)=[O+][B-](F)(F)O2)cc1OC,CS(C)=O,513.0 +Nc1cc2ccc3cccc4ccc(c1)c2c34,CC#N,338.0 +C(#Cc1ccncc1)C(C#Cc1ccncc1)=Cc1ccc(C=C(C#Cc2ccncc2)C#Cc2ccncc2)s1,ClC(Cl)Cl,522.0 +CN(C)c1cccc(/C=C/c2ncc(-c3ccc(OCc4ccccc4)c(OCc4ccccc4)c3)o2)c1,ClCCl,350.0 +CCN(CC)c1ccc2nc3ccc(=[N+](CC)CC)cc-3oc2c1,CCCCCCc1ccc(-c2ccc(C#N)cc2)cc1,662.0 +CN1c2ccccc2C(O)(c2ccccc2)c2cc([N+](=O)[O-])ccc21,CCO,393.7 +CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21,CC#N,312.0 +N#Cc1ccc(N2c3ccccc3C(c3ccccc3)(c3ccccc3)c3ccccc32)cc1,Cc1ccccc1,352.0 +CCN1/C(=C\C2=C([O-])/C(=C/C3=[N+](CC)c4c(ccc5ccc(C)nc45)C3(C)C)C2=O)C(C)(C)c2ccc3ccc(C)nc3c21,C1CCCCC1,682.0 +CN1C(=O)c2cccc3c(-n4c5ccccc5c5ccccc54)ccc(c23)C1=O,CCCCCC,402.0 +COc1ccc(C2=C(c3ccccc3)C(c3ccccc3)=[N+]3C2=Nc2c(-c4ccc(OC)cc4)c(-c4ccccc4)c(-c4ccccc4)n2[B-]3(F)F)cc1,Cc1ccccc1,656.0 +Cc1ccc(C2=C3C=CC(c4ccccc4)=[N+]3[B-](F)(F)n3c2ccc3-c2ccccc2)cc1,C1CCOC1,553.0 +CCCCCCCCN1C(=O)c2ccc3c4c(-c5ccc6c(c5)c5ccccc5n6-c5ccccc5)cc5c6c(cc(-c7ccc8c(c7)c7ccccc7n8-c7ccccc7)c(c7ccc(c2c37)C1=O)c64)C(=O)N(CCCCCCCC)C5=O,ClC(Cl)Cl,563.0 +CC1=C(C(C)(C)C)C(C)=[N+]2C1=C(C)c1c(C)c(C(C)(C)C)c(C)n1[B-]2(F)F,C1CCCCC1,529.0 +O=c1oc2ccc3ccccc3c2nc1-c1ccc(Cl)cc1,c1ccccc1,404.0 +CCN1CCN(c2ccc(/N=N/c3cc([N+](=O)[O-])cc([N+](=O)[O-])c3)cc2)CC1,CCOC(C)=O,454.0 +N#CC(C#N)=C(/C=C/c1ccc(N(c2ccccc2)c2ccccc2)cc1)c1ccccc1,C1COCCO1,480.0 +Cc1ccc(C)cc1,C1CCCCC1,262.8811777 +CC(C)CCCCN1CCc2c1ccc1c3c(ccc21)CCCC3=O,Cc1ccccc1,426.0 +[O-]c1c(-c2ccccc2)cc(-[n+]2c(-c3ccccc3)cc(-c3ccccc3)cc2-c2ccccc2)cc1-c1ccccc1,ClC(Cl)Cl,731.2276215 +CCCCCN(c1ccccc1)c1ccc2c(c1)C(CC)(CC)c1cc(/C=C/c3ccc(/C=C(\C#N)C(=O)O)s3)ccc1-2,CC#N,430.0 +c1ccc(-c2ccc(-c3ccc(N(c4ccccc4)c4ccccc4)cc3)c3nsnc23)cc1,C1CCOC1,437.0 +COC(=O)c1ccc2c3ccc(C(=O)OC)c4c(C(=O)OC)ccc(c5ccc(C(=O)OC)c1c25)c43,ClC(Cl)Cl,469.0 
+Cc1ccc(C(=O)NN2C3=C(CC4=C2CCCC4=O)C(=O)CCC3)cc1,CC(C)=O,366.0 +CCn1ncc2c3c(C#N)c4c5ccccc5n(C)c4nc3ccc21,CCOC(C)=O,383.0 +CCOc1ccc2cc(-c3ccc(C)cc3)c(=O)oc2c1,CCCCCC,338.0 +CCn1c2ccccc2c2cc(-c3cc(-c4ccc5c(c4)c4ccccc4n5CC)nc(S(C)(=O)=O)n3)ccc21,ClCCl,384.0 +COC(=O)c1c2ccccc2nc2ccccc12,CCCO,361.0 +CCOC(=O)COc1ccc(C(c2c(C)[nH]c3ccccc23)c2c(C)[nH]c3ccccc23)cc1,CO,290.0 +CN(C)c1ccc(/C=C/C=C2\CC/C(=C\C=C\c3ccc(N(C)C)cc3)C2=O)cc1,CCCCCC,478.0 +N#Cc1ccc(/C=C/C=C/c2ccccc2)cc1,CC#N,344.0 +CSc1sc(C(C)=O)c2nn[nH]c(=O)c12,CCO,397.0 +CN(C)c1ccc2c(c1)C(C)(C)C(=O)C=C2,O,413.0 +O=C1NC(=O)/C(=C\c2ccccc2O)S1,OCC(O)CO,364.0 +O=Cc1ccc2c(c1)c1ccccc1n2-c1cc2ccccc2c2ccccc12,CCCCC,313.0 +CN(C)CCCN1c2ccccc2CCc2ccccc21,O,275.0 +Cc1ccc(-n2c(-c3ccccc3[NH-])[nH+]c3c4ccccc4c4ccccc4c32)cc1,Cc1ccccc1,363.0 +C[n+]1ccc(/C=C/c2ccc([O-])cc2)cc1,CCO,515.0 +CC(C)(C)c1ccc2c(c1)c1cc(C(C)(C)C)ccc1n2-c1nc2ccccc2nc1-n1c2ccc(C(C)(C)C)cc2c2cc(C(C)(C)C)ccc21,Cc1ccccc1,419.0 +CC(C)[Si]1(C(C)C)c2cc(C#N)ccc2-c2ccc(N(C)C)cc21,ClCCl,368.0 +CNc1cccc2c1C(=O)c1ccccc1-2,C1CCCCC1,432.0 +CC[N+]1=C(/C=C/c2ccc3ccc4cccc5ccc2c3c45)C(C)(C)c2ccccc21,CS(C)=O,506.0 +CCn1c2sccc2c2c3nsnc3c3c4ccsc4n(CC)c3c21,ClCCl,425.0 +COc1ccc(NC(=O)C(C(=O)Nc2ccc(OC)cc2)C(c2ccccc2O)c2c(C)[nH]c3ccccc23)cc1,CO,258.0 +CC(C)(C)c1ccc(-c2cc(-c3ccc(-c4cc(-c5ccc(C(C)(C)C)cc5)c(C#N)s4)s3)sc2C#N)cc1,ClCCl,392.0 +CCCCCCC(CCCCCC)N1C(=O)c2cccc3c(-c4ccc(OC)cc4)ccc(c23)C1=O,CCCCO,366.2 +CN(C)c1ccc2cc3ccc(=[N+](C)C)cc-3oc2c1,O,548.0 +C1=C2c3cc4ccccc4cc3C3=[N+]2[B-]2(Oc4ccccc43)Oc3ccccc3-c3c4cc5ccccc5cc4c1n32,C1CCOC1,830.0 +c1ccc(C2=C(c3ccccc3)[Si](c3ccccc3)(c3ccccc3)c3ccccc32)cc1,C1CCOC1,330.0 +c1ccc(N(c2ccccc2)c2ccc(-c3nc4c5ccccc5c5ccccc5c4[nH]3)cc2)cc1,C1COCCN1,383.0 +Cc1n[nH]c2c1C(c1ccc([N+](=O)[O-])cc1)C(C#N)=C(N)O2,C1CCOC1,261.0 +CCN(CC)c1ccc(/C=C/C2=[O+][B-](F)(F)Oc3c2c(=O)oc2cc(N(CC)CC)ccc32)cc1,ClC(Cl)Cl,581.0 +CN1C(=CC2=C([O-])C(=Cc3oc(-c4ccc(-c5ccccc5)cc4)c[n+]3C)C2=O)C(C)(C)c2ccccc21,ClC(Cl)Cl,606.0 +Nc1cc2ccc3cccc4ccc(c1)c2c34,CO,336.0 +CCN(CC)c1ccc(/N=N/c2nc3ccc(Br)cc3s2)cc1,CC#N,520.0 +CC1=CC(C)=[N+]2C1=C(c1ccc(N3CCCCC3)cc1)c1c(C)cc(C)n1[B-]2(F)F,CO,497.0 +N#Cc1c(C#N)c2cc(Br)ccc2c2ccc(Br)cc12,ClC(Cl)Cl,350.0 +Cc1cnc(NC(=O)C2=C([O-])c3ccccc3S(=O)(=O)N2C)s1,CCCCO,369.0 +O=c1ccc2cc3c(cc2o1)OCC3,CC(C)O,333.0 +Cc1cccc(-c2c3ccccc3cc3ccccc23)n1,ClCCl,385.0 +CB1C=Cc2ccc3ccccc3c2N1c1ccccc1,C1CCCCC1,359.0 +CCOC(=O)C1=CNC=C(C(=O)OCC)C1c1ccccc1,C1CCOC1,355.0 +CN(C)c1ccc(/C=C/C=C2\CC/C(=C\c3ccc(N(C)C)cc3)C2=O)cc1,CCCCO,499.0 +C#CCN1C(=O)/C(=C/c2cc(OC(C)C)ccc2OC(C)C)N=C1C,CCCO,400.0 +[O-]c1c(-c2ccccc2)cc(-[n+]2c(-c3ccccc3)cc(-c3ccccc3)cc2-c2ccccc2)cc1-c1ccccc1,CCCCCCCCCC,922.2903226 +CCCCCCCCOc1ccc(C#Cc2ccc(C#Cc3ccc(OCCCCCCCC)c(OCCCCCCCC)c3)c3nc4c5ccccc5c5ccccc5c4nc23)cc1OCCCCCCCC,Cc1ccccc1,461.0 +CCCCN(CCCC)c1ccc(C#Cc2cc(C#Cc3ccc(N(CCCC)CCCC)cc3)c(C#Cc3ccc(C(F)(F)F)cc3)cc2C#Cc2ccc(C(F)(F)F)cc2)cc1,ClCCl,427.0 +COc1ccc(C#Cc2c3ccccc3c(C#Cc3ccc(OC)cc3)c3cc4sc(C)cc4cc23)cc1,ClCCl,520.0 +N#CC(C#N)=C(/C=C/c1cn(-c2ccccc2)nc1-c1ccccc1)c1ccccc1,C1CCOC1,402.0 +COC(=O)c1ccc2c3ccc(C(=O)OC)c4c(C(=O)OC)ccc(c5ccc(C(=O)OC)c1c25)c43,CC(C)O,476.0 +COc1ccc2c(c1)C(=O)c1cc(OC)cc(OC)c1-2,C1CCCCC1,478.0 +CCOC(=O)c1ccc(C2=C(c3ccccc3)c3oc4ccccc4[n+]3[B-](F)(F)O2)cc1,C1CCOC1,350.0 +CC1(C)Oc2c(c3nc4ccccc4nc3c3ccccc23)CC1Br,CCCCCC,422.1190376 +CC(C)(C)c1ccc(-c2nnc(-c3ccc4ccc5cccc6ccc3c4c56)o2)cc1,Cc1ccccc1,394.0 +CCCCCCCCCCCCC(CCCCCCCCCC)Cn1c2cc(C=C(C#N)C#N)c3cccc4c5cccc6c(C=C(C#N)C#N)cc1c(c65)c2c34,ClCCl,581.0 +CC1=C(Br)C(C)=[N+]2C1=C(c1c(-c3ccccn3)nc3ccccn13)c1c(C)c(Br)c(C)n1[B-]2(F)F,ClCCl,548.0 
+[O-]c1c(-c2ccccc2)cc(-[n+]2c(-c3ccccc3)cc(-c3ccccc3)cc2-c2ccccc2)cc1-c1ccccc1,CC(C)(C)c1cccc(C(C)(C)C)n1,840.9117647 +CCCCC(CC)Cn1c2ccc(C(=C(C#N)C#N)c3ccccc3)cc2c2c(-c3ccc(Br)cc3)c3c(c(-c4ccc(Br)cc4)c21)c1cc(C(=C(C#N)C#N)c2ccccc2)ccc1n3CC(CC)CCCC,CC#N,462.0 +COC(=O)CCC(NC(=O)CCNc1cccc2ccccc12)C(=O)OC,CCO,330.0 +O=P1(c2ccccc2)C(c2cc(C(F)(F)F)cc(C(F)(F)F)c2)=Cc2c3ccc4cccc5ccc(c6cc(-c7ccc(N(c8ccccc8)c8ccccc8)cc7)n1c26)c3c45,ClCCl,437.0 +Cc1nnc(-c2cc(-n3c4ccccc4c4ccccc43)c(-n3c4ccccc4c4ccccc43)cc2-c2nnc(C)o2)o1,ClCCl,353.0 +COc1ccc2c3c(cccc13)C(=O)c1ccccc1-2,CCOCC,414.0786749 +c1ccc2c3c(ccc2c1)C1(c2cc(-c4ccc5ccc6cccc7ccc4c5c67)ccc2-c2ccc(-c4ccc5ccc6cccc7ccc4c5c67)cc21)c1ccc2ccccc2c1O3,ClC(Cl)Cl,365.0 +Cc1ccc(C2=Nc3cccc4[n+]3[B-](F)(O2)OC(c2ccc(C)cc2)=N4)cc1,ClCCl,394.0 +CC(C)(C)c1ccc(-c2nc3c4ccc(-c5ccccc5)cc4c4cc(-c5ccccc5)ccc4c3n2-c2ccc(C(C)(C)C)cc2)cc1,C1COCCO1,333.0 +COc1ccc(C2CC(c3ccc(O)cc3)=NN2c2ccc(S(N)(=O)=O)cc2)cc1,C1CCOC1,359.0 +COc1ccc2c3c(c4ccc(OC)cc4c2c1)C1(c2ccccc2-c2ccccc21)c1ccccc1-3,C1CCCCC1,386.0 +CCCCOc1c(C=C2N(C)c3ccccc3C2(C)C)c(=O)c1=O,CCO,425.0 +Cc1cc2ccc(-c3cccc4cccc(-c5ccc6cc(C)c(=O)oc6c5)c34)cc2oc1=O,c1ccccc1,324.0 +Cc1ccc(C(=O)c2c(C)c3ccc4cccc5c6cccc7ccc2c(c76)c3c45)cc1,ClCCl,391.0 \ No newline at end of file diff --git a/chemprop/tests/data/regression/mol/atom_descriptors.npz b/chemprop/tests/data/regression/mol/atom_descriptors.npz new file mode 100644 index 0000000000000000000000000000000000000000..1daa8644ebbc9d63d9ea89f3178fee08ad6bb7e9 --- /dev/null +++ b/chemprop/tests/data/regression/mol/atom_descriptors.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:627debbf7895a0cc77ad368a1d387193ee711e8139032940a2b2c5ff8d56ee67 +size 88986 diff --git a/chemprop/tests/data/regression/mol/atom_features.npz b/chemprop/tests/data/regression/mol/atom_features.npz new file mode 100644 index 0000000000000000000000000000000000000000..1daa8644ebbc9d63d9ea89f3178fee08ad6bb7e9 --- /dev/null +++ b/chemprop/tests/data/regression/mol/atom_features.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:627debbf7895a0cc77ad368a1d387193ee711e8139032940a2b2c5ff8d56ee67 +size 88986 diff --git a/chemprop/tests/data/regression/mol/bond_features.npz b/chemprop/tests/data/regression/mol/bond_features.npz new file mode 100644 index 0000000000000000000000000000000000000000..3c8ada61368ac8bfc166fef6d9e2c9b0328ac610 --- /dev/null +++ b/chemprop/tests/data/regression/mol/bond_features.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74926b490dcf8abcd91b8036a7649ec5d24d43d6e670ff8663b8bacef897cb66 +size 71458 diff --git a/chemprop/tests/data/regression/mol/config.toml b/chemprop/tests/data/regression/mol/config.toml new file mode 100644 index 0000000000000000000000000000000000000000..7cc86295a31156dcf488e718e495471125337597 --- /dev/null +++ b/chemprop/tests/data/regression/mol/config.toml @@ -0,0 +1,33 @@ +data-path = tests/data/regression.csv +output-dir = trained_model +epochs = 1 +num-workers = 0 +batch-size = 64 +accelerator = auto +devices = auto +rxn-mode = REAC_DIFF +multi-hot-atom-featurizer-mode = V2 +frzn-ffn-layers = 0 +ensemble-size = 1 +message-hidden-dim = 300 +depth = 3 +dropout = 0.0 +activation = RELU +aggregation = mean +aggregation-norm = 100 +ffn-hidden-dim = 300 +ffn-num-layers = 1 +multiclass-num-classes = 3 +task-type = regression +v-kl = 0.0 +eps = 1e-08 +warmup-epochs = 2 +init-lr = 0.0001 +max-lr = 0.001 +final-lr = 0.0001 +split = RANDOM +split-sizes = [0.8, 0.1, 0.1] +split-key-molecule = 0 
+num-replicates = 1 +data-seed = 0 +pytorch-seed = 0 diff --git a/chemprop/tests/data/regression/mol/descriptors.npz b/chemprop/tests/data/regression/mol/descriptors.npz new file mode 100644 index 0000000000000000000000000000000000000000..b74555148ab90514e118ee51ce94af12e254b2e6 --- /dev/null +++ b/chemprop/tests/data/regression/mol/descriptors.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d3c0cd9e972e0912a7e66aa19b3c70849041e84d47fa01922cb20e2f7988df +size 1064 diff --git a/chemprop/tests/data/regression/mol/mol.csv b/chemprop/tests/data/regression/mol/mol.csv new file mode 100644 index 0000000000000000000000000000000000000000..11a79f8386574c06f03f984654ea23d7bc8317d6 --- /dev/null +++ b/chemprop/tests/data/regression/mol/mol.csv @@ -0,0 +1,101 @@ +smiles,lipo +Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14,3.54 +COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23,-1.18 +COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl,3.69 +OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3,3.37 +Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1,3.1 +OC1(CN2CCC1CC2)C#Cc3ccc(cc3)c4ccccc4,3.14 +COc1cc(OC)c(cc1NC(=O)CCC(=O)O)S(=O)(=O)NCc2ccccc2N3CCCCC3,-.72 +CNc1cccc(CCOc2ccc(C[C@H](NC(=O)c3c(Cl)cccc3Cl)C(=O)O)cc2C)n1,.34 +COc1ccc(cc1)C2=COc3cc(OC)cc(OC)c3C2=O,3.05 +Oc1ncnc2scc(c3ccsc3)c12,2.25 +CS(=O)(=O)c1ccc(Oc2ccc(cc2)C#C[C@]3(O)CN4CCC3CC4)cc1,1.51 +C[C@H](Nc1nc(Nc2cc(C)[nH]n2)c(C)nc1C#N)c3ccc(F)cn3,2.61 +O=C1CCCCCN1,-.08 +CCCSc1ncccc1C(=O)N2CCCC2c3ccncc3,1.95 +CC1CCCCC1NC(=O)c2cnn(c2NS(=O)(=O)c3ccc(C)cc3)c4ccccc4,1.34 +Nc1ccc(cc1)c2nc3ccc(O)cc3s2,3.2 +COc1ccc(cc1)N2CCN(CC2)C(=O)[C@@H]3CCCC[C@H]3C(=O)NCC#N,1.6 +CCC(COC(=O)c1cc(OC)c(OC)c(OC)c1)(N(C)C)c2ccccc2,3.77 +COc1cc(ccc1N2CC[C@@H](O)C2)N3N=Nc4cc(sc4C3=O)c5ccc(Cl)cc5,3.15 +CO[C@H]1CN(CCN2C(=O)C=Cc3ccc(cc23)C#N)CC[C@H]1NCc4ccc5OCC(=O)Nc5n4,.32 +CC(C)(CCCCCOCCc1ccccc1)NCCc2ccc(O)c3nc(O)sc23,2.92 +Clc1ccc(cc1)C(=O)Nc2oc(nn2)C(=O)Nc3ccc(cc3)N4CCOCC4,1.92 +COc1ccc(Oc2cccc(CN3CCCC(C3)N4C=C(C)C(=O)NC4=O)c2)cc1,3.17 +OC(=O)c1cccc(c1)N2CCC(CN3CCC(CC3)Oc4ccc(Cl)c(Cl)c4)CC2,2.17 +CNCC[C@@H](Oc1ccccc1C)c2ccccc2,1.2 +Clc1ccc(N2CCN(CC2)C(=O)CCCc3ccncc3)c(Cl)c1,3.93 +COc1cnc(nc1N(C)C)c2ccccn2,1.9 +C(CCCCNc1cc(nc2ccccc12)c3ccccc3)CCCNc4cc(nc5ccccc45)c6ccccc6,2.27 +CSc1c(cnn1c2ccc(cc2)C(=O)O)C(=O)NC3C4CC5CC(CC3C5)C4,1.2 +CNC1=Nc2ncccc2C(=NC1c3cccs3)c4occn4,1.14 +CS(=O)(=O)C1(CC1)c2cc(nc(n2)c3cccc4[nH]ccc34)N5CC6CCC(C5)O6,2.6 +CN([C@@H]1CCN(Cc2ccc(cc2)C(F)(F)F)C[C@@H]1F)C(=O)Cc3ccc(cc3)n4cnnn4,3.3 +CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,3.94 +CS(=O)(=O)c1ccccc1C(=O)NC[C@@H](O)CN2CCC(CC2)Oc3ccc(Cl)c(Cl)c3,2.34 +O=C(NCc1ccncc1)c2ccc(Oc3ccccc3C#N)cc2,2.57 +CN(C)c1ccnc2sc(C(=O)NCc3ccccc3)c(N)c12,3.62 +CN1CCN(CC1)c2ccc3N=CN(C(=O)c3c2)c4cc(NC(=O)c5cscn5)ccc4C,2.06 +Cn1cncc1c2c3C(=O)N(CC4CC4)C(=O)N(CC5CC5)c3nn2Cc6ccnc7ccc(Cl)cc67,4.33 +COc1ccc2ncc(C#N)c(CCN3CCC(CC3)NCc4cc5SCOc5cn4)c2c1,2.55 +CNC(=O)C1(CCN(CC[C@H](CN(C)C(=O)c2c(OC)c(cc3ccccc23)C#N)c4ccc(Cl)c(Cl)c4)CC1)N5CCCCC5=O,2.78 +OB1N(C(=O)Nc2ccccc12)c3ccccc3,1.4 +CC(C)N(CCC(C(=O)N)(c1ccccc1)c2ccccn2)C(C)C,-.54 +NC(=NC#N)c1sc(Nc2ccccc2)nc1N,2.91 +CCS(=O)(=O)c1ccc(c(C)c1)c2cc(ccc2O[C@H](C)C(=O)O)C(F)(F)F,-.4 +OC(=O)COc1ccc(cc1c2cc(ccc2F)C#N)C(F)(F)F,-.16 +COc1ccc(cn1)C2=Cc3c(C)nc(N)nc3N([C@@H]4CC[C@H](CC4)OCCO)C2=O,2.2 +CC(Nc1ncnc2ccccc12)c3ccccc3,3.4 +CC(C)c1ccc2Oc3nc(N)c(cc3C(=O)c2c1)C(=O)O,1.1 +O[C@@H](CNCCCOCCOCCc1cccc2ccccc12)c3ccc(O)c4NC(=O)Sc34,2.28 +COc1ccccc1Cn2c(C)nc3ccccc23,3.47 +OC(=O)c1ccc(NC(=O)c2cc(OCc3ccccc3F)cc(OCc4ccccc4F)c2)nc1,3 
+NC(Cc1c[nH]c2ccccc12)C(=O)O,-1.17 +OC(=O)CCC[C@H]1[C@@H](Cc2ccccc12)NC(=O)c3cc4cc(F)ccc4[nH]3,1.95 +CCNC(=O)c1cc2c(c(cnc2[nH]1)c3cncc(c3)C(=O)O)n4ccc(n4)C(F)(F)F,-.99 +C[C@H](NC(=O)c1c(C)nn(C2CCCC2)c1NS(=O)(=O)c3ccc(C)cc3)C(C)(C)C,2 +N(c1ccccc1)c2cc(Nc3ccccc3)[nH]n2,3.8 +COCCNC(=O)c1cccc(Nc2ncc3cc(ccc3n2)c4ccncc4)c1,3.21 +CCC(CC)NC(=O)c1cnn(C)c1NS(=O)(=O)c2ccc(C)cc2,.36 +NC(=O)c1cc(F)cc(O[C@H]2C[C@H]3CC[C@@H](C2)N3Cc4ccccc4)c1,2.14 +O=C1NC(=NC(=C1C#N)c2ccccc2)SCCc3ccccc3,1.71 +OC(C(=O)OC1CN2CCC1CC2)(c3ccccc3)c4ccccc4,1.19 +Cc1ccccc1NC(=O)CCS(=O)(=O)c2ccc(Br)s2,2.7 +CC(C)n1c(C)ncc1c2nc(Nc3ccc(cc3)C(=O)N(C)C)ncc2F,2.77 +COc1cccc(c1)c2c[nH]c(n2)c3ccccc3,3.8 +O=C(COc1ccccc1)c2ccccc2,2.87 +COc1cc2ncc(C(=O)N)c(Nc3ccc(F)cc3F)c2cc1NCCN(C)C,1.91 +CO[C@@H]1CC[C@@]2(CC1)Cc3ccc(OCC(C)C)cc3C24N=C(C)C(=N4)N,3.4 +COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN4CCCC4,3.13 +O=C1CCOc2cc(COc3ccccc3)ccc12,3 +Clc1cccc2cn[nH]c12,2.33 +CNC(=O)c1ccc(CC(=O)N(C)C2CCN(Cc3ccc(cc3)C(F)(F)F)CC2)cc1,2.8 +COCCNCc1ccc(CCNC[C@H](O)c2ccc(O)c3NC(=O)Sc23)cc1,-.54 +Cn1cncc1c2c3C(=O)N(CC#C)C(=O)N(CC4CC4)c3nn2Cc5ccnc6ccc(Cl)cc56,3.16 +C[C@H](NC(=O)c1cccnc1Oc2ccccc2)c3ccccc3,2.91 +Clc1ccc(CN2CC3CNCC(C2)O3)cc1C(=O)NCC45CC6CC(CC(C6)C4)C5,1.55 +COc1cc(NS(=O)(=O)c2ccc(N)cc2)nc(OC)n1,.2 +Cc1cc(CCC2CCN(CC2)S(=O)(=O)CC3(CCOCC3)N(O)C=O)c(C)cn1,1.43 +C[C@H](Nc1ncc(F)c(Nc2cc([nH]n2)C3CC3)n1)c4ncc(F)cn4,2.47 +CC(=O)Nc1ccc2c(c1)c(cn2CCCO)c3cc(NC4CC4)n5ncc(C#N)c5n3,2.48 +CC1COc2c(N3CCN(C)CC3)c(F)cc4C(=O)C(=CN1c24)C(=O)O,-.45 +CC1(CC1)c2nc(ncc2C(=O)N[C@@H]3C4CC5CC3C[C@@](O)(C5)C4)N6CCOCC6,2 +COC(=O)c1ccc(C)c(NS(=O)(=O)c2ccc3N(C)SC(=O)c3c2)c1,2.6 +COc1ccc(cc1)C2=COc3cc(O)cc(O)c3C2=O,3.5 +CNCCCC12CCC(c3ccccc13)c4ccccc24,.89 +Oc1cc(nc2ccnn12)c3ccccc3,1.3 +Fc1cc(cc(F)c1C2=CCN(CC2)C=O)N3C[C@H](COc4ccon4)OC3=O,2.01 +CC(C#C)N1C(=O)N(CC2CC2)c3nn(Cc4ccnc5ccc(Cl)cc45)c(c3C1=O)c6cncn6C,3.59 +C[C@H]1CN(Cc2cc(Cl)ccc2OCC(=O)O)CCN1C(=O)Cc3ccccc3,.18 +COc1cc(Nc2nc(N[C@@H](C)c3ncc(F)cn3)ncc2Br)n[nH]1,2.6 +Cc1nc(C)c(nc1C(=O)N)c2ccc([C@@H]3CC[C@@H](CC(=O)O)CC3)c(F)c2,1.3 +COc1ccnc(CCc2nc3c(C)ccnc3[nH]2)c1,2.1 +Cc1cc(CCCOc2c(Cl)cc(cc2Cl)C3=NCCO3)on1,3.72 +CN(C)C(=O)c1ccc(CN2CCc3cc4nc(N)sc4cc3CC2)cc1,1.72 +COC(=O)[C@H]1[C@@H](O)CC[C@H]2CN3CCc4c([nH]c5ccccc45)[C@@H]3C[C@H]12,1.65 +CCN1CCN(CC1)c2ccc(Nc3cc(ncn3)N(C)C(=O)Nc4c(Cl)c(OC)cc(OC)c4Cl)cc2,3.7 +CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C@H]([C@H](O)[C@@H]2O)n3cnc4c(N)ncnc34,2.2 +CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)c3)c2n1)c4c(Cl)c(OC)cc(OC)c4Cl,2.04 +CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)C(=O)c13,4.49 +COc1ccc(Cc2c(N)n[nH]c2N)cc1,.2 +CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(=O)Nc3cccnc3,2 diff --git a/chemprop/tests/data/regression/mol/mol_with_splits.csv b/chemprop/tests/data/regression/mol/mol_with_splits.csv new file mode 100644 index 0000000000000000000000000000000000000000..0c4f1f4503ba169d4b69f87b9a28f4db735b7cfc --- /dev/null +++ b/chemprop/tests/data/regression/mol/mol_with_splits.csv @@ -0,0 +1,101 @@ +smiles,lipo,split +Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14,3.54,train +COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23,-1.18,val +COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl,3.69,test +OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3,3.37,train +Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1,3.1,val +OC1(CN2CCC1CC2)C#Cc3ccc(cc3)c4ccccc4,3.14,test +COc1cc(OC)c(cc1NC(=O)CCC(=O)O)S(=O)(=O)NCc2ccccc2N3CCCCC3,-0.72,train +CNc1cccc(CCOc2ccc(C[C@H](NC(=O)c3c(Cl)cccc3Cl)C(=O)O)cc2C)n1,0.34,val +COc1ccc(cc1)C2=COc3cc(OC)cc(OC)c3C2=O,3.05,test 
+Oc1ncnc2scc(c3ccsc3)c12,2.25,train +CS(=O)(=O)c1ccc(Oc2ccc(cc2)C#C[C@]3(O)CN4CCC3CC4)cc1,1.51,val +C[C@H](Nc1nc(Nc2cc(C)[nH]n2)c(C)nc1C#N)c3ccc(F)cn3,2.61,test +O=C1CCCCCN1,-0.08,train +CCCSc1ncccc1C(=O)N2CCCC2c3ccncc3,1.95,val +CC1CCCCC1NC(=O)c2cnn(c2NS(=O)(=O)c3ccc(C)cc3)c4ccccc4,1.34,test +Nc1ccc(cc1)c2nc3ccc(O)cc3s2,3.2,train +COc1ccc(cc1)N2CCN(CC2)C(=O)[C@@H]3CCCC[C@H]3C(=O)NCC#N,1.6,val +CCC(COC(=O)c1cc(OC)c(OC)c(OC)c1)(N(C)C)c2ccccc2,3.77,test +COc1cc(ccc1N2CC[C@@H](O)C2)N3N=Nc4cc(sc4C3=O)c5ccc(Cl)cc5,3.15,train +CO[C@H]1CN(CCN2C(=O)C=Cc3ccc(cc23)C#N)CC[C@H]1NCc4ccc5OCC(=O)Nc5n4,0.32,val +CC(C)(CCCCCOCCc1ccccc1)NCCc2ccc(O)c3nc(O)sc23,2.92,test +Clc1ccc(cc1)C(=O)Nc2oc(nn2)C(=O)Nc3ccc(cc3)N4CCOCC4,1.92,train +COc1ccc(Oc2cccc(CN3CCCC(C3)N4C=C(C)C(=O)NC4=O)c2)cc1,3.17,val +OC(=O)c1cccc(c1)N2CCC(CN3CCC(CC3)Oc4ccc(Cl)c(Cl)c4)CC2,2.17,test +CNCC[C@@H](Oc1ccccc1C)c2ccccc2,1.2,train +Clc1ccc(N2CCN(CC2)C(=O)CCCc3ccncc3)c(Cl)c1,3.93,val +COc1cnc(nc1N(C)C)c2ccccn2,1.9,test +C(CCCCNc1cc(nc2ccccc12)c3ccccc3)CCCNc4cc(nc5ccccc45)c6ccccc6,2.27,train +CSc1c(cnn1c2ccc(cc2)C(=O)O)C(=O)NC3C4CC5CC(CC3C5)C4,1.2,val +CNC1=Nc2ncccc2C(=NC1c3cccs3)c4occn4,1.14,test +CS(=O)(=O)C1(CC1)c2cc(nc(n2)c3cccc4[nH]ccc34)N5CC6CCC(C5)O6,2.6,train +CN([C@@H]1CCN(Cc2ccc(cc2)C(F)(F)F)C[C@@H]1F)C(=O)Cc3ccc(cc3)n4cnnn4,3.3,val +CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,3.94,test +CS(=O)(=O)c1ccccc1C(=O)NC[C@@H](O)CN2CCC(CC2)Oc3ccc(Cl)c(Cl)c3,2.34,train +O=C(NCc1ccncc1)c2ccc(Oc3ccccc3C#N)cc2,2.57,val +CN(C)c1ccnc2sc(C(=O)NCc3ccccc3)c(N)c12,3.62,test +CN1CCN(CC1)c2ccc3N=CN(C(=O)c3c2)c4cc(NC(=O)c5cscn5)ccc4C,2.06,train +Cn1cncc1c2c3C(=O)N(CC4CC4)C(=O)N(CC5CC5)c3nn2Cc6ccnc7ccc(Cl)cc67,4.33,val +COc1ccc2ncc(C#N)c(CCN3CCC(CC3)NCc4cc5SCOc5cn4)c2c1,2.55,test +CNC(=O)C1(CCN(CC[C@H](CN(C)C(=O)c2c(OC)c(cc3ccccc23)C#N)c4ccc(Cl)c(Cl)c4)CC1)N5CCCCC5=O,2.78,train +OB1N(C(=O)Nc2ccccc12)c3ccccc3,1.4,val +CC(C)N(CCC(C(=O)N)(c1ccccc1)c2ccccn2)C(C)C,-0.54,test +NC(=NC#N)c1sc(Nc2ccccc2)nc1N,2.91,train +CCS(=O)(=O)c1ccc(c(C)c1)c2cc(ccc2O[C@H](C)C(=O)O)C(F)(F)F,-0.4,val +OC(=O)COc1ccc(cc1c2cc(ccc2F)C#N)C(F)(F)F,-0.16,test +COc1ccc(cn1)C2=Cc3c(C)nc(N)nc3N([C@@H]4CC[C@H](CC4)OCCO)C2=O,2.2,train +CC(Nc1ncnc2ccccc12)c3ccccc3,3.4,val +CC(C)c1ccc2Oc3nc(N)c(cc3C(=O)c2c1)C(=O)O,1.1,test +O[C@@H](CNCCCOCCOCCc1cccc2ccccc12)c3ccc(O)c4NC(=O)Sc34,2.28,train +COc1ccccc1Cn2c(C)nc3ccccc23,3.47,val +OC(=O)c1ccc(NC(=O)c2cc(OCc3ccccc3F)cc(OCc4ccccc4F)c2)nc1,3,test +NC(Cc1c[nH]c2ccccc12)C(=O)O,-1.17,train +OC(=O)CCC[C@H]1[C@@H](Cc2ccccc12)NC(=O)c3cc4cc(F)ccc4[nH]3,1.95,val +CCNC(=O)c1cc2c(c(cnc2[nH]1)c3cncc(c3)C(=O)O)n4ccc(n4)C(F)(F)F,-0.99,test +C[C@H](NC(=O)c1c(C)nn(C2CCCC2)c1NS(=O)(=O)c3ccc(C)cc3)C(C)(C)C,2,train +N(c1ccccc1)c2cc(Nc3ccccc3)[nH]n2,3.8,val +COCCNC(=O)c1cccc(Nc2ncc3cc(ccc3n2)c4ccncc4)c1,3.21,test +CCC(CC)NC(=O)c1cnn(C)c1NS(=O)(=O)c2ccc(C)cc2,0.36,train +NC(=O)c1cc(F)cc(O[C@H]2C[C@H]3CC[C@@H](C2)N3Cc4ccccc4)c1,2.14,val +O=C1NC(=NC(=C1C#N)c2ccccc2)SCCc3ccccc3,1.71,test +OC(C(=O)OC1CN2CCC1CC2)(c3ccccc3)c4ccccc4,1.19,train +Cc1ccccc1NC(=O)CCS(=O)(=O)c2ccc(Br)s2,2.7,val +CC(C)n1c(C)ncc1c2nc(Nc3ccc(cc3)C(=O)N(C)C)ncc2F,2.77,test +COc1cccc(c1)c2c[nH]c(n2)c3ccccc3,3.8,train +O=C(COc1ccccc1)c2ccccc2,2.87,val +COc1cc2ncc(C(=O)N)c(Nc3ccc(F)cc3F)c2cc1NCCN(C)C,1.91,test +CO[C@@H]1CC[C@@]2(CC1)Cc3ccc(OCC(C)C)cc3C24N=C(C)C(=N4)N,3.4,train +COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN4CCCC4,3.13,val +O=C1CCOc2cc(COc3ccccc3)ccc12,3,test +Clc1cccc2cn[nH]c12,2.33,train +CNC(=O)c1ccc(CC(=O)N(C)C2CCN(Cc3ccc(cc3)C(F)(F)F)CC2)cc1,2.8,val 
+COCCNCc1ccc(CCNC[C@H](O)c2ccc(O)c3NC(=O)Sc23)cc1,-0.54,test +Cn1cncc1c2c3C(=O)N(CC#C)C(=O)N(CC4CC4)c3nn2Cc5ccnc6ccc(Cl)cc56,3.16,train +C[C@H](NC(=O)c1cccnc1Oc2ccccc2)c3ccccc3,2.91,val +Clc1ccc(CN2CC3CNCC(C2)O3)cc1C(=O)NCC45CC6CC(CC(C6)C4)C5,1.55,test +COc1cc(NS(=O)(=O)c2ccc(N)cc2)nc(OC)n1,0.2,train +Cc1cc(CCC2CCN(CC2)S(=O)(=O)CC3(CCOCC3)N(O)C=O)c(C)cn1,1.43,val +C[C@H](Nc1ncc(F)c(Nc2cc([nH]n2)C3CC3)n1)c4ncc(F)cn4,2.47,test +CC(=O)Nc1ccc2c(c1)c(cn2CCCO)c3cc(NC4CC4)n5ncc(C#N)c5n3,2.48,train +CC1COc2c(N3CCN(C)CC3)c(F)cc4C(=O)C(=CN1c24)C(=O)O,-0.45,val +CC1(CC1)c2nc(ncc2C(=O)N[C@@H]3C4CC5CC3C[C@@](O)(C5)C4)N6CCOCC6,2,test +COC(=O)c1ccc(C)c(NS(=O)(=O)c2ccc3N(C)SC(=O)c3c2)c1,2.6,train +COc1ccc(cc1)C2=COc3cc(O)cc(O)c3C2=O,3.5,val +CNCCCC12CCC(c3ccccc13)c4ccccc24,0.89,test +Oc1cc(nc2ccnn12)c3ccccc3,1.3,train +Fc1cc(cc(F)c1C2=CCN(CC2)C=O)N3C[C@H](COc4ccon4)OC3=O,2.01,val +CC(C#C)N1C(=O)N(CC2CC2)c3nn(Cc4ccnc5ccc(Cl)cc45)c(c3C1=O)c6cncn6C,3.59,test +C[C@H]1CN(Cc2cc(Cl)ccc2OCC(=O)O)CCN1C(=O)Cc3ccccc3,0.18,train +COc1cc(Nc2nc(N[C@@H](C)c3ncc(F)cn3)ncc2Br)n[nH]1,2.6,val +Cc1nc(C)c(nc1C(=O)N)c2ccc([C@@H]3CC[C@@H](CC(=O)O)CC3)c(F)c2,1.3,test +COc1ccnc(CCc2nc3c(C)ccnc3[nH]2)c1,2.1,train +Cc1cc(CCCOc2c(Cl)cc(cc2Cl)C3=NCCO3)on1,3.72,val +CN(C)C(=O)c1ccc(CN2CCc3cc4nc(N)sc4cc3CC2)cc1,1.72,test +COC(=O)[C@H]1[C@@H](O)CC[C@H]2CN3CCc4c([nH]c5ccccc45)[C@@H]3C[C@H]12,1.65,train +CCN1CCN(CC1)c2ccc(Nc3cc(ncn3)N(C)C(=O)Nc4c(Cl)c(OC)cc(OC)c4Cl)cc2,3.7,val +CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C@H]([C@H](O)[C@@H]2O)n3cnc4c(N)ncnc34,2.2,test +CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)c3)c2n1)c4c(Cl)c(OC)cc(OC)c4Cl,2.04,train +CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)C(=O)c13,4.49,val +COc1ccc(Cc2c(N)n[nH]c2N)cc1,0.2,test +CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(=O)Nc3cccnc3,2, diff --git a/chemprop/tests/data/regression/mol_multitask.csv b/chemprop/tests/data/regression/mol_multitask.csv new file mode 100644 index 0000000000000000000000000000000000000000..361c274ae2a64496dc725912d62b6b5dcc426d2d --- /dev/null +++ b/chemprop/tests/data/regression/mol_multitask.csv @@ -0,0 +1,500 @@ +"smiles","mu","alpha","homo","lumo","gap","r2","zpve","cv","u0","u298","h298","g298" +"C",0,13.21,-0.3877,0.1171,0.5048,35.3641,0.044749,6.469,-40.47893,-40.476062,-40.475117,-40.498597 +"N",1.6256,9.46,-0.257,0.0829,0.3399,26.1563,0.034358,6.316,-56.525887,-56.523026,-56.522082,-56.544961 +"O",1.8511,6.31,-0.2928,0.0687,0.3615,19.0002,0.021375,6.002,-76.404702,-76.401867,-76.400922,-76.422349 +"C#C",0,16.28,-0.2845,0.0506,0.3351,59.5248,0.026841,8.574,-77.308427,-77.305527,-77.304583,-77.327429 +"C#N",2.8937,12.99,-0.3604,0.0191,0.3796,48.7476,0.016601,6.278,-93.411888,-93.40937,-93.408425,-93.431246 +"C=O",2.1089,14.18,-0.267,-0.0406,0.2263,59.9891,0.026603,6.413,-114.483613,-114.480746,-114.479802,-114.505268 +"CC",0,23.95,-0.3385,0.1041,0.4426,109.5031,0.074542,10.098,-79.764152,-79.760666,-79.759722,-79.787269 +"CO",1.5258,16.97,-0.2653,0.0784,0.3437,83.794,0.051208,8.751,-115.679136,-115.675816,-115.674872,-115.701876 +"CC#C",0.7156,28.78,-0.2609,0.0613,0.3222,177.1963,0.05541,12.482,-116.609549,-116.60555,-116.604606,-116.633775 +"CC#N",3.8266,24.45,-0.3264,0.0376,0.364,160.7223,0.045286,10.287,-132.71815,-132.714563,-132.713619,-132.742149 +"CC=O",2.5682,25.11,-0.254,-0.0198,0.2342,166.9728,0.055355,11.219,-153.787612,-153.783728,-153.782784,-153.812518 +"C(=O)N",3.7286,21.57,-0.2543,0.0302,0.2845,145.3078,0.045279,10.89,-169.860788,-169.856903,-169.855958,-169.885594 
+"CCC",0.0597,34.75,-0.323,0.0949,0.4179,227.1361,0.103182,14.84,-119.052475,-119.047927,-119.046983,-119.078157 +"CCO",1.4131,27.87,-0.2619,0.0798,0.3417,193.1659,0.079754,13.546,-154.972731,-154.968412,-154.967467,-154.998148 +"COC",1.1502,28.13,-0.2525,0.091,0.3435,187.1015,0.079534,12.934,-154.960361,-154.956045,-154.9551,-154.985747 +"C1CC1",0.0005,30.82,-0.2888,0.1042,0.393,155.8145,0.081231,11.041,-117.824798,-117.821426,-117.820482,-117.849087 +"C1CO1",1.7675,24.04,-0.2682,0.1042,0.3724,129.891,0.057289,9.176,-153.742562,-153.73941,-153.738466,-153.766642 +"CC(=O)C",2.7362,35.53,-0.2431,-0.0087,0.2344,292.4367,0.083382,16.893,-193.08834,-193.082969,-193.082024,-193.116476 +"CC(=O)N",3.6367,31.83,-0.2436,0.0347,0.2783,267.6148,0.07319,16.561,-209.159302,-209.15402,-209.153076,-209.187468 +"C(=O)(N)N",3.4869,28.07,-0.2495,0.0556,0.3051,244.2308,0.063824,15.292,-225.221461,-225.217075,-225.216131,-225.247724 +"CC(C)C",0.0897,45.46,-0.3167,0.0843,0.401,355.0621,0.131146,20.273,-158.342346,-158.336603,-158.335658,-158.370016 +"CC(C)O",1.4259,38.58,-0.2612,0.074,0.3351,318.3721,0.107673,19.052,-194.267232,-194.261748,-194.260804,-194.294663 +"C#CC#C",0,38.52,-0.2599,-0.0214,0.2386,278.6264,0.037354,15.312,-153.459846,-153.455442,-153.454498,-153.482621 +"C#CC#N",3.792,32.66,-0.3102,-0.0543,0.2559,260.1896,0.027259,12.93,-169.557758,-169.553764,-169.55282,-169.581024 +"C(#N)C#N",0.0023,27.7,-0.3696,-0.0926,0.277,242.9308,0.015951,10.398,-185.648533,-185.644825,-185.64388,-185.667652 +"C#CC=O",2.7824,31.14,-0.2777,-0.0735,0.2042,268.3921,0.037208,13.049,-190.624631,-190.620363,-190.619419,-190.650543 +"C(=O)C#N",2.3112,26.25,-0.3166,-0.11,0.2066,251.0007,0.02654,11.329,-206.721858,-206.717875,-206.716931,-206.747625 +"C(=O)C=O",0.002,26.12,-0.2668,-0.1113,0.1555,266.8164,0.036943,12.147,-227.798785,-227.79457,-227.793626,-227.825074 +"CC#CC",0,42.32,-0.2412,0.0684,0.3096,400.2236,0.083896,17.447,-155.908941,-155.90318,-155.902236,-155.937641 +"CCC#C",0.7067,40.09,-0.2592,0.0566,0.3157,333.9589,0.084338,17.13,-155.897345,-155.892291,-155.891347,-155.924226 +"CCC#N",3.9233,35.38,-0.3213,0.034,0.3553,314.5335,0.07419,14.988,-172.006141,-172.001467,-172.000523,-172.032826 +"C(C#N)N",4.4361,31.81,-0.2683,0.0173,0.2855,295.6635,0.063305,14.488,-188.042067,-188.037478,-188.036534,-188.06863 +"C#CCO",1.7211,33.1,-0.2595,0.0277,0.2872,300.0993,0.060632,15.855,-191.810916,-191.806025,-191.805081,-191.837634 +"C(C#N)O",4.6788,28.56,-0.3018,0.0022,0.3039,280.6659,0.050262,13.845,-207.916786,-207.912215,-207.911271,-207.943384 +"CCC=O",2.6741,35.83,-0.25,-0.0205,0.2295,333.3276,0.084175,15.954,-193.075202,-193.070116,-193.069171,-193.102798 +"CNC=O",3.7071,32.78,-0.2516,0.0335,0.2851,279.7863,0.074166,15.058,-209.144909,-209.139976,-209.139032,-209.172305 +"COC=O",3.92,29.47,-0.2814,0.0074,0.2888,293.917,0.061327,13.885,-229.013797,-229.009003,-229.008059,-229.041086 +"C(C=O)O",1.7341,28.53,-0.2537,-0.0341,0.2196,303.8129,0.060508,14.78,-228.992613,-228.987769,-228.986825,-229.019918 +"CCCC",0,45.71,-0.317,0.0937,0.4107,426.2996,0.131708,19.668,-158.340943,-158.33517,-158.334226,-158.36894 +"CCCO",1.3402,38.61,-0.2619,0.081,0.3429,382.8628,0.108241,18.431,-194.261089,-194.255495,-194.254551,-194.28893 +"CCOC",1.0363,39.34,-0.2503,0.0925,0.3428,368.9331,0.107895,17.888,-194.254127,-194.248585,-194.247641,-194.281899 +"C(CO)O",0.0075,31.42,-0.2594,0.0584,0.3179,297.8398,0.085172,16.837,-230.183076,-230.177723,-230.176779,-230.211195 
+"CC1CC1",0.1136,41.96,-0.2727,0.1012,0.3738,298.6061,0.109284,16.49,-157.116735,-157.11209,-157.111146,-157.143262 +"CC1CO1",1.812,35.01,-0.2633,0.1052,0.3685,267.2979,0.085275,14.764,-193.039603,-193.035186,-193.034242,-193.065979 +"CN1CC1",1.1353,39.02,-0.2304,0.0968,0.3271,270.5508,0.097671,15.298,-173.147782,-173.143343,-173.142399,-173.174073 +"C1CC1O",1.3894,34.64,-0.239,0.0775,0.3166,263.966,0.085106,15.679,-193.034988,-193.030356,-193.029411,-193.061689 +"C1CCC1",0,41.83,-0.2982,0.0956,0.3938,268.4432,0.110511,14.696,-157.115484,-157.111322,-157.110378,-157.141657 +"C1COC1",1.7978,34.56,-0.2424,0.0859,0.3283,236.9759,0.086675,12.915,-193.034094,-193.029968,-193.029024,-193.060777 +"CC(=NO)C",0.6875,45.37,-0.2392,0.0192,0.2584,452.5112,0.100501,21.616,-248.375248,-248.368823,-248.367879,-248.405354 +"c1cc[nH]c1",1.8689,43.14,-0.2029,0.0499,0.2528,303.9808,0.082433,14.821,-210.101789,-210.097816,-210.096872,-210.12818 +"c1cnc[nH]1",3.6193,39.13,-0.2253,0.0332,0.2585,283.6817,0.071145,13.371,-226.160842,-226.157088,-226.156144,-226.187104 +"c1ccoc1",0.5571,39.2,-0.2246,0.0199,0.2445,289.005,0.069883,13.358,-229.969129,-229.965414,-229.96447,-229.995393 +"c1cocn1",1.5081,35.17,-0.2509,0.001,0.2519,269.2444,0.058593,12.04,-246.02915,-246.025614,-246.024669,-246.055309 +"CC(C)(C)C",0.0003,56.01,-0.3145,0.0737,0.3882,486.2719,0.158836,26.084,-197.632222,-197.625241,-197.624297,-197.661411 +"CC(C)(C)O",1.384,49.04,-0.2601,0.0664,0.3265,449.0573,0.134977,25.128,-233.560626,-233.553779,-233.552834,-233.589759 +"CC(=O)C#C",2.8579,42.02,-0.2654,-0.0575,0.2079,416.7799,0.065175,18.723,-229.927277,-229.921622,-229.920677,-229.955843 +"CC(=O)C#N",3.3351,37.08,-0.3007,-0.0889,0.2118,397.7757,0.05451,16.963,-246.027383,-246.022024,-246.02108,-246.055769 +"C(#N)C(=N)N",5.1815,38.31,-0.2739,-0.0438,0.2301,384.5574,0.056619,17.465,-242.19573,-242.190591,-242.189646,-242.223513 +"C#CC(=O)N",3.7167,38.29,-0.2629,-0.0277,0.2352,390.4619,0.054731,18.527,-245.997884,-245.992256,-245.991312,-246.026404 +"CC(=O)C=O",0.9461,36.51,-0.2538,-0.0964,0.1574,399.222,0.064992,17.806,-267.10335,-267.097658,-267.096714,-267.132534 +"C(=O)C(=N)N",2.7707,38.09,-0.2561,-0.064,0.192,384.4827,0.067668,17.972,-263.278851,-263.273589,-263.272645,-263.306835 +"C(=O)C(=O)N",5.1668,33.39,-0.2533,-0.0763,0.177,381.9882,0.054577,17.525,-283.16874,-283.163262,-283.162318,-283.197298 +"CC(C)C#C",0.6578,51.2,-0.2589,0.0571,0.316,481.9854,0.112471,22.569,-195.186772,-195.180446,-195.179502,-195.215658 +"CC(C)C#N",3.9512,46.23,-0.318,0.0365,0.3545,460.694,0.102281,20.467,-211.295796,-211.289821,-211.288877,-211.324525 +"CC(N)C#N",2.7429,42.87,-0.2704,0.0278,0.2983,440.0738,0.091554,19.946,-227.338075,-227.332253,-227.331309,-227.366638 +"CC(O)C#C",1.3582,44.03,-0.2665,0.0336,0.3001,444.6452,0.088908,21.306,-231.108368,-231.102292,-231.101348,-231.137061 +"CC(O)C#N",3.269,39.28,-0.3051,0.0115,0.3166,424.3395,0.078602,19.252,-247.214861,-247.209162,-247.208218,-247.243338 +"CC(C)C=O",2.6921,46.58,-0.2469,-0.0188,0.2281,482.0475,0.112328,21.434,-232.364952,-232.358577,-232.357633,-232.394589 +"CC(O)C=O",2.8354,39.16,-0.255,-0.0279,0.2271,432.1489,0.088443,20.344,-268.287661,-268.281505,-268.280561,-268.316982 +"CN(C)C=O",3.7163,44.42,-0.2424,0.0327,0.2751,441.85,0.10227,19.918,-248.430371,-248.424309,-248.423365,-248.459383 +"CC(=O)CO",2.9514,39.33,-0.2699,-0.0262,0.2437,440.1727,0.088924,19.824,-268.301176,-268.295084,-268.29414,-268.331307 
+"CCC(=O)C",2.6168,46.19,-0.2423,-0.0072,0.2351,489.8518,0.112006,21.716,-232.377706,-232.371073,-232.370129,-232.408256 +"CCC(=O)N",3.499,42.54,-0.2438,0.0355,0.2793,457.447,0.101847,21.374,-248.448467,-248.441988,-248.441044,-248.478935 +"CC(=O)NC",3.5402,43.61,-0.2418,0.0387,0.2805,458.9803,0.101735,20.967,-248.443503,-248.436899,-248.435955,-248.474272 +"CNC(=O)N",3.5648,39.51,-0.2436,0.0599,0.3034,428.404,0.091713,20.274,-264.504487,-264.498452,-264.497508,-264.533633 +"COC(C)=N",1.1876,43.57,-0.2595,0.0352,0.2948,443.1687,0.102062,20.256,-248.416462,-248.410358,-248.409414,-248.445651 +"CC(=O)OC",1.7569,39.33,-0.2685,0.0174,0.2859,427.6606,0.089436,19.501,-268.32127,-268.315051,-268.314106,-268.351214 +"COC(=O)N",2.329,35.5,-0.2669,0.0575,0.3244,398.5908,0.079271,19.084,-284.385189,-284.379361,-284.378417,-284.414085 +"C(C(=O)N)O",4.5676,35.87,-0.246,0.0273,0.2733,418.5967,0.078222,20.089,-284.360325,-284.354148,-284.353204,-284.390143 +"[NH3+]CC([O-])=O",5.3004,35.19,-0.2527,0.0208,0.2735,408.0279,0.080317,17.931,-284.372483,-284.367172,-284.366228,-284.40095 +"CC(C)CO",1.3149,49.26,-0.2629,0.0789,0.3418,516.4357,0.136209,23.924,-233.551389,-233.544542,-233.543598,-233.581067 +"CC(O)CO",2.2854,42.01,-0.2597,0.0631,0.3228,438.3028,0.11345,22.072,-269.479234,-269.472993,-269.472049,-269.508213 +"CCC(C)C",0.0618,56.26,-0.3085,0.085,0.3934,565.8412,0.159632,25.169,-197.629387,-197.622325,-197.621381,-197.659365 +"CCC(C)O",1.3894,49.45,-0.2617,0.0733,0.335,521.8605,0.136091,23.99,-233.555951,-233.549143,-233.548199,-233.585602 +"CC(C)OC",1.0758,49.8,-0.2476,0.086,0.3336,507.9614,0.135681,23.573,-233.545899,-233.539034,-233.53809,-233.57582 +"CC1(CC1)C",0.1068,52.92,-0.2633,0.0893,0.3526,439.1643,0.137025,22.258,-196.409349,-196.403398,-196.402453,-196.437676 +"CC1(CO1)C",1.8235,45.7,-0.2596,0.091,0.3505,405.521,0.112851,20.719,-232.335768,-232.329981,-232.329037,-232.364091 +"CC1(CC1)O",1.3822,45.85,-0.2494,0.0776,0.327,401.4888,0.113237,21.17,-232.33341,-232.327734,-232.32679,-232.36147 +"N=C1CCO1",2.5732,40.19,-0.263,0.0277,0.2907,352.0378,0.080558,15.51,-247.201165,-247.196699,-247.195755,-247.22841 +"C1CC(=O)C1",2.7119,42.77,-0.2415,-0.0194,0.2222,379.6371,0.090544,17.076,-231.15578,-231.150804,-231.149859,-231.184401 +"C1CNC1=O",3.6671,39.73,-0.25,0.038,0.288,355.0934,0.080442,16.1,-247.225618,-247.220897,-247.219953,-247.253218 +"C1COC1=O",3.9339,35.4,-0.2788,0.0089,0.2878,336.0792,0.068574,14.483,-267.106213,-267.101929,-267.100985,-267.13332 +"C1C(=O)CN1",2.5257,39.34,-0.2437,-0.0258,0.2179,359.152,0.07965,16.083,-247.190194,-247.185543,-247.184599,-247.217688 +"C1C(=O)CO1",0.8477,35.94,-0.2647,-0.0352,0.2295,344.5473,0.066989,15.115,-267.068488,-267.063976,-267.063032,-267.095887 +"CC1CCC1",0.095,52.94,-0.2896,0.0927,0.3823,455.0215,0.138424,20.299,-196.407957,-196.40245,-196.401505,-196.436159 +"CC1CCO1",1.6826,45.72,-0.241,0.0915,0.3325,407.6909,0.11459,18.582,-232.33123,-232.325877,-232.324933,-232.359589 +"CC1COC1",1.8995,45.45,-0.2419,0.0814,0.3233,414.1575,0.114694,18.443,-232.325947,-232.320548,-232.319604,-232.354576 +"C1CC(C1)O",1.4604,45.55,-0.256,0.0801,0.3361,413.9118,0.115063,19.023,-232.333258,-232.328097,-232.327153,-232.361103 +"C1C(CO1)O",2.4158,38.58,-0.2465,0.0604,0.3069,374.4087,0.091005,17.344,-268.248371,-268.243234,-268.24229,-268.276572 +"CC1CC1C",0.1023,52.92,-0.2632,0.094,0.3573,465.3301,0.137341,22.005,-196.406419,-196.40034,-196.399396,-196.435152 
+"CC1CC1O",1.3092,45.84,-0.2518,0.0822,0.334,418.298,0.113827,20.702,-232.32907,-232.323327,-232.322383,-232.357426 +"CC1CN1C",1.2483,49.79,-0.2199,0.0931,0.313,432.5081,0.125556,20.993,-212.438188,-212.432286,-212.431342,-212.466725 +"CC1OC1C",1.8159,46.02,-0.2573,0.0984,0.3557,432.2224,0.113178,20.451,-232.334436,-232.32857,-232.327626,-232.363064 +"OC1CC1O",2.4925,38.43,-0.2369,0.0642,0.3011,370.2795,0.090396,19.173,-268.251114,-268.245852,-268.244908,-268.278934 +"C1C2CC1C2",0.0002,49.82,-0.2974,0.1082,0.4056,328.2069,0.116844,15.584,-195.158734,-195.154774,-195.15383,-195.185188 +"C1C2CC1O2",1.8725,42.39,-0.2352,0.0994,0.3346,298.4772,0.092109,14.648,-231.069318,-231.065445,-231.064501,-231.095721 +"C#CCC#C",0.4777,45.22,-0.2633,0.0345,0.2978,470.3666,0.065276,19.399,-192.736096,-192.730524,-192.72958,-192.764175 +"C#CCC#N",3.5925,40.56,-0.2911,0.0111,0.3022,448.7393,0.05503,17.258,-208.842347,-208.837159,-208.836215,-208.87023 +"C(C#N)C#N",3.6958,36.12,-0.3494,-0.0151,0.3344,429.0927,0.044642,15.259,-224.94466,-224.939832,-224.938888,-224.972334 +"C#CCC=O",2.0711,41.41,-0.2583,-0.0337,0.2245,483.1343,0.065029,18.303,-229.918003,-229.912379,-229.911435,-229.946855 +"C(C=O)C#N",2.172,36.79,-0.2868,-0.0568,0.23,464.3999,0.054694,16.279,-246.023231,-246.017964,-246.017019,-246.051902 +"C(=N)NC=O",5.0884,38.8,-0.2491,-0.0216,0.2276,391.0845,0.068192,16.694,-263.280728,-263.275534,-263.27459,-263.308973 +"N=COC=O",4.2338,35.69,-0.2934,-0.0298,0.2636,460.7073,0.055229,15.704,-283.156647,-283.151564,-283.15062,-283.184645 +"C(=O)NC=O",5.2904,34.31,-0.2608,-0.0416,0.2191,372.7845,0.055704,15.692,-283.179781,-283.17476,-283.173816,-283.207861 +"CC#CC#C",1.1881,54.54,-0.242,-0.0085,0.2335,576.5936,0.065688,19.382,-192.762455,-192.756657,-192.755713,-192.790147 +"CC#CC#N",5.1545,47.82,-0.2871,-0.0377,0.2494,553.2094,0.055566,17.054,-208.863267,-208.85788,-208.856936,-208.890804 +"CC#CC=O",3.783,45.67,-0.2642,-0.0592,0.2051,568.7849,0.065532,18.156,-229.928448,-229.922366,-229.921422,-229.958956 +"CC#CCO",1.2486,46.92,-0.2421,0.0368,0.2789,613.8892,0.089036,20.884,-231.111403,-231.104707,-231.103763,-231.142502 +"CCC#CC",0.078,53.93,-0.2401,0.0631,0.3033,652.4812,0.112761,22.115,-195.196751,-195.18988,-195.188936,-195.228146 +"CN=COC",2.9091,46.78,-0.2481,0.0251,0.2732,567.546,0.10111,20.343,-248.395289,-248.388824,-248.38788,-248.425185 +"CCCC#C",0.7752,51.47,-0.2586,0.0583,0.3169,593.0141,0.112736,22.023,-195.186228,-195.179881,-195.178937,-195.215412 +"CCCC#N",4.0641,46.62,-0.3185,0.0365,0.355,569.5065,0.102603,19.863,-211.295163,-211.289209,-211.288265,-211.324151 +"CNCC#N",4.1469,43.52,-0.249,0.0208,0.2698,535.2152,0.091346,19.026,-227.324775,-227.318937,-227.317993,-227.353619 +"COCC#C",1.3623,45.04,-0.2565,0.0324,0.2889,532.4302,0.088734,20.194,-231.092208,-231.086089,-231.085144,-231.121215 +"COCC#N",4.5445,40.22,-0.2859,0.008,0.2939,509.5881,0.078474,18.14,-247.198,-247.192237,-247.191293,-247.226866 +"C#CCCO",1.3892,44.05,-0.2624,0.0549,0.3174,542.7424,0.089329,20.716,-231.10631,-231.100171,-231.099227,-231.135302 +"C(CO)C#N",3.7158,39.41,-0.2914,0.0323,0.3237,519.4529,0.079045,18.636,-247.21445,-247.20867,-247.207726,-247.243281 +"CCCC=O",2.7552,47.03,-0.2487,-0.0195,0.2292,599.5103,0.112697,20.766,-232.36363,-232.357283,-232.356339,-232.39356 +"CCNC=O",3.694,43.88,-0.2499,0.0335,0.2834,490.704,0.102695,19.864,-248.436061,-248.429994,-248.429049,-248.466001 +"CCOC=O",4.0425,40.48,-0.2778,0.0091,0.2868,510.9124,0.089832,18.727,-268.307873,-268.301991,-268.301047,-268.337439 
+"COCC=O",2.7959,40.04,-0.2577,-0.0282,0.2295,546.3932,0.088447,19.186,-268.27324,-268.267033,-268.266088,-268.30312 +"C(CO)C=O",1.4229,39.92,-0.2536,-0.0285,0.225,550.4469,0.089449,19.422,-268.282665,-268.276602,-268.275658,-268.312338 +"CCCCC",0.0603,56.8,-0.3105,0.0925,0.403,721.0614,0.160138,24.552,-197.629416,-197.622321,-197.621376,-197.659721 +"CCCCO",1.3815,49.63,-0.2615,0.0813,0.3428,669.0122,0.136722,23.274,-233.549368,-233.542485,-233.54154,-233.579515 +"CCCOC",0.9755,50.27,-0.25,0.0926,0.3426,647.6919,0.136321,22.785,-233.542445,-233.535569,-233.534624,-233.572603 +"CCOCC",0.9301,50.62,-0.2483,0.0931,0.3414,631.0818,0.136152,22.881,-233.547877,-233.541027,-233.540083,-233.578016 +"COCCO",2.2019,42.99,-0.261,0.0821,0.3431,522.4181,0.113563,21.001,-269.465281,-269.45898,-269.458036,-269.494828 +"C(CO)CO",2.7191,42.52,-0.2607,0.0779,0.3386,618.0376,0.113242,22.005,-269.468415,-269.461727,-269.460783,-269.498456 +"C#CC1CC1",0.8635,48.65,-0.2425,0.0503,0.2928,429.6736,0.090505,18.785,-193.963318,-193.958152,-193.957208,-193.990986 +"C#CC1CN1",1.2763,45.22,-0.2422,0.0322,0.2744,408.3613,0.079399,17.704,-210.001565,-209.996559,-209.995615,-210.029132 +"C#CC1CO1",1.7013,41.39,-0.265,0.0189,0.2839,397.8388,0.066467,16.924,-229.879598,-229.87469,-229.873746,-229.907107 +"C1CC1C#N",4.1966,43.5,-0.2931,0.0289,0.322,407.3682,0.080403,16.544,-210.071468,-210.066701,-210.065757,-210.098937 +"N#CC1CN1",2.8377,40.21,-0.2775,0.0076,0.2851,386.5557,0.069125,15.572,-226.107205,-226.102575,-226.10163,-226.134578 +"N#CC1CO1",3.6533,36.45,-0.3077,-0.0072,0.3004,376.0875,0.056139,14.866,-245.983375,-245.978827,-245.977883,-246.010694 +"C1CC1C=O",3.147,44.1,-0.2486,-0.0157,0.2329,436.2586,0.090683,17.457,-231.145151,-231.140101,-231.139157,-231.173071 +"O=CC1CN1",2.03,40.19,-0.2639,-0.0381,0.2258,363.3567,0.079887,16.144,-247.184334,-247.179519,-247.178575,-247.211988 +"O=CC1CO1",2.3951,36.8,-0.2636,-0.0418,0.2217,402.0419,0.066465,15.818,-267.060326,-267.055484,-267.05454,-267.088115 +"C1CN1C=O",3.2517,40.77,-0.2555,-0.0013,0.2542,383.6013,0.079595,16.197,-247.192524,-247.187626,-247.186682,-247.22031 +"CCC1CC1",0.095,52.9,-0.2723,0.0986,0.3709,514.3053,0.137725,21.364,-196.40532,-196.399392,-196.398448,-196.434166 +"CCC1CO1",1.7468,45.95,-0.2623,0.0976,0.3599,471.3094,0.113861,19.579,-232.328175,-232.322501,-232.321557,-232.356864 +"CCN1CC1",1.0617,50.07,-0.2299,0.095,0.3249,474.34,0.126034,20.247,-212.439286,-212.433577,-212.432633,-212.467838 +"COC1CC1",1.0906,46.29,-0.244,0.0955,0.3395,452.0432,0.113725,19.738,-232.3174,-232.311743,-232.310798,-232.345925 +"C1CC1CO",1.3201,46.01,-0.2601,0.079,0.339,468.6229,0.1143,20.059,-232.324966,-232.319256,-232.318312,-232.35363 +"OCC1CN1",2.9395,42.64,-0.2436,0.0677,0.3113,445.0397,0.103108,19.038,-248.364357,-248.358808,-248.357864,-248.392932 +"OCC1CO1",1.7822,38.7,-0.2641,0.0777,0.3418,424.287,0.090736,18.033,-268.248806,-268.243491,-268.242547,-268.277228 +"C1CC=CC1",0.1413,50.34,-0.2335,0.0325,0.2661,375.5096,0.116471,17.052,-195.226072,-195.221411,-195.220467,-195.253489 +"C1COC=N1",1.6299,38.7,-0.2479,0.0241,0.272,315.6279,0.081779,14.168,-247.215049,-247.210764,-247.20982,-247.242343 +"C1C=CCO1",1.5715,43.73,-0.2303,0.0188,0.2491,336.0014,0.092218,15.461,-231.142581,-231.138099,-231.137155,-231.169932 +"C1CCCC1",0.001,52,-0.3078,0.0844,0.3922,414.0869,0.140316,18.624,-196.432825,-196.427608,-196.426663,-196.462198 +"C1CCOC1",1.465,45.06,-0.2479,0.0825,0.3304,374.1439,0.116649,16.873,-232.351764,-232.346824,-232.34588,-232.380488 
+"C1COCO1",1.3482,38.2,-0.2572,0.0865,0.3437,333.797,0.092697,15.172,-268.273739,-268.269015,-268.268071,-268.302293 +"C1C2CCC12",0.2527,48.92,-0.2519,0.0987,0.3506,350.7167,0.116089,16.868,-195.179738,-195.175335,-195.174391,-195.206606 +"C1C2COC12",1.7167,41.77,-0.2337,0.0853,0.319,315.0958,0.091972,15.312,-231.093096,-231.088957,-231.088013,-231.119749 +"C1CC2OC12",1.8518,41.8,-0.2517,0.0951,0.3468,317.7261,0.091787,15.427,-231.102112,-231.097923,-231.096979,-231.128841 +"c1c[nH]nc1",2.2277,39.17,-0.2438,0.0239,0.2677,284.6224,0.071284,13.255,-226.144377,-226.140644,-226.1397,-226.170637 +"c1cnn[nH]1",4.2954,35.66,-0.2641,-0.0023,0.2618,266.8979,0.059084,12.248,-242.180146,-242.176561,-242.175617,-242.206312 +"c1[nH]ncn1",2.79,35.01,-0.2705,0.0059,0.2764,264.8693,0.059902,11.923,-242.205627,-242.202078,-242.201134,-242.231775 +"c1[nH]cnn1",5.498,35.27,-0.2551,0.0164,0.2715,266.6747,0.059116,12.36,-242.196351,-242.192733,-242.191789,-242.222534 +"c1cn[nH]n1",0.0597,35.3,-0.2742,-0.0055,0.2687,265.6333,0.059852,11.936,-242.18601,-242.182459,-242.181515,-242.212163 +"c1cnoc1",2.8629,35.74,-0.2677,-0.0138,0.2538,272.9362,0.058014,12.232,-245.994065,-245.9905,-245.989556,-246.020256 +"c1conn1",3.4411,32.49,-0.2862,-0.044,0.2422,256.5774,0.045382,11.821,-262.035127,-262.031571,-262.030627,-262.061292 +"c1ncon1",1.126,31.56,-0.3054,-0.0335,0.2719,253.56,0.046608,11.003,-262.056533,-262.053124,-262.05218,-262.082625 +"c1nnco1",3.1035,31.41,-0.2889,-0.0178,0.2711,252.7276,0.046502,11.122,-262.065493,-262.06207,-262.061126,-262.091582 +"c1nnno1",2.8915,28.58,-0.331,-0.0643,0.2667,239.8863,0.033398,10.82,-278.075775,-278.072348,-278.071404,-278.101859 +"c1cnon1",3.2148,32.29,-0.3213,-0.0515,0.2698,256.8034,0.045873,11.319,-262.017735,-262.014288,-262.013344,-262.043855 +"c1nnon1",1.9641,28.86,-0.3379,-0.0815,0.2563,240.038,0.033459,10.694,-278.059333,-278.055942,-278.054997,-278.0854 +"n1nnon1",0.6581,25.66,-0.3671,-0.1073,0.2598,226.0123,0.020349,10.401,-294.081616,-294.078226,-294.077281,-294.10766 +"CC(=NO)C#C",0.6411,54.16,-0.2383,-0.0315,0.2068,618.9851,0.081951,23.675,-285.217962,-285.211126,-285.210182,-285.248449 +"CC(=NO)CO",1.9975,49.13,-0.2502,0.0066,0.2567,639.6693,0.105605,25.106,-323.58568,-323.578268,-323.577324,-323.617025 +"CCC(=NO)C",0.6759,56.46,-0.239,0.0173,0.2562,699.8479,0.129195,26.442,-287.662612,-287.654946,-287.654001,-287.694375 +"C1CC(=NO)C1",0.8763,53.48,-0.2405,0.0074,0.2479,599.0421,0.107557,21.872,-286.438084,-286.4319,-286.430956,-286.468514 +"C1C(=NO)CN1",1.2383,49.95,-0.23,0.0026,0.2326,576.9778,0.096619,20.934,-302.472397,-302.466506,-302.465561,-302.501719 +"C1C(=NO)CO1",1.1556,46.34,-0.2498,-0.0046,0.2452,562.026,0.084017,19.932,-322.351046,-322.345309,-322.344365,-322.380266 +"C(F)(F)(F)F",0.0003,15.93,-0.4286,0.1935,0.6221,279.3208,0.017147,12.639,-437.484875,-437.480956,-437.480011,-437.512059 +"N=C1NC=CO1",3.2823,44.74,-0.1947,0.0288,0.2235,443.1895,0.074595,18.338,-301.367422,-301.362374,-301.36143,-301.395667 +"N=C1OC=CO1",3.6351,40.77,-0.2262,0.0118,0.238,424.2581,0.062567,16.38,-321.236907,-321.232405,-321.231461,-321.264644 +"c1c[nH]c(=O)[nH]1",3.6997,43.39,-0.1951,0.0406,0.2357,444.2644,0.075511,18.314,-301.405558,-301.400597,-301.399653,-301.433583 +"c1coc(=O)[nH]1",4.6537,39.72,-0.2215,0.0207,0.2422,425.4583,0.063077,16.761,-321.276274,-321.271636,-321.270692,-321.3041 +"c1coc(=O)o1",4.4195,36.02,-0.253,0.0027,0.2556,406.9334,0.050756,15.099,-341.145624,-341.141349,-341.140405,-341.173218 
+"Cc1ccc[nH]1",1.9162,55.77,-0.1934,0.0496,0.243,523.1127,0.109918,20.832,-249.397368,-249.391707,-249.390763,-249.42627 +"Cc1ccco1",0.5222,51.86,-0.2124,0.0239,0.2363,501.9826,0.097415,19.38,-269.267574,-269.262222,-269.261277,-269.2962 +"Cc1cnco1",2.0636,47.63,-0.2364,0.0066,0.243,481.0455,0.086244,18.024,-285.328258,-285.323118,-285.322174,-285.356714 +"Cc1ncco1",1.3368,47.52,-0.2371,0.0082,0.2453,474.9666,0.086046,18.156,-285.330157,-285.32495,-285.324006,-285.358812 +"c1cc([nH]c1)N",1.5102,51.62,-0.185,0.0556,0.2406,495.5949,0.099063,20.507,-265.441614,-265.436072,-265.435128,-265.470365 +"c1cc(oc1)N",1.5774,48.3,-0.1844,0.0397,0.2241,472.7151,0.086472,19.374,-285.313612,-285.308395,-285.307451,-285.341898 +"c1c(ocn1)N",2.9659,44.3,-0.2049,0.0202,0.2251,451.8468,0.075243,18,-301.373496,-301.368495,-301.36755,-301.401649 +"c1coc(n1)N",1.8589,43.86,-0.2088,0.0284,0.2373,444.8744,0.075481,17.958,-301.381127,-301.376265,-301.375321,-301.409022 +"c1cc([nH]c1)O",0.9982,47.38,-0.1814,0.0635,0.2449,474.5019,0.0864,19.901,-285.320198,-285.314886,-285.313942,-285.348469 +"c1c([nH]cn1)O",2.938,43.45,-0.2022,0.0448,0.247,453.0651,0.075256,18.316,-301.377995,-301.372972,-301.372028,-301.40609 +"c1cnc([nH]1)O",2.18,43.23,-0.2051,0.0505,0.2555,445.8144,0.075677,18.016,-301.389192,-301.38437,-301.383426,-301.417032 +"Cc1cc[nH]c1",1.6978,55.32,-0.1982,0.0533,0.2515,528.9258,0.109903,20.948,-249.395323,-249.38964,-249.388696,-249.424242 +"Cc1c[nH]cn1",3.3147,51.32,-0.2157,0.0376,0.2533,500.6403,0.098598,19.536,-265.457424,-265.451994,-265.45105,-265.486153 +"Cc1ccoc1",0.8398,51.11,-0.2183,0.025,0.2434,512.5694,0.097541,19.399,-269.263476,-269.25813,-269.257186,-269.292093 +"Cc1cocn1",1.3204,47.12,-0.2395,0.0074,0.2468,485.0096,0.086195,18.123,-285.326726,-285.321574,-285.32063,-285.355221 +"c1c[nH]cc1N",1.8044,51.67,-0.1741,0.0558,0.2299,500.1351,0.098959,20.983,-265.43852,-265.432959,-265.432015,-265.467025 +"c1c(nc[nH]1)N",3.0074,47.82,-0.1849,0.0391,0.2241,471.183,0.087869,19.414,-281.504579,-281.499371,-281.498427,-281.532762 +"c1cocc1N",1.7218,47.62,-0.1947,0.029,0.2237,483.7409,0.086736,19.259,-285.306999,-285.301863,-285.300919,-285.33518 +"c1c(nco1)N",1.6737,43.75,-0.2057,0.0107,0.2164,455.3577,0.075457,17.986,-301.373831,-301.368911,-301.367967,-301.401842 +"c1c[nH]cc1O",1.6811,47.51,-0.1864,0.05,0.2364,480.4033,0.086407,19.995,-285.316421,-285.311084,-285.31014,-285.344731 +"c1c(nc[nH]1)O",2.8207,43.71,-0.2006,0.0315,0.2321,450.9127,0.075668,18.065,-301.385442,-301.380615,-301.379671,-301.413294 +"c1c(nco1)O",0.4508,39.79,-0.2242,0.001,0.2252,435.5044,0.063161,16.639,-321.252793,-321.24821,-321.247265,-321.280543 +"Cn1cccc1",2.0318,55.54,-0.2006,0.0463,0.2469,510.1975,0.110002,20.193,-249.387499,-249.381908,-249.380963,-249.416542 +"Cn1ccnc1",3.9054,51.11,-0.2216,0.0324,0.2541,488.8221,0.098859,18.783,-265.44727,-265.441887,-265.440943,-265.476219 +"c1ccccc1",0,57.28,-0.2475,0.0029,0.2503,456.6788,0.100175,17.214,-232.164586,-232.160188,-232.159244,-232.192047 +"c1ccncc1",2.1103,53.03,-0.2518,-0.0225,0.2293,432.2254,0.088683,16.093,-248.211932,-248.207665,-248.206721,-248.239325 +"c1cnccn1",0,49.2,-0.2493,-0.0511,0.1982,408.933,0.07674,15.146,-264.255612,-264.251446,-264.250502,-264.282954 +"c1cncnc1",2.2031,48.47,-0.2531,-0.0415,0.2116,408.2875,0.076933,15.138,-264.261826,-264.257646,-264.256702,-264.289179 +"c1ncncn1",0.0001,43.74,-0.2768,-0.0554,0.2214,384.8085,0.065246,14.209,-280.31408,-280.30997,-280.309026,-280.341405 
+"CC(C)(C)C#C",0.6082,62.05,-0.2585,0.0629,0.3214,624.7654,0.140054,28.513,-234.476516,-234.468849,-234.467905,-234.506959 +"CC(C)(C)C#N",3.9525,56.9,-0.3156,0.0438,0.3594,602.0353,0.129886,26.404,-250.585843,-250.578524,-250.57758,-250.616127 +"CC(C)(C#N)N",2.8519,53.67,-0.2675,0.0349,0.3024,580.9124,0.119019,25.93,-266.630629,-266.623452,-266.622508,-266.660779 +"CC(C)(C#C)O",1.7721,54.93,-0.2589,0.0425,0.3014,587.1297,0.116168,27.393,-270.399081,-270.391629,-270.390685,-270.429359 +"CC(C)(C#N)O",4.8465,49.98,-0.2931,0.0216,0.3148,564.648,0.105718,25.46,-286.506282,-286.499096,-286.498152,-286.5365 +"CC(C)(C)C=O",2.5692,56.92,-0.247,-0.0179,0.2291,611.1634,0.139975,27.295,-271.656372,-271.648691,-271.647747,-271.687368 +"CC(C)(C=O)O",2.8176,49.7,-0.2523,-0.0258,0.2265,570.0556,0.115843,26.432,-307.582044,-307.57441,-307.573465,-307.613529 +"CC(C)(C)CO",1.3469,59.72,-0.2638,0.0725,0.3363,650.4504,0.163869,29.797,-272.841336,-272.83319,-272.832246,-272.872437 +"CC(C)(CO)O",0.1987,52.67,-0.2619,0.0727,0.3346,609.3369,0.139933,28.879,-308.769557,-308.761514,-308.76057,-308.800675 +"CCC(C)(C)C",0.0414,66.63,-0.3031,0.0748,0.378,704.4105,0.187572,30.906,-236.917458,-236.909222,-236.908277,-236.948672 +"CCC(C)(C)O",1.3435,59.74,-0.2601,0.0658,0.3259,660.7696,0.163685,29.948,-272.84753,-272.839427,-272.838483,-272.878675 +"CC(C)(C)OC",1.0896,60.03,-0.2455,0.079,0.3245,644.1742,0.163225,29.597,-272.835594,-272.827416,-272.826471,-272.867062 +"CC#CC(=O)C",3.5644,56.61,-0.2532,-0.0449,0.2082,786.3721,0.093473,23.826,-269.230258,-269.222736,-269.221792,-269.263526 +"CC#CC(=O)N",4.0759,52.6,-0.2502,-0.016,0.2342,756.8071,0.082966,23.663,-285.30044,-285.292874,-285.29193,-285.334226 +"CC#CC(C)C",0.1156,65.24,-0.2404,0.0633,0.3036,861.2875,0.140828,27.581,-234.486176,-234.478004,-234.477059,-234.519337 +"CC#CC(C)O",1.89,57.94,-0.2492,0.0416,0.2908,819.2949,0.117252,26.384,-270.408713,-270.400789,-270.399845,-270.441577 +"CC(=O)CC#C",2.2729,51.94,-0.246,-0.0185,0.2275,636.8813,0.093109,23.951,-269.219315,-269.212283,-269.211339,-269.250569 +"CC(=O)CC#N",5.6789,47.48,-0.274,-0.037,0.237,640.5669,0.082597,21.969,-285.324641,-285.317853,-285.316909,-285.356331 +"C#CCC(=O)N",3.7732,48.37,-0.2498,0.0244,0.2742,627.6953,0.082749,23.677,-285.287913,-285.280928,-285.279984,-285.319587 +"C(C#N)C(=O)N",6.7117,43.68,-0.2741,0.0051,0.2792,603.7082,0.072367,21.668,-301.394865,-301.38819,-301.387246,-301.426589 +"CC(=N)NC=O",0.9105,50.18,-0.2611,-0.0177,0.2434,587.7789,0.096761,21.828,-302.588564,-302.582194,-302.58125,-302.618688 +"CC(=N)OC=O",4.0954,45.3,-0.2723,-0.0251,0.2472,632.4706,0.083113,21.35,-322.460671,-322.454075,-322.45313,-322.491611 +"CC(=O)CC=O",2.6833,48.15,-0.2505,-0.0381,0.2124,667.8025,0.09322,22.667,-306.397642,-306.390688,-306.389744,-306.429362 +"CC(=O)NC=N",5.1423,49.82,-0.2397,-0.0114,0.2283,621.2041,0.095742,22.603,-302.579654,-302.57277,-302.571825,-302.610884 +"CC(=O)NC=O",5.6602,45.18,-0.2511,-0.0298,0.2213,600.473,0.083351,21.535,-322.479769,-322.473128,-322.472183,-322.510598 +"CC(=O)OC=N",0.9137,45.91,-0.2768,-0.0235,0.2533,633.5112,0.083216,21.339,-322.464725,-322.458133,-322.457189,-322.495484 +"C(=O)NC(=N)N",2.4787,46.25,-0.2336,-0.0146,0.2189,561.2611,0.08593,21.711,-318.647923,-318.641827,-318.640883,-318.67749 +"C(C=O)C(=O)N",3.7344,44.18,-0.2502,-0.0278,0.2223,637.4959,0.083052,22.336,-322.469844,-322.463011,-322.462067,-322.501174 +"C(=N)NC(=O)N",5.2585,46.05,-0.2443,0.0042,0.2484,586.8385,0.085659,22.001,-318.641916,-318.635509,-318.634565,-318.672134 
+"C(=O)NC(=O)N",6.1119,41.58,-0.2548,-0.0128,0.242,566.0758,0.073232,20.96,-338.542975,-338.536767,-338.535823,-338.573022 +"NC(=O)OC=N",1.994,42.07,-0.2743,0.0024,0.2767,600.144,0.0727,21.164,-338.531538,-338.524995,-338.52405,-338.562187 +"CC(C)CC#C",0.6828,62.17,-0.259,0.0553,0.3143,738.9549,0.140781,27.444,-234.476081,-234.46852,-234.467575,-234.506975 +"CC(C)CC#N",3.9826,57.19,-0.3161,0.0346,0.3507,713.1432,0.130632,25.297,-250.585183,-250.578,-250.577056,-250.615918 +"CC(O)CC#C",1.4348,54.82,-0.2622,0.0545,0.3167,692.7907,0.117241,26.225,-270.401061,-270.39373,-270.392786,-270.431817 +"CC(O)CC#N",4.9064,50.45,-0.2866,0.0348,0.3214,674.7315,0.106844,24.229,-286.508374,-286.501342,-286.500397,-286.539045 +"CN(C)CC#N",4.0824,54.43,-0.2371,0.0217,0.2588,671.7683,0.119064,24.109,-266.609146,-266.602164,-266.60122,-266.639636 +"CC(C)CC=O",2.7416,57.47,-0.2481,-0.0197,0.2284,750.1795,0.140549,26.267,-271.653205,-271.645574,-271.644629,-271.685015 +"CC(C)NC=O",3.6603,54.8,-0.2484,0.0348,0.2832,684.3066,0.130467,25.451,-287.72789,-287.72048,-287.719536,-287.760264 +"CC(C)OC=O",4.1216,51.48,-0.2748,0.0115,0.2863,712.2912,0.117552,24.383,-307.60208,-307.594822,-307.593878,-307.634011 +"CC(O)CC=O",3.3205,50.52,-0.2437,-0.0204,0.2232,730.3237,0.11705,25.112,-307.577822,-307.570462,-307.569518,-307.609331 +"CN(C)CC=O",2.7328,54.59,-0.2267,-0.0252,0.2016,722.1956,0.129063,25.118,-287.681216,-287.673848,-287.672904,-287.712476 +"CC(=O)CCO",1.2601,50.08,-0.2458,-0.0111,0.2348,760.732,0.117054,25.274,-307.586014,-307.578343,-307.577399,-307.618405 +"CCCC(=O)C",2.5426,57.31,-0.2415,-0.0071,0.2345,817.4605,0.140374,26.57,-271.666055,-271.658117,-271.657173,-271.699054 +"CCCC(=O)N",3.4104,53.75,-0.243,0.0359,0.2789,778.0458,0.130316,26.19,-287.736841,-287.729087,-287.728143,-287.769373 +"CCNC(=O)C",3.4941,54.72,-0.2402,0.0381,0.2783,725.3315,0.130219,25.795,-287.734664,-287.726871,-287.725927,-287.767999 +"CCNC(=O)N",3.5648,50.96,-0.2427,0.0598,0.3025,719.1944,0.120088,25.169,-303.795699,-303.788437,-303.787493,-303.827496 +"CCOC(=O)C",1.9371,50.66,-0.2654,0.0196,0.285,719.0142,0.117704,24.457,-307.614861,-307.607387,-307.606443,-307.647075 +"CCOC(=O)N",2.4228,46.82,-0.2644,0.0591,0.3235,686.248,0.107556,24.01,-323.6788,-323.671741,-323.670797,-323.710041 +"C[NH2+]CC([O-])=O",5.2128,46.17,-0.2494,0.0196,0.2691,624.3195,0.108531,22.427,-323.654114,-323.64761,-323.646666,-323.684532 +"CC(=O)COC",3.6117,51.17,-0.2449,-0.0102,0.2347,743.2534,0.116412,24.783,-307.571921,-307.564235,-307.563291,-307.604498 +"COCC(=O)N",4.2922,47.53,-0.2464,0.0274,0.2738,706.1403,0.10636,24.375,-323.641347,-323.63393,-323.632986,-323.673408 +"C(CO)C(=O)N",2.1536,46.39,-0.2471,0.0327,0.2798,722.9054,0.106736,25.021,-323.657279,-323.649644,-323.6487,-323.69021 +"[NH3+]CCC([O-])=O",14.8809,54.51,-0.167,-0.0333,0.1338,714.9069,0.107753,23.572,-323.582949,-323.575941,-323.574996,-323.614532 +"CC(C)CCO",1.3839,60.25,-0.2613,0.0807,0.342,850.8528,0.164714,28.734,-272.837559,-272.829406,-272.828462,-272.869568 +"CC(O)CCO",0.18,53.18,-0.2583,0.0719,0.3302,802.5075,0.141169,27.553,-308.764658,-308.756772,-308.755828,-308.796363 +"CCCC(C)C",0.0898,67.45,-0.3042,0.0859,0.3901,906.7075,0.188151,29.991,-236.917664,-236.909314,-236.90837,-236.949789 +"CCCC(C)O",1.3758,60.39,-0.2613,0.0739,0.3352,858.1408,0.164563,28.823,-272.84456,-272.836468,-272.835524,-272.876391 +"CCOC(C)C",0.9735,61.19,-0.2458,0.0875,0.3333,814.3456,0.163926,28.558,-272.839582,-272.831387,-272.830442,-272.871875 
+"CC(C)COC",0.9451,61.01,-0.2508,0.0876,0.3384,817.8428,0.16427,28.268,-272.83269,-272.824541,-272.823597,-272.864643 +"COCC(C)O",2.2037,53.61,-0.2581,0.0777,0.3359,711.2834,0.141325,26.669,-308.759135,-308.751467,-308.750523,-308.790567 +"CC(=O)C(=O)C",0.0005,47.03,-0.2408,-0.0826,0.1582,581.337,0.092927,23.569,-306.406578,-306.399338,-306.398393,-306.43831 +"CC(=O)C(=N)N",1.9354,48.43,-0.2483,-0.0515,0.1968,562.5203,0.09552,23.683,-302.580808,-302.574009,-302.573065,-302.611309 +"CC(=O)C(=O)N",1.1652,43.45,-0.2416,-0.0632,0.1784,547.5835,0.083247,22.796,-322.482659,-322.475927,-322.474982,-322.513285 +"NC(=[NH2+])C([O-])=O",8.5052,41.06,-0.2349,-0.0376,0.1973,506.6426,0.073524,20.891,-338.530072,-338.524237,-338.523292,-338.559249 +"C(=O)(C(=O)N)N",0.0024,39.85,-0.2422,-0.0321,0.21,513.8164,0.073748,21.839,-338.559964,-338.553802,-338.552858,-338.589499 +"CC(C)C(=O)C",2.5905,56.8,-0.2389,-0.0081,0.2308,650.5524,0.140412,27.077,-271.665153,-271.657306,-271.656362,-271.697004 +"CC(C)C(=O)N",3.4399,53.35,-0.2409,0.0371,0.278,617.1359,0.130071,26.737,-287.737498,-287.729645,-287.728701,-287.770224 +"CC([NH3+])C([O-])=O",5.0933,45.76,-0.251,0.0174,0.2683,578.6969,0.108391,23.344,-323.665247,-323.658667,-323.657723,-323.695493 +"CC(O)C(C)=O",2.9304,49.94,-0.263,-0.0258,0.2371,606.3478,0.117268,25.252,-307.594137,-307.586914,-307.58597,-307.625022 +"CC(O)C(N)=O",4.1096,46.24,-0.26,0.0186,0.2786,574.3293,0.10692,24.979,-323.665987,-323.658861,-323.657917,-323.69684 +"CC(=O)N(C)C",3.5335,54.99,-0.2342,0.0354,0.2696,622.3344,0.129864,25.808,-287.723848,-287.716142,-287.715198,-287.755519 +"CN(C)C(=O)N",3.6689,50.94,-0.2306,0.0604,0.2909,591.7927,0.119711,25.1,-303.785832,-303.778493,-303.777549,-303.816841 +"CC(C)C(C)C",0,66.87,-0.3016,0.0822,0.3838,747.1084,0.187721,30.606,-236.916033,-236.907667,-236.906723,-236.94792 +"CC(C)C(C)O",1.414,59.83,-0.2613,0.0684,0.3297,675.0478,0.164151,29.475,-272.843742,-272.8356,-272.834656,-272.875156 +"CC(O)C(C)O",0.1664,52.62,-0.2564,0.0643,0.3208,632.2372,0.140737,28.004,-308.772292,-308.764288,-308.763343,-308.804403 +"CC1(CCC1)C",0.0245,63.32,-0.2837,0.0812,0.3649,603.4335,0.165997,26.214,-235.699029,-235.692187,-235.691243,-235.728837 +"CC1(CCO1)C",1.636,56.17,-0.2392,0.0812,0.3205,560.0105,0.141947,24.691,-271.626697,-271.619919,-271.618974,-271.656931 +"CC1(COC1)C",1.888,56.09,-0.2425,0.0728,0.3154,569.1031,0.142192,24.362,-271.618179,-271.611373,-271.610429,-271.649191 +"CC1(CCC1)O",1.3454,56.29,-0.2526,0.0743,0.3269,562.0396,0.142376,25.093,-271.627826,-271.621262,-271.620317,-271.657345 +"CC1(COC1)O",2.486,49.2,-0.246,0.0616,0.3076,527.7516,0.118569,23.305,-307.544712,-307.538272,-307.537328,-307.574452 +"CC1(C)CC1O",1.2792,56.93,-0.2497,0.083,0.3327,599.392,0.141365,26.639,-271.621961,-271.61481,-271.613865,-271.652008 +"CC1(O)CC1O",1.9707,49.85,-0.2427,0.0794,0.3221,557.5787,0.117602,25.546,-307.546144,-307.539271,-307.538327,-307.575926 +"CC1CC1(C)C",0.0963,64.09,-0.2558,0.0887,0.3445,649.5784,0.16489,27.904,-235.698987,-235.691505,-235.69056,-235.729399 +"CC1CC1(C)O",1.2745,56.95,-0.2458,0.0798,0.3256,607.2163,0.141106,26.806,-271.623957,-271.616756,-271.615812,-271.654105 +"CC1OC1(C)C",1.7972,56.95,-0.2545,0.0923,0.3468,616.1558,0.140619,26.504,-271.630416,-271.623061,-271.622116,-271.660915 +"CC1(CN1C)C",1.1514,60.96,-0.219,0.0873,0.3063,612.8334,0.153033,26.999,-251.732828,-251.725481,-251.724536,-251.763141 +"CC1CC(=N)O1",2.8604,51.41,-0.2598,0.0305,0.2903,567.9978,0.108342,21.198,-286.498235,-286.492393,-286.491449,-286.527464 
+"CC1CC(=O)C1",2.8997,53.83,-0.2394,-0.0163,0.2231,615.4797,0.118664,22.568,-270.446915,-270.440714,-270.43977,-270.476747 +"CC1CC(=O)N1",3.855,50.75,-0.2465,0.0387,0.2852,578.1159,0.108284,21.755,-286.519735,-286.513686,-286.512742,-286.549288 +"CC1CC(=O)O1",4.16,46.42,-0.2737,0.0124,0.2861,549.3072,0.096346,20.176,-306.403144,-306.397481,-306.396537,-306.432231 +"CN1CC(=O)C1",2.5981,51.1,-0.2304,-0.0233,0.2071,581.4874,0.107085,21.382,-286.476486,-286.470495,-286.469551,-286.505845 +"C1C(CC1=O)O",3.0457,46.78,-0.242,-0.0206,0.2214,568.1088,0.095023,21.395,-306.369631,-306.363717,-306.362773,-306.399081 +"CC1CC(C)C1",0.1412,64.11,-0.2832,0.0906,0.3739,708.7554,0.166237,25.93,-235.700285,-235.693375,-235.69243,-235.730474 +"CC1CC(C)O1",1.5588,56.92,-0.2399,0.0942,0.3341,627.2141,0.14242,24.277,-271.628346,-271.621668,-271.620724,-271.658548 +"CC1CC(O)C1",1.398,56.88,-0.253,0.0788,0.3318,659.729,0.142766,24.723,-271.62464,-271.618,-271.617056,-271.65458 +"CN1CC(C1)O",1.5574,53.9,-0.2196,0.0838,0.3034,603.8647,0.131404,23.314,-287.657053,-287.650703,-287.649759,-287.686678 +"OC1CC(O)C1",1.3233,49.09,-0.2525,0.0691,0.3216,617.3904,0.119358,23.5,-307.548619,-307.542315,-307.541371,-307.578204 +"CC12CC(C1)C2",0.2077,61.09,-0.2934,0.0984,0.3917,520.6157,0.144312,21.869,-234.455424,-234.449921,-234.448977,-234.483864 +"CC12CC(C1)O2",1.8191,53.68,-0.2307,0.1012,0.3319,482.6135,0.119521,21.01,-270.370102,-270.364657,-270.363713,-270.398537 +"CC12CN(C1)C2",1.9709,57.23,-0.2257,0.0834,0.3091,503.8125,0.132764,21.04,-250.478855,-250.473446,-250.472502,-250.507275 +"OC12CC(C1)C2",1.329,53.49,-0.2587,0.0809,0.3397,476.4978,0.120826,20.67,-270.382725,-270.377533,-270.376589,-270.410876 +"OC12CN(C1)C2",1.7613,50.04,-0.2445,0.0682,0.3126,459.4671,0.109323,19.785,-286.408473,-286.403385,-286.402441,-286.436586 +"C#CC(=O)C#C",3.1111,50.23,-0.28,-0.0898,0.1902,559.9421,0.04691,20.497,-266.763445,-266.757444,-266.7565,-266.792861 +"C#CC(=O)C#N",3.7814,44.9,-0.3156,-0.121,0.1947,539.3608,0.036304,18.651,-282.861121,-282.855418,-282.854474,-282.890406 +"C(#N)C(=O)C#N",0.6341,39.71,-0.3538,-0.1562,0.1977,520.8737,0.02554,16.955,-298.95295,-298.94752,-298.946576,-298.982096 +"C#CC(=O)C=O",4.3138,43.98,-0.2672,-0.1181,0.1491,570.4143,0.046458,19.716,-303.934323,-303.928202,-303.927258,-303.964386 +"C(=O)C(=O)C#N",1.4415,38.68,-0.2978,-0.1503,0.1475,551.5682,0.035739,18.042,-320.029854,-320.024005,-320.023061,-320.059734 +"C(=O)C(=O)C=O",3.4458,37.46,-0.2606,-0.1455,0.1151,583.5476,0.045968,18.951,-341.097446,-341.091177,-341.090233,-341.128555 +"CC(C#C)C#C",0.6022,56.95,-0.2622,0.034,0.2962,627.8377,0.093567,24.829,-232.02593,-232.019028,-232.018084,-232.056008 +"CC(C#C)C#N",3.7481,51.99,-0.2879,0.0116,0.2995,605.1461,0.083296,22.726,-248.132701,-248.126157,-248.125213,-248.162616 +"CC(C#N)C#N",4.1272,47.23,-0.3433,-0.0129,0.3304,583.9904,0.072887,20.75,-264.235803,-264.229597,-264.228653,-264.265543 +"NC(C#C)C#N",2.5088,48.67,-0.2798,-0.003,0.2768,583.2106,0.072434,22.216,-264.171923,-264.165508,-264.164564,-264.201717 +"C(#N)C(C#N)N",3.6178,43.93,-0.3037,-0.0279,0.2758,562.9596,0.061962,20.295,-280.273691,-280.267611,-280.266667,-280.303301 +"C#CC(C#C)O",1.3299,49.83,-0.2708,0.0039,0.2747,588.5174,0.070081,23.408,-267.944278,-267.937674,-267.93673,-267.97414 +"OC(C#C)C#N",3.263,45.03,-0.3019,-0.0199,0.2821,566.4572,0.05969,21.363,-284.048718,-284.042467,-284.041523,-284.078408 +"C(#N)C(C#N)O",2.3093,40.41,-0.3463,-0.0464,0.3,546.2842,0.04917,19.442,-300.148891,-300.142982,-300.142038,-300.178386 
+"CC(C=O)C#C",2.3065,52.76,-0.2536,-0.0324,0.2213,632.3767,0.093434,23.743,-269.207716,-269.20074,-269.199796,-269.238596 +"CC(C=O)C#N",2.7273,47.71,-0.2801,-0.0545,0.2256,611.9888,0.083083,21.73,-285.313284,-285.306637,-285.305693,-285.344071 +"OC(C=O)C#C",2.1664,45.31,-0.2709,-0.0493,0.2216,567.4229,0.070272,21.821,-305.130184,-305.123844,-305.1229,-305.160099 +"OC(C=O)C#N",2.9799,40.49,-0.3073,-0.0733,0.234,547.2998,0.059833,19.861,-321.233569,-321.227582,-321.226638,-321.263248 +"CC(C=O)C=O",3.0426,48.36,-0.2559,-0.0573,0.1986,637.0667,0.093292,22.644,-306.383308,-306.376314,-306.37537,-306.414827 +"CN(C=N)C=O",2.4166,51.06,-0.2595,-0.0171,0.2424,607.0428,0.096802,21.581,-302.579718,-302.573326,-302.572382,-302.60969 +"CN(C=O)C=O",0.6161,45.8,-0.2725,-0.0352,0.2374,588.7943,0.084124,20.743,-322.477268,-322.470851,-322.469907,-322.507813 +"C(=O)C(C=O)O",3.3417,41.69,-0.2649,-0.0663,0.1986,555.1885,0.070094,20.624,-342.307696,-342.301481,-342.300537,-342.33778 +"CN=C(C#N)N",5.1006,50.92,-0.2622,-0.0399,0.2223,616.0271,0.084203,22.896,-281.480993,-281.474271,-281.473326,-281.51108 +"CN=C(C=O)N",2.7489,50.95,-0.2476,-0.0605,0.187,629.4902,0.095327,23.292,-302.564379,-302.557593,-302.556649,-302.594563 +"CCC(=O)C#C",2.733,53.26,-0.264,-0.0552,0.2087,663.4092,0.09367,23.625,-269.216969,-269.210012,-269.209068,-269.247773 +"CCC(=O)C#N",3.5163,48.11,-0.2976,-0.0854,0.2122,643.1284,0.08307,21.82,-285.317454,-285.31081,-285.309865,-285.34803 +"CNC(=N)C#N",3.0753,50.21,-0.2527,-0.0407,0.2121,618.7695,0.084889,22.345,-281.484877,-281.478065,-281.477121,-281.515408 +"CNC(=O)C#C",3.6839,51,-0.2591,-0.0243,0.2349,627.8913,0.0833,22.928,-285.282532,-285.275639,-285.274695,-285.313335 +"COC(=O)C#C",1.7556,46.66,-0.2806,-0.0424,0.2382,595.6777,0.071026,21.393,-305.155985,-305.149492,-305.148548,-305.186165 +"COC(=O)C#N",4.0593,41.61,-0.3197,-0.0717,0.248,575.2916,0.06048,19.572,-321.254976,-321.248781,-321.247837,-321.285034 +"C#CC(=O)CO",4.1336,46.43,-0.2661,-0.0573,0.2088,617.4214,0.070046,22.333,-305.130313,-305.123605,-305.122661,-305.160728 +"C(C(=O)C#N)O",4.1197,41.54,-0.3004,-0.0881,0.2122,596.5667,0.059291,20.617,-321.230055,-321.223616,-321.222672,-321.260329 +"CCC(=O)C=O",1.0401,47.28,-0.2515,-0.0944,0.1571,641.1289,0.093575,22.634,-306.392828,-306.385878,-306.384934,-306.424157 +"CNC(=N)C=O",3.7361,49.61,-0.2388,-0.0681,0.1707,643.2539,0.095538,22.92,-302.558547,-302.551612,-302.550668,-302.589229 +"CNC(=O)C=O",5.2806,45.79,-0.2505,-0.0726,0.178,633.9378,0.083118,21.909,-322.454079,-322.447243,-322.446299,-322.485128 +"COC(=O)C=O",3.7746,40.92,-0.2683,-0.0862,0.1821,600.9282,0.070937,20.46,-342.329232,-342.322736,-342.321792,-342.359811 +"C(C(=O)C=O)O",5.1353,41.26,-0.2516,-0.0953,0.1563,627.7041,0.069653,21.411,-342.297848,-342.291172,-342.290228,-342.328622 +"CCC(=O)CC",2.5012,57,-0.2412,-0.0057,0.2356,770.4633,0.140552,26.577,-271.666841,-271.658902,-271.657958,-271.699649 +"CCC(=O)CO",3.84,50.24,-0.2429,-0.0073,0.2356,723.3895,0.116949,25.279,-307.58009,-307.572434,-307.571489,-307.612431 +"CCC(=O)NC",3.4247,54.61,-0.2422,0.0378,0.28,725.6801,0.130452,25.744,-287.732329,-287.724552,-287.723608,-287.764875 +"CCC(=O)OC",1.6323,50.2,-0.268,0.0192,0.2871,694.4729,0.118017,24.367,-307.61045,-307.602997,-307.602053,-307.642398 +"CNC(=O)CO",4.3706,47.75,-0.2433,0.0274,0.2707,681.2313,0.106577,24.645,-323.644468,-323.636751,-323.635807,-323.67712 +"CNC(=O)NC",3.5856,51.35,-0.2383,0.067,0.3052,689.1369,0.119461,25.314,-303.787203,-303.779425,-303.778481,-303.819322 
+"CNC(=O)OC",2.2967,47.42,-0.2496,0.0662,0.3158,657.9646,0.107261,23.827,-323.6692,-323.661699,-323.660754,-323.701214 +"COC(=O)CN",1.6419,46.49,-0.2466,0.015,0.2616,669.5959,0.107439,23.834,-323.652284,-323.644911,-323.643967,-323.684119 +"COC(=O)CO",2.8141,43.25,-0.2705,0.0032,0.2737,640.0833,0.094874,22.716,-343.531666,-343.524693,-343.523749,-343.562703 +"C(C(=O)CO)O",5.1141,43.53,-0.2447,-0.01,0.2348,677.014,0.09331,24.017,-343.492484,-343.485107,-343.484162,-343.524104 +"CC(=NC)OC",0.8775,55.68,-0.2427,0.0334,0.2761,667.2341,0.129176,25.912,-287.697316,-287.689416,-287.688472,-287.729866 +"CC(CO)C#C",1.3163,55.08,-0.2613,0.0553,0.3166,682.8459,0.117465,26.225,-270.3961,-270.388643,-270.387698,-270.426967 +"CC(CO)C#N",3.7371,50.15,-0.2896,0.035,0.3246,658.6943,0.107232,24.138,-286.504534,-286.497432,-286.496488,-286.535239 +"CCC(C)C#C",0.7089,62.36,-0.2575,0.0581,0.3157,736.4751,0.14095,27.469,-234.474088,-234.466434,-234.46549,-234.505226 +"CCC(C)C#N",4.0632,57.23,-0.3144,0.0383,0.3527,712.3282,0.130797,25.346,-250.583259,-250.575974,-250.57503,-250.614218 +"CCC(N)C#N",2.8538,54.01,-0.2695,0.0286,0.298,689.9612,0.119956,24.891,-266.626759,-266.619585,-266.618641,-266.657544 +"CCC(O)C#C",1.3296,55.33,-0.2649,0.0332,0.2982,693.4546,0.117373,26.198,-270.397474,-270.390091,-270.389147,-270.428268 +"CCC(O)C#N",3.3465,50.37,-0.3028,0.0117,0.3145,670.8765,0.107081,24.136,-286.50404,-286.497024,-286.49608,-286.534653 +"CNC(C)C#N",4.4075,54.08,-0.2424,0.0288,0.2711,680.4479,0.119301,24.789,-266.615452,-266.608241,-266.607297,-266.646318 +"COC(C)C#C",1.41,55.84,-0.2538,0.0395,0.2933,675.6208,0.116766,25.831,-270.38418,-270.376713,-270.375769,-270.415376 +"COC(C)C#N",3.2128,50.59,-0.2865,0.0177,0.3042,619.0012,0.106659,23.771,-286.495035,-286.487984,-286.48704,-286.525708 +"NC(CO)C#N",4.5745,46.66,-0.2619,0.0215,0.2834,634.7561,0.096396,23.603,-302.545112,-302.538187,-302.537242,-302.575574 +"NCC(N)C#N",4.8739,50.26,-0.2534,0.0266,0.28,658.1766,0.109366,24.235,-282.667163,-282.660209,-282.659265,-282.697592 +"NCC(O)C#N",3.5369,46.88,-0.2549,0.0078,0.2627,640.0746,0.096532,23.453,-302.545562,-302.538783,-302.537839,-302.575827 +"OCC(O)C#C",2.1731,48.25,-0.2624,0.0188,0.2813,638.4935,0.094585,24.263,-306.320806,-306.313986,-306.313042,-306.351017 +"OCC(O)C#N",2.1065,43.5,-0.2878,-0.0038,0.2839,617.6379,0.084113,22.348,-322.425235,-322.418738,-322.417794,-322.455292 +"CC(CO)C=O",1.4722,50.03,-0.2523,-0.0219,0.2304,656.247,0.117353,24.974,-307.574899,-307.567493,-307.566549,-307.606203 +"CCC(C)C=O",2.7619,57.5,-0.2451,-0.0181,0.227,739.4896,0.140783,26.342,-271.651913,-271.644175,-271.643231,-271.683985 +"CCC(O)C=O",2.216,50.71,-0.2736,-0.0372,0.2365,676.7821,0.117766,24.411,-307.581989,-307.574957,-307.574013,-307.612724 +"CCN(C)C=O",3.8246,55.18,-0.2407,0.0328,0.2735,680.8869,0.130695,24.851,-287.720383,-287.713033,-287.712089,-287.75175 +"COC(C)C=O",2.7851,50.45,-0.2545,-0.0276,0.2269,676.5067,0.116572,24.765,-307.566593,-307.559107,-307.558163,-307.598185 +"OCC(O)C=O",1.2875,43.58,-0.2662,-0.0476,0.2186,618.5337,0.094809,22.686,-343.5036,-343.497024,-343.49608,-343.533909 +"CC(CO)CO",2.6348,53.08,-0.2607,0.0735,0.3342,743.5937,0.141197,27.566,-308.75909,-308.751086,-308.750141,-308.790858 +"CCC(C)CC",0.067,67.19,-0.303,0.0843,0.3874,855.476,0.188227,30.015,-236.916125,-236.907738,-236.906794,-236.948351 +"CCC(C)CO",1.3482,60.09,-0.2625,0.0783,0.3408,798.4108,0.164837,28.743,-272.838189,-272.830053,-272.829108,-272.870013 
+"CCC(C)OC",1.0377,60.6,-0.248,0.0861,0.3341,743.1891,0.164188,28.45,-272.834623,-272.826448,-272.825504,-272.866669 +"CCC(CC)O",1.4336,60.24,-0.2621,0.0721,0.3342,805.7386,0.164626,28.828,-272.844804,-272.836705,-272.835761,-272.876509 +"CCC(O)CO",2.3512,52.93,-0.256,0.0618,0.3178,748.8236,0.141924,26.859,-308.768073,-308.760556,-308.759611,-308.799183 +"COC(C)CO",2.2435,53.28,-0.2545,0.079,0.3335,670.6307,0.141335,26.699,-308.757315,-308.74959,-308.748646,-308.788936 +"C(C(CO)O)O",1.7715,45.75,-0.2606,0.049,0.3096,694.4975,0.118969,25.116,-344.689237,-344.682195,-344.681251,-344.719856 +"CC1(CC1)C#C",0.7654,59.61,-0.24,0.0571,0.2972,582.3195,0.118255,24.638,-233.255259,-233.2487,-233.247756,-233.284774 +"CC1(CC1)C#N",4.1425,54.31,-0.2853,0.037,0.3224,559.2985,0.108158,22.416,-249.363808,-249.357626,-249.356681,-249.393152 +"CC1(CN1)C#C",1.9146,56,-0.2444,0.0441,0.2885,564.5299,0.106988,23.611,-249.293044,-249.286674,-249.285729,-249.322437 +"CC1(CN1)C#N",4.9191,50.81,-0.2743,0.0226,0.2969,541.8201,0.096715,21.524,-265.399968,-265.393944,-265.393,-265.429201 +"CC1(CO1)C#C",1.7508,52.23,-0.2616,0.0263,0.2879,548.2216,0.094019,22.957,-269.176124,-269.169791,-269.168847,-269.205558 +"CC1(CO1)C#N",3.8379,47.2,-0.3007,0.0031,0.3038,525.818,0.083704,20.903,-285.281032,-285.275044,-285.2741,-285.310299 +"C1CC1(C#N)N",3.0748,51.03,-0.2722,0.029,0.3012,537.6808,0.09733,21.894,-265.408147,-265.402167,-265.401223,-265.437265 +"C#CC1(CC1)O",1.2031,52.54,-0.2424,0.035,0.2774,544.2268,0.094491,23.372,-269.176442,-269.170213,-269.169268,-269.205661 +"C1CC1(C#N)O",3.5524,47.42,-0.2828,0.0136,0.2964,521.6025,0.084237,21.226,-285.282853,-285.276998,-285.276054,-285.311886 +"CC1(CC1)C=O",2.9392,54.61,-0.2482,-0.0141,0.2341,572.0979,0.118497,23.265,-270.439135,-270.432679,-270.431735,-270.468877 +"CC1(CN1)C=O",3.0447,50.74,-0.2483,-0.0234,0.2249,553.6489,0.107129,22.432,-286.477365,-286.471021,-286.470076,-286.507087 +"CC1(CO1)C=O",2.2978,47.26,-0.2592,-0.0374,0.2218,538.1677,0.094183,21.75,-306.358581,-306.352296,-306.351351,-306.388334 +"C1CC1(C=O)O",2.375,48.35,-0.2519,-0.0315,0.2204,528.8623,0.094927,21.796,-306.364186,-306.358187,-306.357242,-306.393465 +"CC1(CC1)CO",1.3206,56.77,-0.2616,0.0757,0.3373,610.3779,0.142009,25.913,-271.618137,-271.611049,-271.610104,-271.648484 +"CC1(CO)CN1",2.1168,52.8,-0.2463,0.0724,0.3186,582.3057,0.13149,24.487,-287.663174,-287.656474,-287.65553,-287.693286 +"CC1(CO)CO1",1.7232,49.21,-0.2611,0.081,0.3421,566.509,0.118491,23.915,-307.545713,-307.539052,-307.538108,-307.575814 +"CCC1(CC1)C",0.1151,63.52,-0.2627,0.0859,0.3486,658.7763,0.165551,27.138,-235.696677,-235.689426,-235.688482,-235.727158 +"CCC1(C)CO1",1.7595,56.33,-0.2587,0.0877,0.3465,616.3779,0.141536,25.512,-271.623066,-271.616004,-271.61506,-271.653614 +"CCC1(CC1)O",1.3751,56.44,-0.2519,0.0723,0.3242,618.4343,0.141819,26.023,-271.621836,-271.614856,-271.613912,-271.652038 +"CC1(CC1)OC",1.104,56.8,-0.245,0.0897,0.3348,596.0747,0.141327,25.705,-271.611366,-271.604301,-271.603357,-271.641692 +"C1CC1(CO)O",2.0651,49.73,-0.2421,0.0639,0.306,568.4896,0.1187,24.458,-307.544316,-307.537691,-307.536747,-307.574152 +"CC1CCC1=O",2.6766,53.75,-0.2374,-0.0163,0.2211,567.3641,0.118901,22.502,-270.447268,-270.440964,-270.44002,-270.477179 +"CC1CNC1=O",3.5654,50.52,-0.2452,0.0373,0.2825,539.0137,0.108515,21.625,-286.517094,-286.510925,-286.509981,-286.54679 +"CC1COC1=N",2.5239,50.92,-0.2615,0.0284,0.2899,537.9634,0.108616,21.067,-286.493052,-286.487128,-286.486184,-286.522434 
+"CC1COC1=O",3.9998,46.04,-0.2726,0.0099,0.2825,521.9194,0.096641,20.03,-306.397746,-306.391983,-306.391039,-306.427029 +"CC1NCC1=O",2.556,50.49,-0.2421,-0.0238,0.2182,545.7113,0.107628,21.712,-286.484904,-286.478789,-286.477845,-286.514513 +"CC1OCC1=O",0.9705,46.89,-0.2575,-0.0323,0.2252,525.6219,0.094977,20.774,-306.365166,-306.359174,-306.35823,-306.394728 +"CN1CCC1=O",3.6013,51.94,-0.2414,0.0394,0.2809,542.8932,0.108561,21.114,-286.514923,-286.508751,-286.507807,-286.544706 +"NC1COC1=N",1.33,47.4,-0.2491,0.0196,0.2687,518.7658,0.097893,20.618,-302.535621,-302.529817,-302.528873,-302.564883 +"NC1COC1=O",2.9069,42.57,-0.2481,0.0012,0.2493,503.6554,0.085902,19.618,-322.439472,-322.433831,-322.432887,-322.468624 +"OC1CCC1=O",2.1771,46.67,-0.2509,-0.0284,0.2226,517.5463,0.095379,21.28,-306.368494,-306.362423,-306.361478,-306.398118 +"OC1CNC1=O",2.5208,43.41,-0.2475,0.0204,0.2679,495.5449,0.08494,20.351,-322.436757,-322.430835,-322.429891,-322.46623 +"OC1COC1=N",1.2035,43.85,-0.2717,0.0123,0.284,494.7347,0.084855,19.922,-322.412936,-322.407158,-322.406214,-322.44232 +"OC1COC1=O",3.0125,39.11,-0.2674,-0.0058,0.2616,481.5842,0.072889,18.879,-342.315546,-342.309972,-342.309028,-342.344721 +"CC1CCC1C",0.1206,63.28,-0.2822,0.0842,0.3663,624.4665,0.166462,25.85,-235.696852,-235.689979,-235.689035,-235.726894 +"CC1CCC1O",1.3078,56.23,-0.2522,0.0751,0.3274,575.9494,0.142966,24.662,-271.622539,-271.615882,-271.614937,-271.652386 +"CC1COC1C",1.7671,56.23,-0.2398,0.085,0.3248,585.5983,0.142555,24.15,-271.620997,-271.61422,-271.613276,-271.651656 +"CC1OCC1O",2.3269,49.4,-0.2442,0.0626,0.3069,539.14,0.118783,23.068,-307.544728,-307.538101,-307.537157,-307.575262 +"OC1CCC1O",1.022,49.03,-0.2476,0.0591,0.3067,529.6164,0.11984,22.931,-307.55036,-307.544139,-307.543195,-307.579988 +"N=C1CN=CN1",3.4388,48.64,-0.2323,-0.0049,0.2275,470.3702,0.08707,18.459,-281.491976,-281.486859,-281.485915,-281.520391 +"C1C=CCC1=O",2.5958,50.91,-0.2409,-0.0164,0.2245,502.2039,0.096841,19.177,-269.263802,-269.258575,-269.257631,-269.292553 +"C1C=CNC1=O",3.246,47.64,-0.2165,-0.0013,0.2152,473.5305,0.08615,18.76,-285.335887,-285.330807,-285.329863,-285.364182 +"C1C(=O)NC=N1",2.525,43.5,-0.25,-0.0182,0.2318,452.4227,0.075083,17.274,-301.395982,-301.391168,-301.390224,-301.424079 +"C1C(=O)OC=N1",1.6338,39.3,-0.2793,-0.0315,0.2478,433.0637,0.062716,15.897,-321.271406,-321.26687,-321.265926,-321.299312 +"N=C1OCC=C1",3.3473,50.58,-0.2462,-0.0302,0.216,472.0954,0.086685,17.709,-285.311823,-285.307013,-285.306068,-285.339869 +"C1CC(=O)C=C1",3.5709,52.33,-0.2366,-0.0429,0.1938,505.106,0.097695,18.812,-269.27132,-269.266149,-269.265204,-269.299973 +"C1C=CC(=O)N1",4.2248,48.42,-0.2428,-0.0263,0.2166,475.489,0.086559,18.405,-285.336744,-285.331604,-285.33066,-285.36533 +"N=C1CCCO1",3.1568,50.78,-0.2549,0.0316,0.2866,515.9607,0.110489,19.419,-286.516174,-286.51087,-286.509925,-286.544899 +"N=C1COCO1",1.8361,43.66,-0.272,0.0187,0.2907,475.1154,0.08643,17.785,-322.434119,-322.429025,-322.428081,-322.462711 +"N=C1OCCO1",4.2769,43.85,-0.2556,0.0548,0.3105,471.4994,0.08682,17.613,-322.44415,-322.439107,-322.438163,-322.472719 +"C1CCC(=O)C1",2.8451,52.83,-0.235,-0.0124,0.2226,547.0211,0.120921,20.712,-270.47247,-270.466895,-270.465951,-270.501576 +"C1CC(=O)NC1",3.7977,49.83,-0.237,0.0377,0.2747,521.6537,0.110762,19.794,-286.542391,-286.536964,-286.53602,-286.571234 +"C1CC(=O)OC1",4.3159,45.82,-0.2646,0.0149,0.2795,499.82,0.098309,18.482,-306.418503,-306.413341,-306.412397,-306.447148 
+"C1COCC1=O",1.8765,46.04,-0.2439,-0.0253,0.2186,504.709,0.096958,19.062,-306.386594,-306.381259,-306.380315,-306.415468 +"C1C(=O)OCO1",2.7134,38.99,-0.2775,-0.0003,0.2772,459.0271,0.074203,16.844,-342.335241,-342.330282,-342.329338,-342.36377 +"C1CNC(=O)N1",3.9802,46.25,-0.2457,0.0629,0.3086,491.6466,0.100038,18.931,-302.599262,-302.594109,-302.593165,-302.627663 +"C1COC(=O)N1",4.8739,42.52,-0.2632,0.052,0.3152,473.1926,0.087482,17.739,-322.47672,-322.471693,-322.470748,-322.505169 +"C1COC(=O)O1",5.0601,38.98,-0.2935,0.0401,0.3336,455.3293,0.074926,16.45,-342.352279,-342.34739,-342.346446,-342.380838 +"CC1=CCCC1",0.2336,62.6,-0.2238,0.037,0.2608,609.4914,0.144201,22.921,-234.522504,-234.51632,-234.515375,-234.551976 +"CC1=CCOC1",1.8189,55.75,-0.2252,0.0249,0.2501,566.4241,0.119949,21.372,-270.43928,-270.433217,-270.432273,-270.468829 +"CC1=NCCO1",1.2483,50.7,-0.2399,0.0319,0.2718,530.0606,0.109278,20.206,-286.516879,-286.510983,-286.510039,-286.546498 +"CC1CC=CC1",0.1489,61.16,-0.2335,0.0313,0.2648,587.2379,0.144419,22.613,-234.516241,-234.51027,-234.509326,-234.545523 +"CC1CN=CO1",1.8411,49.49,-0.246,0.0261,0.2721,504.1131,0.109554,19.819,-286.510514,-286.504912,-286.503968,-286.539796 +"OC1CC=CC1",1.4817,53.91,-0.2461,0.0209,0.267,510.4532,0.120948,21.267,-270.441325,-270.435739,-270.434794,-270.470141 +"CC1CCC=C1",0.1605,61.66,-0.2341,0.03,0.2642,589.2794,0.144442,22.655,-234.516409,-234.510406,-234.509462,-234.545692 +"CC1COC=N1",1.5301,49.75,-0.2473,0.0239,0.2712,509.7285,0.109629,19.822,-286.508233,-286.502604,-286.50166,-286.53756 +"CC1OCC=C1",1.4505,55,-0.2302,0.0182,0.2484,531.8706,0.120101,21.19,-270.437636,-270.4318,-270.430856,-270.466917 +"OC1CCC=C1",1.7344,54.16,-0.2491,0.0142,0.2633,528.2233,0.121083,21.367,-270.441145,-270.435384,-270.43444,-270.470414 +"CC1CCCC1",0.0756,63.03,-0.3044,0.0828,0.3872,641.8924,0.168121,24.306,-235.723792,-235.71718,-235.716236,-235.754746 +"CC1CCCO1",1.5053,56.51,-0.2379,0.0868,0.3246,586.3351,0.144187,22.689,-271.647465,-271.641158,-271.640214,-271.67776 +"CC1CCOC1",1.7082,56.14,-0.2381,0.0839,0.322,594.9572,0.144241,22.656,-271.642927,-271.63658,-271.635636,-271.673114 +"CC1COCO1",1.2942,49.23,-0.2502,0.0904,0.3406,539.3012,0.120486,20.939,-307.569255,-307.563241,-307.562296,-307.599195 +"CC1OCCO1",1.0448,49.6,-0.2542,0.0896,0.3439,537.0339,0.120277,21.023,-307.573371,-307.567272,-307.566327,-307.603679 +"C1CCC(C1)O",1.3247,55.74,-0.2601,0.0738,0.3338,550.8423,0.144785,22.879,-271.648392,-271.642208,-271.641264,-271.678618 +"OC1CCOC1",1.5931,49.05,-0.2428,0.0712,0.314,508.516,0.120836,21.242,-307.56626,-307.560326,-307.559382,-307.595858 +"CC1C(C)C1C",0.0164,63.6,-0.2571,0.0841,0.3412,642.4422,0.165387,27.583,-235.694216,-235.686596,-235.685652,-235.725092 +"CC1C(C)C1O",1.3139,56.45,-0.2441,0.0725,0.3165,589.5057,0.141848,26.227,-271.61858,-271.611272,-271.610327,-271.649185 +"CC1C(C)N1C",1.3293,60.42,-0.2104,0.0826,0.293,607.6457,0.153308,26.797,-251.727102,-251.719552,-251.718607,-251.758017 +"CC1C(O)C1O",1.9714,49.27,-0.2301,0.0537,0.2838,539.3304,0.118633,24.471,-307.540766,-307.533965,-307.53302,-307.570981 +"OC1C(O)C1O",0.6618,41.57,-0.2552,0.0505,0.3058,476.1977,0.096192,21.828,-343.468335,-343.462548,-343.461604,-343.497233 +"CC12CC1CC2",0.3029,60.48,-0.243,0.0951,0.338,542.9521,0.143756,22.864,-234.474951,-234.469056,-234.468112,-234.50378 +"CC12CC1CO2",1.6329,53.34,-0.2287,0.0868,0.3155,498.0795,0.119588,21.408,-270.392444,-270.386783,-270.385839,-270.421106 
+"CC12CC1OC2",1.8852,53.11,-0.2293,0.0805,0.3098,506.8939,0.119607,21.353,-270.388222,-270.382545,-270.381601,-270.416941 +"CC12CCC1O2",1.8535,53.26,-0.2442,0.0955,0.3397,508.8551,0.119394,21.539,-270.401391,-270.39568,-270.394736,-270.430118 +"OC12CC1CC2",1.1413,53.1,-0.2475,0.0834,0.3309,499.7541,0.120085,21.746,-270.397623,-270.392001,-270.391057,-270.426196 +"OC12CC1OC2",2.2569,45.92,-0.2318,0.0706,0.3024,464.135,0.095993,20.187,-306.309029,-306.303682,-306.302738,-306.337396 +"C#CC#CC#C",0.0151,72.39,-0.2465,-0.0565,0.19,802.1485,0.046718,21.875,-229.615446,-229.609222,-229.608278,-229.639949 +"C#CC#CC#N",4.5941,63.48,-0.284,-0.085,0.199,777.4112,0.036829,19.266,-245.711897,-245.706152,-245.705208,-245.733762 +"C(#CC#N)C#N",0,55.28,-0.3277,-0.1164,0.2113,754.2424,0.027023,17.827,-261.802956,-261.79743,-261.796485,-261.828854 +"C#CCCC#C",0.0008,57.87,-0.2615,0.0391,0.3006,799.7895,0.093925,24.211,-232.030352,-232.023466,-232.022522,-232.06066 +"C#CCCC#N",3.4446,52.71,-0.2821,0.0193,0.3014,773.7747,0.083685,22.093,-248.137789,-248.131277,-248.130333,-248.167922 +"C(CC#N)C#N",0.0004,47.69,-0.3397,-0.0013,0.3384,748.3461,0.073373,20.047,-264.243028,-264.236876,-264.235932,-264.27298 +"C#CC#CC=O",3.248,59.66,-0.275,-0.0922,0.1828,801.3233,0.047327,20.052,-266.777425,-266.77129,-266.770346,-266.806816 +"C(=O)C#CC#N",2.4648,51.47,-0.3053,-0.1211,0.1843,776.7874,0.03702,17.93,-282.871678,-282.865901,-282.864957,-282.900943 +"C(=O)C#CC=O",2.3737,48.1,-0.2894,-0.0927,0.1966,801.4852,0.0471,19.025,-303.940271,-303.933923,-303.932979,-303.970909 +"N=COCC#C",3.3466,50.16,-0.2698,0.0135,0.2834,665.5192,0.083043,21.973,-285.242094,-285.235491,-285.234547,-285.272892 +"N=COCC#N",3.3728,45.57,-0.2899,-0.0071,0.2828,646.0972,0.07256,20.023,-301.345683,-301.339414,-301.33847,-301.376371 +"C#CCCC=O",2.1557,52.99,-0.2549,-0.0302,0.2247,817.4048,0.093631,23.097,-269.20692,-269.199987,-269.199043,-269.238076 +"C(CC#N)C=O",2.4084,48.12,-0.274,-0.0464,0.2276,792.702,0.083388,21.004,-285.313674,-285.307112,-285.306168,-285.344659 +"C#CCNC=O",3.3746,49.89,-0.2561,0.0191,0.2752,691.5345,0.083614,22.134,-285.276634,-285.270024,-285.269079,-285.307545 +"C(C#N)NC=O",3.6197,44.97,-0.2788,-0.0017,0.2772,652.22,0.073313,20.073,-301.382062,-301.375865,-301.37492,-301.412562 +"C#CCOC=O",3.7921,45.77,-0.2819,-0.0051,0.2768,660.5683,0.070779,20.964,-305.145002,-305.13858,-305.137636,-305.175765 diff --git a/chemprop/tests/data/regression/rxn+mol/atom_descriptors.npz b/chemprop/tests/data/regression/rxn+mol/atom_descriptors.npz new file mode 100644 index 0000000000000000000000000000000000000000..d7d3bd59275bef6056d78cf66cfbaa40372e1c08 --- /dev/null +++ b/chemprop/tests/data/regression/rxn+mol/atom_descriptors.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c9ff81b72dec4a5b85cbe4b6c6f107de6b9360567b451d67021a76cb4873ca +size 157746 diff --git a/chemprop/tests/data/regression/rxn+mol/atom_features.npz b/chemprop/tests/data/regression/rxn+mol/atom_features.npz new file mode 100644 index 0000000000000000000000000000000000000000..026aa368c4163687362c1ff314cb483cf48f3c73 --- /dev/null +++ b/chemprop/tests/data/regression/rxn+mol/atom_features.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:091326ffbd3b5232187d05f8d8fdd3d139ba73e1a7f681fc3a9eb1dd313f0761 +size 118050 diff --git a/chemprop/tests/data/regression/rxn+mol/bond_features.npz b/chemprop/tests/data/regression/rxn+mol/bond_features.npz new file mode 100644 index 
0000000000000000000000000000000000000000..ac4ccc75502420d3fab07fc750a4e36e80e13b4b --- /dev/null +++ b/chemprop/tests/data/regression/rxn+mol/bond_features.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4743f823adc74e5e16c46fd4a933c7c03be519373342397b9af962a345d60543 +size 133386 diff --git a/chemprop/tests/data/regression/rxn+mol/descriptors.npz b/chemprop/tests/data/regression/rxn+mol/descriptors.npz new file mode 100644 index 0000000000000000000000000000000000000000..c635c4ceddb80f6ad13b974563dff4f6206b665f --- /dev/null +++ b/chemprop/tests/data/regression/rxn+mol/descriptors.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:806d0418f1fd57c79ea43bf01f0f612951473c363d643e7b35b1a014379ba159 +size 3464 diff --git a/chemprop/tests/data/regression/rxn+mol/rxn+mol.csv b/chemprop/tests/data/regression/rxn+mol/rxn+mol.csv new file mode 100644 index 0000000000000000000000000000000000000000..2beffa74a57322f10de8f35484232dac29a8de80 --- /dev/null +++ b/chemprop/tests/data/regression/rxn+mol/rxn+mol.csv @@ -0,0 +1,401 @@ +rxn_smiles,solvent_smiles,target +[C:4](=[C:5]=[O:6])([H:12])[H:13].[O:1]([C:2]([C:3](=[C:7]([H:14])[H:15])[H:11])([H:9])[H:10])[H:8]>>[O:1]([C:2]([C:3]1([H:11])[C:4]([H:12])([H:13])[C:5](=[O:6])[C:7]1([H:14])[H:15])([H:9])[H:10])[H:8],O,-3.76 +[O:1]([C:2]1=[N:3][C:4](=[O:5])[C:6]([H:9])([H:10])[N:7]1[H:11])[H:8]>>[O:1]([c:2]1=[n:3][c:4]([O:5][H:9])[c:6]([H:10])[n:7]1[H:11])[H:8],O,3.46 +[C:1]1([H:7])([H:8])[C:2]([H:9])([H:10])[O:3][C:4]([H:11])=[N+:5]=[C-:6]1.[H:12][H:13]>>[C:1]1([H:7])([H:8])[C:2]([H:9])([H:10])[O:3][C:4]([H:11])=[N:5][C:6]1([H:12])[H:13],O,1.54 +[O:1]=[C:2]([C:3]([C:5](=[C:4])[H:9])([H:7])[H:8])[H:6]>>[O:1]=[C:2]([C:3]([C:4]#[C:5][H:9])([H:7])[H:8])[H:6],CCCCCCCCO,-0.54 +[N:1](=[C:2]1\[O:3][C@:4]1([C:6]([C:5]([H:7])([H:10])[H:11])([H:12])[H:13])[H:9])\[H:8]>>[N:1]([C:2](=[O:3])[C:4]1([H:9])[C:5]([H:10])([H:11])[C:6]1([H:12])[H:13])([H:7])[H:8],CCCCCCCCO,-7.56 +[N:1]([c:2]1[c:3]([O:4][H:10])[c:5]([H:11])[c:6]([H:12])[n:7]1[H:13])([H:8])[H:9]>>[N:1]([C@:2]1([H:13])[C:3]([O:4][H:10])=[C:5]([H:11])[C:6]([H:12])=[N:7]1)([H:8])[H:9],CCCCCCCCO,-1.14 +[O:1]=[C:2]1[C:3]([H:8])([H:9])[C:4]([H:10])([H:11])[C@@:5]2([H:12])[C:6]([H:13])([H:14])[C@@:7]12[H:15]>>[O:1]=[C:2]([C@@:7]1([H:15])[C@:5]([C:4](=[C:3]([H:8])[H:9])[H:11])([H:12])[C:6]1([H:13])[H:14])[H:10],CCc1ccccc1,-2.2 +[O:1]([N:2]1[C:3]([H:9])([H:10])[C:4]([H:11])=[C:5][C:6]([H:12])([H:13])[C:7]1([H:14])[H:15])[H:8]>>[O:1]([N:2]1[C:3]([H:10])[C:4]([H:11])=[C:5]([H:9])[C:6]([H:12])([H:13])[C:7]1([H:14])[H:15])[H:8],CCc1ccccc1,-2.19 +[H:7][H:9].[O:1]=[N:2][C:3]1=[C:4]([H:8])[C:5]([H:10])([H:11])[C:6]1([H:12])[H:13]>>[O:1]([N:2]=[C:3]1[C:4]([H:8])([H:9])[C:5]([H:10])([H:11])[C:6]1([H:12])[H:13])[H:7],CCc1ccccc1,-2.32 +[N:1]([C@:2]12[C:3]([H:10])([H:11])[C@@:4]1([H:12])[O:5][C:6]2=[O:7])([H:8])[H:9]>>[N:1]([C:2]([C@@:4]1([H:12])[C:3]([H:10])([H:11])[O:5]1)=[C:6]=[O:7])([H:8])[H:9],C=Cc1ccccc1,-0.11 +[O:1]=[C:2]([c:3]1[c:4]([H:9])[n:5][c:6]([H:10])[n:7]1[H:11])[H:8]>>[O:1]=[C:2]=[C:3]1[C:4]([H:8])([H:9])[N:5]=[C:6]([H:10])[N:7]1[H:11],C=Cc1ccccc1,-0.32 +[C:1]([C:2]([N:3]([C:4]([H:12])([H:13])[H:14])[C:5](=[O:6])[H:15])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1](/[C:2](=[N:3]/[C:5](=[O:6])[H:15])[H:11])([H:7])([H:8])[H:9].[C:4]([H:10])([H:12])([H:13])[H:14],C=Cc1ccccc1,0.82 
+[C:1]([C@@:2]1([H:10])[C:3]([H:12])([H:13])[C:4]([H:14])([H:15])/[C:5]1=[N:6]/[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3]([C:4]([C:5]#[N:6])([H:14])[H:15])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],NCc1ccccc1,0.27 +[N:1]([c:2]1[n:3][n:4]([H:9])[n:5][c:6]1[H:10])([H:7])[H:8]>>[N-:4]([N+:5]#[C:6][H:10])[H:9].[N:1]([C:2]#[N:3])([H:7])[H:8],NCc1ccccc1,0.57 +[C:1]([H:8])([H:9])([H:10])[H:14].[C:2]1=[C:3]([H:11])[N:4]([H:12])[C:5](=[O:6])[C:7]=1[H:13]>>[C:1]([C:2]1=[C:3]([H:11])[N:4]([H:12])[C:5](=[O:6])[C:7]1([H:13])[H:14])([H:8])([H:9])[H:10],NCc1ccccc1,-2.84 +[C+:1]([C:2]1=[C:3]([H:10])[N:4]([H:11])[C-:5]([H:12])[C:6]1([H:7])[H:13])([H:8])[H:9]>>[C:1]([c:2]1[c:3]([H:10])[n:4]([H:11])[c:5]([H:12])[c:6]1[H:13])([H:7])([H:8])[H:9],N#Cc1ccccc1,-0.32 +[C:1]([C:2]([C:3]#[C:4][C:5]([O:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3](=[C:4]=[C:5]([H:12])[H:13])[O:6][H:14])([H:10])[H:11])([H:7])([H:8])[H:9],N#Cc1ccccc1,-3.65 +[C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([C:4]([C:5]1([H:17])[C:6]([H:18])([H:19])[C:7]1([H:20])[H:21])([H:15])[H:16])[H:11])([H:8])([H:9])[H:10]>>[C:1](=[C:2]([C:3]([H:12])([H:13])[H:14])[H:11])([H:9])[H:10].[C:4](=[C:5]1[C:6]([H:18])([H:19])[C:7]1([H:20])[H:21])([H:15])[H:16].[H:8][H:17],N#Cc1ccccc1,-0.46 +[C:1]1([H:6])([H:7])[C:2]([H:8])([H:9])[C:3]([H:10])=[C:4]([H:11])[C:5]1([H:12])[H:13]>>[C:1]([C:5]([C:4](=[C:3]=[C:2]([H:8])[H:9])[H:11])([H:12])[H:13])([H:6])([H:7])[H:10],OCc1ccccc1,-8.41 +[C:1](=[C:2]([H:10])[H:11])([H:8])[H:9].[C:3]1[C:4]([H:12])([H:13])[C@@:5]2([H:14])[O:6][C@@:7]12[H:15]>>[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]12[C:4]([H:12])([H:13])[C@@:5]1([H:14])[O:6][C@@:7]21[H:15],OCc1ccccc1,-3.6 +[N:1](=[C:2]1[C:3]([O:4][H:10])=[C:5]([H:11])[C-:6]([H:12])[N+:7]1([H:8])[H:13])[H:9]>>[N:1]([c:2]1[c:3]([O:4][H:10])[c:5]([H:11])[c:6]([H:12])[n:7]1[H:13])([H:8])[H:9],OCc1ccccc1,7.53 +[C:1]([C@@:2]1([H:10])[O:3][C@@:4]2([H:11])[C:5]([H:12])([H:13])[C@@:6]12[H:14])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[O+:3]=[C-:4][C:5]([H:12])([H:13])[C:6]1([H:11])[H:14])([H:7])([H:8])[H:9],COc1ccccc1,-1.84 +[C:1]([O:2][C:3](=[O:4])[C:5](=[C:6]([H:13])[H:14])[H:11])([H:8])([H:9])[H:10].[O:7]([H:12])[H:15]>>[C:1]([O:2][C:3](=[O:4])[C:5]([C:6]([O:7][H:15])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],COc1ccccc1,1.07 +[N:1]([C@@:2]([C:3]([C:4]([O:5][H:15])([H:13])[H:14])([H:11])[H:12])([C:6]#[N:7])[H:10])([H:8])[H:9]>>[C:3](=[C:4]([H:13])[H:14])([H:11])[H:12].[N:1](=[C:2](\[C:6]#[N:7])[H:10])\[H:8].[O:5]([H:9])[H:15],COc1ccccc1,0.48 +[C:1]([N:2]([C:3](=[O:4])[C:5]([O:6][H:13])([H:11])[H:12])[H:10])([H:7])([H:8])[H:9]>>[C:1]([N:2]([C-:3]=[O+:4][C:5]([O:6][H:13])([H:11])[H:12])[H:10])([H:7])([H:8])[H:9],c1ccc(Oc2ccccc2)cc1,1.67 +[C:1]([O:2]/[C:3](=[C:4](\[C:5]([O:6][H:16])([H:14])[H:15])[H:12])[H:11])([H:7])([H:8])[H:9].[H:10][H:13]>>[C:1]([O:2][C:3]([C:4]([C:5]([O:6][H:16])([H:14])[H:15])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],c1ccc(Oc2ccccc2)cc1,-2.7 +[N:1]([C:2](=[O:3])[C:4]([C:5](=[O:6])[H:11])([H:9])[H:10])([H:7])[H:8]>>[N:1]([C:2](=[O:3])[C:5]([C:4]([H:9])([H:10])[H:11])=[O:6])([H:7])[H:8],c1ccc(Oc2ccccc2)cc1,0.86 +[N:1]([C:2](=[O:3])[C:4]([C:5]([O:6][H:13])([H:11])[H:12])([H:9])[H:10])([H:7])[H:8]>>[C:4](=[C:5]([O:6][H:13])[H:11])([H:9])[H:10].[N:1]([C:2](=[O:3])[H:12])([H:7])[H:8],CCCCN(CCCC)CCCC,-2.4 
+[C:1]([C-:2]1[N+:3]([H:11])([H:12])[C@:4]1([C:5]([H:13])([H:14])[H:15])[C:6]#[N:7])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C@:4]1([C:5]([H:13])([H:14])[H:15])[C:6]#[N:7])([H:8])([H:9])[H:10],CCCCN(CCCC)CCCC,3.06 +[O+:1](=[C-:2][H:8])[c:3]1[n:4][c:5]([H:9])[n:6][o:7]1>>[O:1]=[C:2]([c:3]1[n:4][c:5]([H:9])[n:6][o:7]1)[H:8],CCCCN(CCCC)CCCC,-0.01 +[O:1]=[C:2]1[C:3]([H:7])([H:8])[N+:4]([H:9])=[C-:5][N:6]1[H:10]>>[O:1]=[C:2]1[C:3]([H:7])([H:8])[N:4]=[C:5]([H:9])[N:6]1[H:10],CCOc1ccccc1,1.66 +[C:1]([C:2]1([H:10])[O:3][C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[O:6]1)([H:7])([H:8])[H:9]>>[C:1]([C:2]1=[O+:3][C-:4]([H:12])[C:5]([H:13])([H:14])[O:6]1)([H:7])([H:8])[H:9].[H:10][H:11],CCOc1ccccc1,-0.41 +[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5]#[C:6][H:12])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[O:3][C:6]([H:12])=[C:5]=[C:4]1[H:11])([H:7])([H:8])[H:9],CCOc1ccccc1,-1.16 +[C:1]([C:2]([C:5]([C:3]#[N:4])=[O:6])([C:7]([H:12])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]([C:3]#[N:4])([C:5](=[O:6])[C:7]([H:12])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10],CCCCC(CO)CC,-6.68 +[C:1]1([H:7])([H:8])[C:2]([H:9])([H:10])[C:3]([H:11])=[C:4]([H:12])[C:5]([H:13])([H:14])[O:6]1>>[C:1]1([H:8])=[O+:6][C:5]([H:13])([H:14])[C:4]([H:12])=[C-:3][C:2]1([H:9])[H:10].[H:7][H:11],CCCCC(CO)CC,-1.4 +[N:1](/[C:2](=[C:3]([C:4](=[C:5](/[C:6]=[N:7][H:14])[H:13])\[H:12])/[H:11])[H:10])([H:8])[H:9]>>[N:1]([C:2]([c:3]1[c:4]([H:12])[c:5]([H:13])[c:6][n:7]1[H:14])([H:10])[H:11])([H:8])[H:9],CCCCC(CO)CC,-2.89 +[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C@:3]1([H:10])[C@@:4]1([H:11])[C:5]([H:12])([H:13])[C@@:6]21[H:14]>>[C:1]1([H:8])=[C:2]([H:9])[C:3]1([C:4]1([H:11])[C:5]([H:12])=[C:6]1[H:14])[H:10].[H:7][H:13],CCOC(CCC)=O,-0.68 +[C:1]([C-:2](/[C:3](=[N:4]/[H:11])[H:10])[N+:6]#[N:5])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[c:3]([H:10])[n:4]([H:11])[n:5][n:6]1)([H:7])([H:8])[H:9],CCOC(CCC)=O,-0.35 +[C:1]([C@:2]1(/[C:3](=[N:4]/[H:12])[H:11])[C:5]([N:6]([H:13])[H:14])=[C:7]1[H:15])([H:8])([H:9])[H:10]>>[C:1]([c:2]1[c:3]([H:11])[n:4]([H:12])[c:5]([N:6]([H:13])[H:14])[c:7]1[H:15])([H:8])([H:9])[H:10],CCOC(CCC)=O,-0.6 +[C:1](=[C:2]([H:9])[H:10])([H:6])[H:8].[C:3](=[C:4]([O:5][H:7])[H:13])([H:11])[H:12]>>[C:1]([C:2]([C:3]([C:4](=[O:5])[H:13])([H:11])[H:12])([H:9])[H:10])([H:6])([H:7])[H:8],Cc1ccc(C)cc1,-0.61 +[C:1]([C:2]([C@:3]([N:4][H:14])([C:5](=[O:6])[O:7][H:15])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([H:8])([H:9])[H:10].[C:2](/[C:3](=[N:4]/[H:14])[H:13])([C:5](=[O:6])[O:7][H:15])([H:11])[H:12],Cc1ccc(C)cc1,0.38 +[C:1]([C:2][C:6]([C:5]([C:3]([O:4][H:12])([H:10])[H:11])([H:13])[H:14])([H:15])[H:16])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([C:3]([O:4][H:12])([H:10])[H:11])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:7])([H:8])[H:9],Cc1ccc(C)cc1,0.49 +[C:1]([H:7])([H:8])([H:9])[H:14].[O:2]=[C:3]([C:4]([C:5](=[O:6])[H:13])([H:11])[H:12])[H:10]>>[C:1]([O:2][C@@:3]1([H:10])[C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9],BrCCBr,0.95 +[C:1]([C@@:2]([C:3]#[N:4])([C:5](=[O:6])[C:7]([H:12])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10]>>[C:1](/[C:2]([C:3]#[N:4])=[C:5](\[O:6][H:11])[C:7]([H:12])([H:13])[H:14])([H:8])([H:9])[H:10],BrCCBr,-1.14 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])([H:15])[O:5][C@:6]1([C:7]([H:17])([H:18])[H:19])[H:16])([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[C:6]([H:16])[O:5][C:4]([H:14])([H:15])[C@@:3]1([C:7]([H:17])([H:18])[H:19])[H:12])([H:8])([H:9])[H:10].[H:11][H:13],BrCCBr,-2.11 
+[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C@:4]1([C:5](=[O:6])[H:14])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C@@:4]2([H:13])[C@:5]1([H:14])[O:6]2)([H:7])([H:8])[H:9],CCCBr,-1.41 +[O:1]([C@:2]12[C:3]([H:8])([H:9])[C@@:4]1([H:10])[C:5]([H:11])([H:12])[C:6]2([H:13])[H:14])[H:7]>>[C:2]12=[C:4]([C:3]1([H:8])[H:9])[C:5]([H:11])([H:12])[C:6]2([H:13])[H:14].[O:1]([H:7])[H:10],CCCBr,-3.59 +[C:1]([N:2]1[C:3]([H:11])([H:12])[C@:4]1([C:5](=[O:6])[C:7]([H:14])([H:15])[H:16])[H:13])([H:8])([H:9])[H:10]>>[C:1]([N+:2]1=[C:3]([H:12])[C@:4]1([C-:5]([O:6][H:11])[C:7]([H:14])([H:15])[H:16])[H:13])([H:8])([H:9])[H:10],CCCBr,0.41 +[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C:3]([H:10])([H:11])[C:4]([H:12])([H:14])[C:5]([H:13])=[C:6]12>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C:3]([H:10])([H:11])[C:4]([H:12])=[C:5]([H:13])[C@@:6]12[H:14],CCCC,0.03 +[O:1]=[C:2]([N-:3][N+:4]#[C:5][H:7])[H:6]>>[o:1]1[c:2]([H:6])[n:3][n:4][c:5]1[H:7],CCCC,0.23 +[O:1]=[C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11])[N:5]1[C:6](=[O:7])[H:12])[H:8]>>[O:1]=[C:2]([C@@:3]1([H:9])[C-:4]([H:10])[N+:5]1=[C:6]([O:7][H:11])[H:12])[H:8],CCCC,-0.07 +[O:1]=[C:2]1[C:3]([H:7])([H:8])[C:4]([H:9])=[C:5]([H:10])[N:6]1[H:11]>>[O-:1][C:2]1=[C:3]([H:7])[C:4]([H:8])([H:9])[C:5]([H:10])=[N+:6]1[H:11],ClCCCl,-3.65 +[N:1]([C:3]1=[C:4]([H:10])[C@:2]1([C:5]#[N:6])[H:9])([H:7])[H:8]>>[N:1]([C@@:2]([C:3]#[C:4][H:10])([C:5]#[N:6])[H:9])([H:7])[H:8],ClCCCl,-2.21 +[C:1]([C@@:2]12[C@:3]3([H:11])[C@@:4]([H:12])([C@:5]3([H:13])[C:6]1)[C:7]2([H:14])[H:15])([H:8])([H:9])[H:10]>>[C:1]([C@:2]12[C@@:3]3([H:11])[C@:4]4([H:12])[C@@:5]3([H:13])[C@@:6]1([H:14])[C@:7]24[H:15])([H:8])([H:9])[H:10],ClCCCl,1.4 +[O:1]([C:2]([C@@:3]1([H:11])[C:4]([H:12])([H:13])[C:5]([H:14])([H:15])[O:6][C:7]1([H:16])[H:17])([H:9])[H:10])[H:8]>>[H:14][H:15].[O:1]([C:2]([C@@:3]1([H:11])[C:4]([H:12])([H:13])[C-:5]=[O+:6][C:7]1([H:16])[H:17])([H:9])[H:10])[H:8],CCCN,-4.98 +[C:1]([C@@:2]([C:3]([O:4][H:13])([H:11])[H:12])([C:5](=[O:6])[H:14])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C@:5]([C:3]([O:4][H:13])([H:11])[H:12])([H:14])[O:6]1)([H:7])([H:8])[H:9],CCCN,-6.62 +[C:1]([C:2]([O:3][H:11])[C@:4]([N:5]([H:13])[H:14])([C:6]([O:7][H:17])([H:15])[H:16])[H:12])([H:8])([H:9])[H:10]>>[C:1](/[C:2]([O:3][H:11])=[C:4](/[C:6]([O:7][H:17])([H:15])[H:16])[H:12])([H:8])([H:9])[H:10].[N:5]([H:13])[H:14],CCCN,-3.66 +[C:1]([C:2]([C:3]([C:4]([C:5]([H:13])([H:14])[H:15])=[C:6]([H:16])[H:17])([H:11])[H:12])=[O:7])([H:8])([H:9])[H:10]>>[C:1]([C:2]12[C:3]([H:11])([H:12])[C:4]([C:5]([H:13])([H:14])[H:15])([C:6]1([H:16])[H:17])[O:7]2)([H:8])([H:9])[H:10],CCC#N,-2.71 +[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]12[C:4]([H:12])([H:13])[C:5]([H:14])=[C:6]([H:15])[C:7]2([H:16])[H:17]>>[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]12[C:4]([H:13])=[C:5]([H:14])[C:6]([H:15])=[C:7]2[H:17].[H:12][H:16],CCC#N,-1.13 +[O:1]=[C:2]1[C:3]([H:7])([H:8])[C:4][C:5]([H:10])([H:11])[C:6]1([H:9])[H:12]>>[O:1]=[C:2]1[C:3]([H:7])([H:8])[C@@:4]2([H:9])[C:5]([H:10])([H:11])[C@@:6]12[H:12],CCC#N,1.31 +[n:1]1([H:6])[c-:2][n+:3]([H:7])[c:4]([H:8])[n:5]1>>[n:1]1([H:6])[c:2]([H:7])[n:3][c:4]([H:8])[n:5]1,OCCO,6.19 +[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])([O:4][C:5](=[O:6])[H:14])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]([H:11])[H:13])[O:4][C:5](=[O:6])[H:14])([H:7])([H:8])[H:9].[H:10][H:12],OCCO,-0.74 
+[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C@:4]1([C:5]([C:6](=[O:7])[H:17])([H:15])[H:16])[H:14])([H:8])([H:9])[H:10]>>[C:1]1([H:8])([H:10])[C@:2]2([H:11])[C:3]([H:12])([H:13])[C@:4]2([H:14])[C:5]([H:15])([H:16])[C@@:6]1([O:7][H:9])[H:17],OCCO,-4.23 +[C:1]([O:2][C:3]([C:4]([H:11])([H:12])[H:13])=[C:5]=[N:6][H:10])([H:7])([H:8])[H:9]>>[C:1]([O:2][C@@:3]([C:4]([H:11])([H:12])[H:13])([C:5]#[N:6])[H:10])([H:7])([H:8])[H:9],CCCC(C)C,0.34 +[C:1]([C@:2]1([H:10])[N:3]([H:11])[C@@:5]([O:6])([H:14])[C:4]1([H:12])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C@:2]([N:3][H:11])([C:4]([C:5](=[O:6])[H:14])([H:12])[H:13])[H:10])([H:7])([H:8])[H:9],CCCC(C)C,0.03 +[C:1]([C:2]([C:3]([C:4]([C:5]([H:16])([H:17])[H:18])([H:14])[H:15])([N+:7]#[C-:6])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([C:4]([C:5]([H:16])([H:17])[H:18])([H:14])[H:15])([C:6]#[N:7])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10],CCCC(C)C,0.42 +[C:1](=[C:2]([H:9])[H:10])([H:6])[H:8].[C:3](=[C:4]([O:5][H:7])[H:13])([H:11])[H:12]>>[C:1]([C:2]([C:3]([C:4](=[O:5])[H:13])([H:11])[H:12])([H:9])[H:10])([H:6])([H:7])[H:8],CCCC(C)=O,1.75 +[C:1]([O:2][C:3]([C:4](/[N:5]=[C:6](/[O:7][H:15])[H:16])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([O:2][C:3]([C:4]([N:5]([C:6](=[O:7])[H:16])[H:15])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],CCCC(C)=O,1.69 +[C:1]([C:2](=[O:3])[C:4]([H:10])([H:11])[H:13])([H:7])([H:8])[H:9].[C:5]([O:6][H:14])[H:12]>>[C:1]([C:2](=[O:3])[C:4]([C:5]([O:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],CCCC(C)=O,6.74 +[C:1]1([H:7])([H:9])[N:2]2[C@:3]1([H:10])[C:4]([H:11])=[C:5]([H:12])[C:6]2([H:8])[H:13]>>[C:1]([n:2]1[c:3]([H:10])[c:4]([H:11])[c:5]([H:12])[c:6]1[H:13])([H:7])([H:8])[H:9],CCCN(=O)=O,-0.71 +[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[N:4]([H:14])[C@:5]1([C:6]#[N:7])[H:15])([H:8])([H:9])[H:10]>>[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[N:4]([H:14])[C:5]([H:15])=[C:6]=[N:7]1)([H:8])([H:9])[H:10],CCCN(=O)=O,-0.44 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])=[C:4]([H:13])[N:5]([H:14])[C:6]1=[O:7])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])=[C-:4][N:5]([H:14])[C+:6]1[O:7][H:13])([H:8])([H:9])[H:10],CCCN(=O)=O,2.45 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[O:4][C@:5]([C:6]([H:15])([H:16])[H:17])([H:14])[O:7]1)([H:8])([H:9])[H:10]>>[C:1]([C:2]([O:7][C:5](=[O+:4][C-:3]([H:12])[H:13])[C:6]([H:15])([H:16])[H:17])([H:11])[H:14])([H:8])([H:9])[H:10],CC(=O)CC(C)C,0.58 +[N:1]1=[C-:2][C@@:3]2([H:8])[O+:4]=[C:5]([H:9])[C@@:7]2([H:12])[C:6]1([H:10])[H:11]>>[N:1]#[C:2][C@@:3]1([H:8])[O:4][C@@:5]2([H:9])[C:6]([H:10])([H:11])[C@@:7]12[H:12],CC(=O)CC(C)C,0.95 +[C:1]([O:2][C:3]([C:4]([O:5][C:6]([H:14])([H:15])[H:16])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C-:1]([O+:2]=[C:3]([H:10])[H:11])([H:7])[H:9].[C:4]([O:5][C:6]([H:14])([H:15])[H:16])([H:8])([H:12])[H:13],CC(=O)CC(C)C,0.71 +[C:1](/[C:2](=[C:3](\[C:4]([C:5](=[C:6]=[C:7]([H:16])[H:17])[H:15])([H:13])[H:14])[H:12])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10],CC(C)OC(C)C,-0.5 +[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]1([C:4]([C@@:5]1([H:14])[C:6]([H:15])([H:16])[C:7]1)([H:12])[H:13])[H:17]>>[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]12[C:4]([H:12])([H:13])[C@@:5]1([H:14])[C:6]([H:15])([H:16])[C@@:7]21[H:17],CC(C)OC(C)C,0.08 
+[C:1]([n:2]1[c:3]([H:11])[c:4]([H:12])[c:5]([C:6]([H:13])([H:14])[H:15])[n:7]1)([H:8])([H:9])[H:10]>>[C:1]([N:2]1[C:3]([H:11])=[C+:4][C@@:5]([C:6]([H:13])([H:14])[H:15])([H:12])[N-:7]1)([H:8])([H:9])[H:10],CC(C)OC(C)C,-1.99 +[C:1]([C@@:2]1([O:3][H:11])[C:4]([H:12])([H:13])[C@@:5]2([H:14])[O:6][C@@:7]12[H:15])([H:8])([H:9])[H:10]>>[C:1]([C:2]([O:3][H:11])=[C:4]([H:12])[H:13])([H:8])([H:9])[H:10].[C:5]([O-:6])(=[C+:7][H:15])[H:14],CC(C)OC(C)=O,-3.03 +[C:1]([O:2][C:3]([C:4]([C:5]([H:13])([H:14])[H:15])([C:6]([H:16])([H:17])[H:18])[C:7]([H:19])([H:20])[H:21])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([O+:2]=[C-:3][H:12])([H:8])([H:9])[H:10].[C:4]([C:5]([H:13])([H:14])[H:15])([C:6]([H:16])([H:17])[H:18])([C:7]([H:19])([H:20])[H:21])[H:11],CC(C)OC(C)=O,-0.77 +[C:1](=[C:2]1[C:3]([H:11])=[C:4]([N:5]([H:12])[H:13])[C:6]([H:10])([H:14])[N:7]1[H:15])([H:8])[H:9]>>[C:1]([c:2]1[c:3]([H:11])[c:4]([N:5]([H:12])[H:13])[c:6]([H:14])[n:7]1[H:15])([H:8])([H:9])[H:10],CC(C)OC(C)=O,0.61 +[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C-:4]([H:15])[O+:5]=[C:6]([H:16])[N:7]1[H:14])([H:8])([H:9])[H:10]>>[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C:4]([H:14])([H:15])[O:5][C:6]([H:16])=[N:7]1)([H:8])([H:9])[H:10],Cc1cccc(C)c1,2.48 +[C:1]([C@@:2]1([H:9])[N:3]([H:10])[N:4]1[C:5]([C:6]([H:13])([H:14])[H:15])([H:11])[H:12])([H:7])[H:8]>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[N:3]([H:10])[N@@:4]12.[C:5]([C:6]([H:13])([H:14])[H:15])([H:11])[H:12],Cc1cccc(C)c1,0.17 +[N:1](=[C:2]=[C:3]([N:4]=[C:5]([C:6](=[O:7])[H:11])[H:10])[H:9])[H:8]>>[N:1](=[c:2]1/[c:3]([H:9])[n:4][c:5]([H:10])[c:6]([H:11])[o:7]1)\[H:8],Cc1cccc(C)c1,0.52 +[C:1]([O:6]/[C:5](=[N:2]/[C:3](=[O:4])[H:11])[C:7]([H:12])([H:13])[H:14])([H:8])([H:9])[H:10]>>[C:1]([N:2]([C:3](=[O:4])[H:11])[C:5](=[O:6])[C:7]([H:12])([H:13])[H:14])([H:8])([H:9])[H:10],Cc1cccc(O)c1,-1.69 +[C:1]([N:2]1[C:3]([H:11])([H:12])[C@:4]1([C:5](=[O:6])[C:7]([H:14])([H:15])[H:16])[H:13])([H:8])([H:9])[H:10]>>[C:1]([C@@:4]([N:2]=[C:3]([H:11])[H:12])([C:5](=[O:6])[C:7]([H:14])([H:15])[H:16])[H:13])([H:8])([H:9])[H:10],Cc1cccc(O)c1,0.38 +[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C@:4]1([C:5]([N:7]=[C:6]([H:15])[H:16])([H:14])[H:17])[H:13])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C@:4]1([C@@:5]1([H:14])[C:6]([H:15])([H:16])[N:7]1[H:17])[H:13])([H:8])([H:9])[H:10],Cc1cccc(O)c1,-0.97 +[O:1]([C@@:2]1([H:8])[C:3]([H:9])([H:10])[C@:4]1([C:5]#[C:6][H:12])[H:11])[H:7]>>[O:1]([C:2]([C:3]([C-:4]=[C:5]=[C+:6][H:12])([H:9])[H:10])([H:8])[H:11])[H:7],Brc1ccccc1,-0.45 +[C:1]([O:2][C@@:3]1([H:10])[C-:4]=[O+:5][C:6]1([H:13])[H:14])([H:7])([H:8])[H:9].[H:11][H:12]>>[C:1]([O:2][C:3]1([H:10])[C:4]([H:11])([H:12])[O:5][C:6]1([H:13])[H:14])([H:7])([H:8])[H:9],Brc1ccccc1,-0.49 +[N:1](=[C:2]1/[C:3]([H:8])=[C:4]([H:9])[C:5]([H:10])([H:11])[O:6]1)\[H:7]>>[N:1]1([H:7])[C-:2]=[O+:6][C:5]([H:10])([H:11])[C:4]([H:9])=[C:3]1[H:8],Brc1ccccc1,-0.31 +[C:1]([C:2](=[O:3])[C:4]([C:5]([C:6](=[C:7]([H:18])[H:19])[H:16])([H:14])[H:15])([H:12])[H:13])([H:8])([H:9])[H:10].[H:11][H:17]>>[C:1]([C:2]1([O:3][H:11])[C:4]([H:12])([H:13])[C:5]([H:14])([H:15])[C:6]([H:16])([H:17])[C:7]1([H:18])[H:19])([H:8])([H:9])[H:10],CC1CCCCC1,-1.62 +[C:1]([C:2]([C:3]([C:4]([C:5]([O:6][C:7]([H:19])([H:20])[H:21])([H:17])[H:18])([H:15])[H:16])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([H:8])([H:9])([H:10])[H:14].[C:2](=[C:3]([C:4]([C:5]([O:6][C:7]([H:19])([H:20])[H:21])([H:17])[H:18])([H:15])[H:16])[H:13])([H:11])[H:12],CC1CCCCC1,0.02 
+[C:1]([C:2]([N:3]([C:4]([O:5][H:13])[H:12])[H:11])([H:9])[H:10])([H:6])([H:7])[H:8]>>[C:1]([C:2]([N:3]([C:4]([O:5][H:13])([H:6])[H:12])[H:11])([H:9])[H:10])([H:7])[H:8],CC1CCCCC1,-0.23 +[O:1]([C@@:2]1([H:9])[C@@:3]2([H:10])[C@@:4]3([H:11])[C:5]([H:12])([H:13])[C@:6]1([H:14])[N:7]23)[H:8]>>[O:1]([C@@:2]1([H:9])[C@@:3]2([H:10])[C:4][C:5]([H:12])([H:13])[C@:6]1([H:14])[N:7]2[H:11])[H:8],Cc1ccccc1,-0.06 +[C:1](=[C:2]=[C:3]1[C:4]([H:9])([H:10])[O:5][C:6]1([H:11])[H:12])([H:7])[H:8]>>[C:1](#[C:2][C:3]1([H:8])[C:4]([H:9])([H:10])[O:5][C:6]1([H:11])[H:12])[H:7],Cc1ccccc1,-1.99 +[C:1]([C:2](/[C:3](=[N:4]/[H:11])[H:10])=[N+:6]=[C-:5][H:12])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[c:3]([H:10])[n:4]([H:11])[c:5]([H:12])[n:6]1)([H:7])([H:8])[H:9],Cc1ccccc1,0.44 +[C:1]([O:2][C:3]([C:4]([C:5](=[O:6])[H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]1([H:7])([H:8])[O:2][C:3]([H:10])([H:11])[C:4]([H:12])([H:13])[C@:5]1([O:6][H:9])[H:14],Clc1ccccc1,-1.27 +[C:1]1([H:7])([H:8])[C@:2]2([H:9])[C@:3]3([H:10])[C:4]([H:11])([H:12])[C@@:5]1([H:13])[C@:6]23[H:14]>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C@:3]([C:4]([H:11])([H:12])[H:13])([H:10])[C@@:6]2([H:14])[C:5]1,Clc1ccccc1,-1.49 +[C:1]([C-:2]/[N+:6](=[C:5](\[C:4]#[N:3])[H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[n:3][c:4][c:5]([H:10])[n:6]1[H:11])([H:7])([H:8])[H:9],Clc1ccccc1,0.23 +[C:1]([n:2]1[c:3]([H:10])[c:4]([H:11])[n:5][n:6]1)([H:7])([H:8])[H:9]>>[C:1]([C@:3]1([H:10])[N:2]=[N:6][N:5]=[C:4]1[H:11])([H:7])([H:8])[H:9],OC1CCCCC1,-0.98 +[C:1]([C@@:3]1([H:11])[N:2]=[C:6]([O:7][H:14])[C:5]([H:13])=[C:4]1[H:12])([H:8])([H:9])[H:10]>>[C:1]([n:2]1[c:3]([H:11])[c:4]([H:12])[c:5]([H:13])[c:6]1[O:7][H:14])([H:8])([H:9])[H:10],OC1CCCCC1,2.87 +[C:1]([C:2]([C:3]([C@:4]([N:5]([H:15])[H:16])([C:6]#[N:7])[H:14])([H:12])[H:13])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C@:2]1([H:11])[C:3]([H:12])([H:13])[C@@:4]1([C:6]#[N:7])[H:14])([H:8])([H:9])[H:10].[N:5]([H:15])[H:16],OC1CCCCC1,-0.76 +[C:1]([N:2]1[C@@:3]2([H:11])[C@@:4]([C:7][H:16])([H:12])[C:5]([H:13])([H:14])[C@@:6]12[H:15])([H:8])([H:9])[H:10]>>[C:1]([N:2]1[C@@:3]2([H:11])[C@@:4]3([H:12])[C:5]([H:13])([H:14])[C@:6]1([H:15])[C@@:7]23[H:16])([H:8])([H:9])[H:10],O=C1CCCCC1,0.38 +[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5](=[O:6])[H:12])[H:11])([H:7])([H:8])[H:9]>>[C:1]([O:3][C@:4]([C:2][H:10])([C:5](=[O:6])[H:12])[H:11])([H:7])([H:8])[H:9],O=C1CCCCC1,0.66 +[O:1]=[C:2]([C:3]([O:4][C:5](=[O:6])[H:10])([H:7])[H:8])[H:9]>>[O:1]=[C:2]1[C:3]([H:7])([H:8])[O:4][C:5]([H:9])([H:10])[O:6]1,O=C1CCCCC1,-0.27 +[C:1]1([H:7])([H:8])[N:2]([H:9])[O:6][C:5]([H:12])([H:13])[C:4]([H:11])=[C:3]1[H:10]>>[C:1]1([H:7])([H:8])[N:2]([H:9])[C@:3]1([C@@:4]1([H:11])[C:5]([H:12])([H:13])[O:6]1)[H:10],Cc1ccccn1,-0.55 +[C:1]([C:2]([N:3]1[C:4]([H:12])([H:13])[C@:5]1([C:6]([H:15])([H:16])[H:17])[H:14])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([N:3]1[C:4]([H:12])=[C:5]1[H:14])([H:10])[H:11])([H:7])([H:8])[H:9].[C:6]([H:13])([H:15])([H:16])[H:17],Cc1ccccn1,-1.47 +[O:1]([C:2]([C:3]([C:4]#[C:5][H:12])([C:6]#[C:7][H:13])[H:11])([H:9])[H:10])[H:8]>>[O:1]([C:2]([C:3](=[C:4]=[C:5]([H:11])[H:12])[C:6]#[C:7][H:13])([H:9])[H:10])[H:8],Cc1ccccn1,-2.31 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[O:6][C:5]([H:15])([H:16])[C:4]([H:14])[C:7]1([H:17])[H:18])([H:8])([H:9])[H:10]>>[C:1]([C:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([C:5]([O:6])([H:15])[H:16])([H:14])[C:7]1([H:17])[H:18])([H:8])([H:9])[H:10],CCCOC(C)=O,-0.32 
+[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C@:4]([C:5]([H:15])([H:16])[H:17])([H:14])[C@:6]1([C:7]([H:19])([H:20])[H:21])[H:18])([H:8])([H:9])[H:10]>>[C:1]([H:8])([H:9])([H:10])[H:11].[C:2]1[C:3]([H:12])([H:13])[C@:4]([C:5]([H:15])([H:16])[H:17])([H:14])[C@:6]1([C:7]([H:19])([H:20])[H:21])[H:18],CCCOC(C)=O,-1.5 +[C:1]([N:2]([C:3]([C:4]#[N:5])([C:6]#[N:7])[H:12])[H:11])([H:8])([H:9])[H:10]>>[C:1]([N+:2]([C-:3]([C:4]#[N:5])[C:6]#[N:7])([H:11])[H:12])([H:8])([H:9])[H:10],CCCOC(C)=O,-4.3 +[N:1]([C:2](=[O:3])/[N:4]=[C:5](/[O:6][H:9])[H:10])([H:7])[H:8]>>[N:1]([C:2](=[O:3])[N:4]([C:5](=[O:6])[H:10])[H:9])([H:7])[H:8],CCCCC,0 +[C:1]([C:2]1=[N:5][C@@:6]1(/[N:3]=[N:4]/[H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[n:3][n:4]([H:10])[n:5][c:6]1[H:11])([H:7])([H:8])[H:9],CCCCC,-0.46 +[C:1]([C@@:2]1([C+:5]=[N:6][H:10])[C-:3]([H:11])[C:4]1([H:12])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([C:5]#[N:6])[C:3]([H:10])([H:11])[C:4]1([H:12])[H:13])([H:7])([H:8])[H:9],CCCCC,0.25 +[C:1]([N:2]([C:3]([C:4]([C:5](=[C:6]([N:7][H:17])[H:16])[H:15])([H:14])[H:18])([H:12])[H:13])[H:11])([H:8])([H:9])[H:10]>>[C:1]([N:2]([C:3](/[C:4](=[C:5](/[C:6]([N:7]([H:17])[H:18])[H:16])[H:15])[H:14])([H:12])[H:13])[H:11])([H:8])([H:9])[H:10],CCCCCl,0.14 +[C:1](/[C:2](=[C:3](/[C:4]([C:5]([H:14])([H:15])[H:17])([H:12])[H:13])[H:11])[C:6](=[O:7])[H:16])([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[C:3]([H:11])[C:4]([H:12])([H:13])[C:5]([H:14])([H:15])[C@:6]1([O:7][H:17])[H:16])([H:8])([H:9])[H:10],CCCCCl,-1.29 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[N:4]([H:12])[C@:5]1([C:6](=[O:7])[H:14])[H:13])[H:8]>>[O:1]([C:3]([C@@:2]1([H:9])[N:4]([H:12])[C@:5]1([C:6](=[O:7])[H:14])[H:13])([H:10])[H:11])[H:8],CCCCCl,-0.66 +[C:1]([C:2]([C:3]#[C:4][C:5](=[O:6])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]1([H:7])([H:9])[C:2]([H:10])([H:11])[C:3]1=[C:4]([C:5](=[O:6])[H:12])[H:8],CCCCN,-0.98 +[N:1](=[C:2]1/[C:3]([H:8])=[C:4]([H:9])[C:5]([H:10])([H:11])[O:6]1)\[H:7]>>[N:1]([c:2]1[c:3]([H:8])[c:4]([H:9])[c:5]([H:11])[o:6]1)([H:7])[H:10],CCCCN,0.71 +[O:1]([C:2]1=[C:3]([H:6])[O:4][C:5]1([H:8])[H:9])[H:7]>>[O:1]=[C:2]1[C:3]([H:6])([H:7])[O:4][C:5]1([H:8])[H:9],CCCCN,0.73 +[C:1]([C:2](=[C:3]([H:11])[H:13])[H:10])([H:7])([H:8])[H:9].[C:4](=[C:5]=[C:6]([H:12])[H:16])([H:14])[H:15]>>[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])([C:4]([C:5]#[C:6][H:16])([H:14])[H:15])[H:10])([H:7])([H:8])[H:9],CCCC#N,-0.69 +[O:1]([C:2]([O+:4]=[C-:3][C:5]#[C:6][H:10])([H:8])[H:9])[H:7]>>[O:1]([C:2]([C:3](=[O:4])[C:5]#[C:6][H:10])([H:8])[H:9])[H:7],CCCC#N,-1.54 +[C:1]([C:2](=[C:3]=[C:4]([H:10])[H:11])[H:9])(/[C:6](=[N:5]\[H:12])[H:13])([H:7])[H:8]>>[C:1]1([H:7])([H:8])[C:2]([H:9])=[C:3]([H:10])[C@@:4]2([H:11])[N:5]([H:12])[C@@:6]12[H:13],CCCC#N,-1.42 +[C:1]([C:5]([C:4](=[C:3]=[C:2]([H:8])[H:9])[H:11])([H:12])[H:13])([H:6])([H:7])[H:10]>>[C:1]1([H:6])([H:7])[C:2]([H:8])([H:9])[C:3]([H:10])=[C:4]([H:11])[C:5]1([H:12])[H:13],COCCO,-8.49 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C@:4]1([C:5]#[N:6])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C:2][C:3]([C:4]([C:5]#[N:6])([H:10])[H:13])([H:11])[H:12])([H:7])([H:8])[H:9],COCCO,-3.15 +[O:1]([C:2]([C:3]1([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:8])[H:9])[H:7]>>[C:5](=[C:6]([H:15])[H:16])([H:13])[H:14].[O:1](/[C:2](=[C:3](/[C:4]([H:8])([H:11])[H:12])[H:10])[H:9])[H:7],COCCO,-2.21 +[O:1]([C@@:2]([C:3]([C:4]#[C:5][H:12])([H:10])[H:11])([C:6]#[C:7][H:13])[H:9])[H:8]>>[O:1]1[C@@:2]([C:6]#[C:7][H:13])([H:9])[C:3]([H:10])([H:11])[C:4]1=[C:5]([H:8])[H:12],C1CCOC1,-0.7 
+[C:1]([C:2]1[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[N:5]=[C:6]([H:15])[O:7]1)([H:8])([H:9])[H:10]>>[C:1]([C@:2]12[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[N:5]1[C:6]([H:15])[O:7]2)([H:8])([H:9])[H:10],C1CCOC1,0.45 +[N:1](=[C:2]1/[O:3][C:4]([H:9])([H:10])[C-:5]=[N+:6]1[H:8])\[H:7]>>[N:1](=[C:2](/[O:3][C:4]([C:5]#[N:6])([H:9])[H:10])[H:8])\[H:7],C1CCOC1,2.89 +[C:1]1([H:7])([H:8])[C:2]([H:9])=[C:3]([H:10])[C@@:6]1([C:5](=[O:4])[H:11])[H:12]>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C@:3]3([H:10])[O:4][C@@:5]2([H:11])[C@:6]13[H:12],c1ccsc1,-0.95 +[C:1]([C@@:2]1([H:10])[N:3]([H:11])[C@:4]1([C:5]([O:6][H:15])([H:13])[H:14])[H:12])([H:7])([H:8])[H:9]>>[C:1]([C:2](/[N:3]=[C:4](/[C:5]([O:6][H:15])([H:13])[H:14])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9],c1ccsc1,-0.81 +[C:1]([O:2][C:3](=[O:4])[C:5]#[C:6][H:10])([H:7])([H:8])[H:9]>>[C:1](=[O:2])([H:7])[H:9].[c:3]1(=[O:4])[c:5]([H:8])[c:6]1[H:10],c1ccsc1,0.29 +[N:1]1([H:7])/[C:2](=[N:6]\[H:11])[C:3]1([H:8])[H:9].[N:4]#[C:5][H:10]>>[N:1](=[C:2]1\[C:3]([H:8])([H:9])[N:4]=[C:5]([H:10])[N:6]1[H:11])\[H:7],CCCCCC,-0.1 +[C:1]([C@@:2]1([C:5](=[O:6])[N:7]([H:14])[H:15])[C:3]([H:11])([H:12])[N:4]1[H:13])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]([N+:4](=[C-:3][H:11])[H:13])([C:5](=[O:6])[N:7]([H:14])[H:15])[H:12])([H:8])([H:9])[H:10],CCCCCC,0.15 +[C:1]([C@@:2]12[O:3][C@@:4]3([H:11])[C:5]([H:12])([H:13])[C@:6]1([H:14])[C@@:7]23[H:15])([H:8])([H:9])[H:10]>>[C:1]([C@:2]1([H:14])[O:3][C@@:4]2([H:11])[C:5]([H:12])([H:13])[C:6][C@@:7]12[H:15])([H:8])([H:9])[H:10],CCCCCC,-0.66 +[C-:1]1([H:8])[N+:2]([H:7])([H:9])[C:3]12[C:4]([H:10])([H:11])[O:5][C:6]2([H:12])[H:13]>>[C:1]1([H:7])([H:8])[N:2]([H:9])[C:3]12[C:4]([H:10])([H:11])[O:5][C:6]2([H:12])[H:13],CCOCCO,9.67 +[O:1]([C@@:2]1([H:7])[C:3]([H:8])([H:9])[C@:4]1([O:5][H:11])[H:10])[H:6]>>[C:4]([O:5][H:11])[H:10].[O:1]([C:2](=[C:3]([H:8])[H:9])[H:7])[H:6],CCOCCO,-5.74 +[C:1]([C@@:2]12[C:3]([H:11])([H:12])[C@@:4]3([H:13])[O:5][C@:6]1([H:14])[C@@:7]23[H:15])([H:8])([H:9])[H:10]>>[C:1]([C@@:7]1([H:15])[C@@:4]2([H:13])[C:3]([H:11])([H:12])[C:2][C@:6]1([H:14])[O:5]2)([H:8])([H:9])[H:10],CCOCCO,-1.56 +[C:1]([O:2][C:3]([C:6]([C:4]([C:5]([H:14])([H:15])[H:16])([H:13])[H:17])=[O:7])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6](=[O:7])[H:17])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10],C1CCCCC1,-0.89 +[C:1]([C@@:2]([C:3]([O:4][H:12])([H:10])[H:11])([C:5][H:14])[O:6][H:13])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([C:3]([O:4][H:12])([H:10])[H:11])[C:5]([H:13])([H:14])[O:6]1)([H:7])([H:8])[H:9],C1CCCCC1,0.88 +[C:1]([H:7])([H:8])([H:9])[H:10].[N:2]=[C:3]([C:4]([N:5]([C:6]([H:15])([H:16])[H:17])[H:14])([H:12])[H:13])[H:11]>>[C:1]([N:2][C:3]([C:4]([N:5]([C:6]([H:15])([H:16])[H:17])[H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],C1CCCCC1,-0.89 +[C:1]([O:2][C:3]([N:4][C:5]#[C:6][H:11])=[C:7]([H:12])[H:13])([H:8])([H:9])[H:10]>>[C:1]([O:2][C:3]1=[N:4][C:5]=[C:6]([H:11])[C:7]1([H:12])[H:13])([H:8])([H:9])[H:10],C1=CCCCC1,-0.3 +[C:1]([C@:2]1([H:9])/[C:3](=[N:5]/[H:11])[O:4]1)([H:6])([H:7])[H:8].[H:10][H:12]>>[C:1]([C:2]([C:3](=[O:4])[N:5]([H:11])[H:12])([H:9])[H:10])([H:6])([H:7])[H:8],C1=CCCCC1,-2.48 +[C:1](=[C:2]([c:3]1[c:4]([H:10])[c:5]([H:11])[c:6]([H:12])[n:7]1[H:13])[H:9])[H:8]>>[C-:1](=[C:2]([C:3]1=[C:4]([H:10])[C:5]([H:11])([H:12])[C+:6][N:7]1[H:13])[H:9])[H:8],C1=CCCCC1,-0.44 
+[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C@:6]1([O:7][H:17])[H:16])([H:8])([H:9])[H:10]>>[C:1](=[C:2](/[C:3](=[C:4]([C:5](=[C:6](/[O:7][H:17])[H:16])\[H:15])/[H:14])[H:12])[H:11])([H:8])[H:10].[H:9][H:13],c1ccncc1,-0.73 +[N:1]([C:2]1=[N:7][N+:6](=[N-:5])[C:4]([H:10])=[N:3]1)([H:8])[H:9]>>[N:1]([c:2]1[n:3][c:4]([H:10])[n:5][n:6][n:7]1)([H:8])[H:9],c1ccncc1,3.58 +[N:1]#[C:2][C@@:3]1([H:8])[C:4]([H:9])([H:10])[N:5]=[C:6]([H:11])[N:7]1[H:12]>>[N:1]#[C:2][C@@:3]1([H:8])[C:4]([H:9])=[N:5][C-:6]([H:11])[N+:7]1([H:10])[H:12],c1ccncc1,-3.2 +[C:1]([C@@:2]1([C:5](=[O:6])[N:7]([H:13])[H:14])[C:3]([H:11])([H:12])[O:4]1)([H:8])([H:9])[H:10]>>[C:1]([O:6]/[C:5](=[C:2]1\[C:3]([H:11])([H:12])[O:4]1)[N:7]([H:13])[H:14])([H:8])([H:9])[H:10],C1CCNCC1,-0.6 +[C:1]([C:2]([C:3]([C:4]([N+:6]#[C-:5])([H:14])[H:15])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3]([C:4]([C:5]#[N:6])([H:14])[H:15])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],C1CCNCC1,0.96 +[N:1]([c:2]1[n:3][o:4][c:5]([H:9])[c:6]1[H:10])([H:7])[H:8]>>[C:5](#[C:6][H:10])[H:9].[N:1]([C:2]#[N+:3][O-:4])([H:7])[H:8],C1CCNCC1,-1.74 +[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])([C:4](=[O:5])[N:6]([H:14])[H:15])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C@@:4]([N:6]([H:14])[H:15])([H:10])[O:5]1)([H:7])([H:8])[H:9],CCCCCCO,-5.32 +[C-:6]#[O+:7].[C:1]([C:2]([C:3]([O:4][H:14])=[C:5]([H:15])[H:16])([H:11])[H:12])([H:8])([H:9])[H:10].[H:13][H:17]>>[C:1]([C:2]([C@@:3]([O:4][H:14])([C:5]([C:6](=[O:7])[H:17])([H:15])[H:16])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10],CCCCCCO,-3.93 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C@@:4]2([O:5][H:12])[C:6]([H:13])([H:14])[C@@:7]12[H:15])[H:8]>>[O:1]([C:6]([C@:4]1([O:5][H:12])[C:3]([H:10])([H:11])[C:2]([H:9])=[C:7]1[H:15])([H:13])[H:14])[H:8],CCCCCCO,-0.62 +[C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([N:4]([C:5]([H:15])([H:16])[H:17])[C:6](=[O:7])[H:18])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([H:11])[H:18])([H:8])([H:9])[H:10].[N:4]([C:5]([H:15])([H:16])[H:17])=[C:6]=[O:7],CCCOCCC,-0.66 +[C:1]([C@:5]1([H:10])[C-:4]=[N+:3]=[C:2][N:6]1[H:11])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[n:3][c:4][c:5]([H:10])[n:6]1[H:11])([H:7])([H:8])[H:9],CCCOCCC,2.53 +[H:7][H:8].[O:1]=[C:2]([C:3](=[O:4])[H:9])[C:5]#[N:6]>>[O:1]([C@@:2]([C:3](=[O:4])[H:9])([C:5]#[N:6])[H:8])[H:7],CCCOCCC,-3.42 +[C:1]([C:2]([N:3]([C:4](=[N:5][H:14])[N+:7]#[C-:6])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([N:3](/[C:4](=[N:5]/[H:14])[C:6]#[N:7])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10],OCCOCCO,0.84 +[O:1]=[c:2]1[n:3]([H:7])[c:4]([H:8])[c:5]([H:9])[o:6]1>>[O:1]=[C:2]1[N:3]([H:7])[C:4]([H:8])([H:9])[C-:5]=[O+:6]1,OCCOCCO,-4.7 +[C:6](=[N:7][H:13])([H:11])[H:12].[N:1]([C:2](=[O:3])[C:4](=[O:5])[H:10])([H:8])[H:9]>>[N:1]([C:2](=[O:3])[C:4]([O:5]/[C:6](=[N:7]/[H:13])[H:12])([H:10])[H:11])([H:8])[H:9],OCCOCCO,-2.41 +[O:1]=[C:2]1[C:3]([H:7])([H:8])[C:4]([H:9])=[C:5]([H:10])[C:6]1([H:11])[H:12]>>[O:1]([C:2]1=[C:6]([H:11])[C:5]([H:10])=[C:4]([H:9])[C:3]1([H:7])[H:8])[H:12],CCCCCCCC,0.15 +[O:1](/[C:2](=[C:3](/[C:4]#[N:5])[H:7])[H:6])[H:8]>>[O:1]=[C:2]([C:3]([C:4]#[N:5])([H:7])[H:8])[H:6],CCCCCCCC,0.39 +[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])=[C:4]1[C:5]([H:14])([H:15])[C:6]([H:16])[C:7]1([H:17])[H:18])([H:8])([H:9])[H:10]>>[C:1](=[C:2]([C:3]([H:11])([H:12])[H:13])[C:4]1[C:5]([H:14])([H:15])[C:6]([H:10])([H:16])[C:7]1([H:17])[H:18])([H:8])[H:9],CCCCCCCC,-0.08 
+[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C@:4]1([C:5]([O:6][H:16])([H:14])[H:15])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3]([C@@:4]1([H:13])[C:5]([H:14])([H:15])[O:6]1)([H:11])[H:12])([H:10])[H:16])([H:7])([H:8])[H:9],N#CCCCCC#N,-6.8 +[C:1]([c:2]1[c:3]([H:10])[n:4]([H:11])[c:5]([H:12])[n:6]1)([H:7])([H:8])[H:9]>>[C:1]([N+:6]#[C:2][C-:3]([N+:4](=[C-:5][H:12])[H:11])[H:10])([H:7])([H:8])[H:9],N#CCCCCC#N,-2.96 +[C:1]([c:2]1[c:3]([H:11])[n:4]([H:12])[c:5]([O:6][H:13])[n:7]1)([H:8])([H:9])[H:10]>>[C:1](=[C:2]1[C:3]([H:11])=[N:4][C:5]([O:6][H:13])=[N:7]1)([H:8])[H:9].[H:10][H:12],N#CCCCCC#N,0.39 +[C:1]1([H:6])([H:7])[O:2][C:3]([H:8])([H:9])[C:4]([H:10])=[C:5]1[H:11]>>[C:1]([C:5]#[C:4][H:10])([H:6])([H:7])[H:11].[O:2]=[C:3]([H:8])[H:9],CCCCCCCO,-5.43 +[C:1]([C:2](=[O:3])[C:4]1([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14])([H:7])([H:8])[H:9]>>[C:1]1([H:7])([H:9])[C@:2]([C:4]2([H:10])[C:5]([H:11])([H:12])[C:6]2([H:13])[H:14])([H:8])[O:3]1,CCCCCCCO,-1.89 +[C:2](=[C:3]([O:4][C:5]1([H:12])[C:6]([H:13])([H:14])[C:7]1([H:15])[H:16])[H:10])[H:9].[O:1]([H:8])[H:11]>>[O:1]([C:2]([C:3]([O:4][C:5]1([H:12])[C:6]([H:13])([H:14])[C:7]1([H:15])[H:16])([H:10])[H:11])[H:9])[H:8],CCCCCCCO,-0.4 +[C:1]([C@:2]12[C:3]([H:10])([H:11])[C@@:4]1([H:12])[C:5]([H:13])([H:14])[O:6]2)([H:7])([H:8])[H:9]>>[C:1]([C@:2]1([O:6][H:14])[C:3]([H:10])([H:11])[C@:4]1([C:5][H:13])[H:12])([H:7])([H:8])[H:9],OCCOCCCC,-1.22 +[N:1](=[C:2]=[C:3]1[C:4]([H:8])([H:9])[C:5]([H:10])([H:11])[N:6]1[H:12])[H:7]>>[N:1]#[C:2][C@@:3]1([H:7])[C:4]([H:8])([H:9])[C:5]([H:10])([H:11])[N:6]1[H:12],OCCOCCCC,-1.49 +[C:1]([N:2]([C:3](=[O:4])[C:5]([C:6]([H:13])([H:14])[H:15])([C:7]([H:16])([H:17])[H:18])[H:12])[H:11])([H:8])([H:9])[H:10]>>[C:1](=[N:2][H:11])([H:9])[H:10].[C:3](=[O:4])([C:5]([C:6]([H:13])([H:14])[H:15])([C:7]([H:16])([H:17])[H:18])[H:12])[H:8],OCCOCCCC,-0.72 +[N+:1](#[C-:2])[C@@:3]1([H:8])[O:4][C@@:5]2([H:9])[C:6]([H:10])([H:11])[C@@:7]12[H:12]>>[N:1]#[C:2][C@@:3]1([H:8])[O:4][C@@:5]2([H:9])[C:6]([H:10])([H:11])[C@@:7]12[H:12],CCCCCCCCC,0.25 +[C:1]([C:2]([C:3]([C:4]1=[N:6][O:5]1)([H:11])[H:12])([H:10])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C:4](=[O:5])[N:6]1[H:13])([H:7])([H:8])[H:9],CCCCCCCCC,-0.13 +[C-:2]1=[O+:7][C:6]([H:15])[N:5]=[C:4]([H:14])[C:3]1([H:11])[H:12].[C:1]([H:8])([H:9])([H:10])[H:13]>>[C:1]([C:2]1[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[N:5]=[C:6]([H:15])[O:7]1)([H:8])([H:9])[H:10],CCCCCCCCC,-1.21 +[C:1]1([H:7])([H:8])[O:2][C@:3]([C:4]([H:11])([H:12])[H:13])([H:10])[C:5]1([H:14])[H:15].[O:6]([H:9])[H:16]>>[C:1]([O:2][C@@:3]([C:4]([H:11])([H:12])[H:13])([C:5]([O:6][H:16])([H:14])[H:15])[H:10])([H:7])([H:8])[H:9],CCCCNCCCC,-0.91 +[C:1]([C@@:2]([O:3][C:4](=[C:5]([H:13])[H:14])[H:11])([O:6][H:12])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([H:10])[O:3][C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[O:6]1)([H:7])([H:8])[H:9],CCCCNCCCC,2.11 +[C:1]([C@@:2]1([O:3][H:10])[C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9]>>[C:1]([C:2]([O:3][H:10])([C:4]([H:11])([H:12])[H:14])[C:5](=[O:6])[H:13])([H:7])([H:8])[H:9],CCCCNCCCC,5.49 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C@:4]2([H:12])[C:5]([H:13])([H:14])[C@@:6]1([H:15])[O:7]2)[H:8]>>[O:1]=[C:2]([C@@:6]([C:5]([C:4](=[C:3]([H:10])[H:11])[H:12])([H:13])[H:14])([O:7][H:8])[H:15])[H:9],CCCCCCCCCCCC,-0.68 
+[N:1](=[C:2]1/[C:3]([H:9])([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[C:6]([H:15])([H:16])[O:7]1)\[H:8]>>[N:1](=[C:2](/[C:3]([C:4]([C:5]([C:6](=[O:7])[H:15])([H:13])[H:14])([H:11])[H:12])([H:9])[H:10])[H:16])\[H:8],CCCCCCCCCCCC,0.02 +[O:1]([C:2][C:3]([C:4]([C:5](=[O:6])[H:8])([H:11])[H:12])([H:9])[H:10])[H:7]>>[O:1]([C@@:2]1([H:8])[C:3]([H:9])([H:10])[C:4]([H:11])([H:12])[C:5]1=[O:6])[H:7],CCCCCCCCCCCC,-0.84 +[C:1]([C@@:2]1([H:11])[N:3]=[C:4]1[H:13])([H:8])([H:9])[H:10].[C:5]1([H:14])=[N:7][C:6]1([H:15])[H:16].[H:12][H:17]>>[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C@:4]1([C@@:5]1([H:14])[C:6]([H:15])([H:16])[N:7]1[H:17])[H:13])([H:8])([H:9])[H:10],O=C1CCCC1,-1.7 +[N:1]#[C:2][C:3]1([H:8])[C:4]([H:9])([H:10])[C:5]([H:11])=[C:6]([H:12])[C:7]1([H:13])[H:14]>>[N:1](=[C:2]=[C:3]1[C:4]([H:9])([H:10])[C:5]([H:11])=[C:6]([H:12])[C:7]1([H:13])[H:14])[H:8],O=C1CCCC1,-0.14 +[C:1]1([H:6])([H:7])[C:2]([H:8])([H:9])[C:3]([H:10])([H:11])[O:4][C:5]1([H:12])[H:13]>>[C:1](=[C:5]([H:12])[H:13])([H:6])[H:7].[C:2]1([H:8])([H:9])[C:3]([H:10])([H:11])[O:4]1,O=C1CCCC1,-0.08 +[O:1]([C:2]([C@:3]1([H:11])[C:4]([H:12])=[C:5]([O:6][H:13])[C:7]1([H:14])[H:15])([H:9])[H:10])[H:8]>>[O:1]([C:2]([C:3]1([H:11])[C:4]([H:12])([H:13])[C:5](=[O:6])[C:7]1([H:14])[H:15])([H:9])[H:10])[H:8],CCN(CC)CC,2.16 +[C:1](/[C:2](=[C:4](/[C:5]([H:15])([H:16])[H:17])[H:14])[H:10])([H:7])([H:8])[H:9].[C:3]([O:6][H:18])([H:11])([H:12])[H:13]>>[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])([C@@:4]([C:5]([H:15])([H:16])[H:17])([O:6][H:18])[H:14])[H:10])([H:7])([H:8])[H:9],CCN(CC)CC,1.48 +[C:1]([O:2][C:3]1=[C:7]=[C:6]([H:12])[N:5]([H:11])[N:4]1[H:13])([H:8])([H:9])[H:10]>>[C:1]([O:2][c:3]1[n:4][n:5]([H:11])[c:6]([H:12])[c:7]1[H:13])([H:8])([H:9])[H:10],CCN(CC)CC,1.22 +[N:1]1=[C:2]2[C@@:3]3([H:8])[C:4]([H:9])([H:10])[C@@:6]([H:12])([C@@:5]12[H:11])[C:7]3([H:13])[H:14]>>[N:1]#[C:2][C:3]1([H:8])[C:4]([H:9])([H:10])[C:5]([H:11])=[C:6]([H:12])[C:7]1([H:13])[H:14],CC(C)CCO,-16.73 +[N:1]([c:2]1[c:3]([H:9])[o:4][c:5]([H:10])[n:6]1)([H:7])[H:8]>>[N:1]([c:2]1[c:3]([H:9])[o+:4][c-:5][n:6]1[H:10])([H:7])[H:8],CC(C)CCO,-2.04 +[C:1]([C@:2]12[C:3]([H:10])([H:11])[C@@:4]1([H:12])[C:5]([H:13])([H:14])[C:6]2([H:15])[H:16])([H:7])([H:8])[H:9]>>[C:1]([C@:2]1([H:11])[C@:4]([C:3][H:10])([H:12])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:7])([H:8])[H:9],CC(C)CCO,-6.59 +[C:1]([H:7])([H:8])([H:9])[H:11].[C:2](=[O:3])=[C:4]([C:5]([O:6][H:14])([H:12])[H:13])[H:10]>>[C:1]([C:2](=[O:3])[C:4]([C:5]([O:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],CC(CC(C)=O)=O,-1.89 +[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C:4]([H:14])([H:15])[O:5]/[C:6]1=[N:7]\[H:16])([H:8])([H:9])[H:10]>>[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C-:4]([H:15])[O+:5]=[C:6]1[N:7]([H:14])[H:16])([H:8])([H:9])[H:10],CC(CC(C)=O)=O,0.86 +[N+:1]([C:2]1=[C:6]=[C:5]([H:10])[O:4][C-:3]1[H:9])([H:7])([H:8])[H:11]>>[N:1]([c:2]1[c:3]([H:9])[o:4][c:5]([H:10])[c:6]1[H:11])([H:7])[H:8],CC(CC(C)=O)=O,10.97 +[O:1]=[C:2]1[N:3]([H:8])[C:4]([H:9])([H:10])[C@@:5]2([H:11])[O:6][C@@:7]12[H:12]>>[O:1]=[C:2]([N+:3](=[C:4]([H:9])[H:10])[H:8])[C:7](=[C:5]([O-:6])[H:11])[H:12],C1CCNC1,-2.2 +[C:1]([C:3]1([O:2][H:8])[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14])[H:7]>>[C:1]1([H:7])([H:8])[O:2][C:3]12[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[C:6]2([H:13])[H:14],C1CCNC1,8.8 +[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[N:4]([H:14])[C:5]([H:15])([H:16])[C:6]1=[O:7])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])=[N:4][H:14])([H:8])([H:9])[H:10].[C:5](=[C:6]=[O:7])([H:15])[H:16],C1CCNC1,-1.63 
+[C:5](#[N:6])[H:12].[O:1]([C@@:2]1([C:7]([H:9])([H:13])[H:14])[C:3]([H:10])([H:11])[O:4]1)[H:8]>>[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[O:4][C:5]([H:12])=[N:6][C:7]1([H:13])[H:14])[H:8],CCCCOC(C)=O,-2.68 +[C:1]([C:2]([C@@:3]([C:4]([H:13])([H:14])[H:15])([C:5]([C:6][O:7][H:19])([H:16])[H:17])[H:18])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C@@:3]1([C:4]([H:13])([H:14])[H:15])[C:5]([H:16])([H:17])[C@:6]1([O:7][H:19])[H:18])([H:11])[H:12])([H:8])([H:9])[H:10],CCCCOC(C)=O,1.68 +[N:1](=[C:2](/[O:3][C:4](=[C:5]=[N:6][H:10])[H:9])[H:8])\[H:7]>>[N:1](=[C:2](/[O:3][C:4]([C:5]#[N:6])([H:9])[H:10])[H:8])\[H:7],CCCCOC(C)=O,-0.22 +[N:1](=[C:2]1\[O:3][C:4]([H:9])([H:10])[C@@:5]2([H:11])[C:6]([H:12])([H:13])[C@@:7]12[H:14])\[H:8]>>[H:10][H:14].[N-:1]([C:2]1=[C:7]2[C@@:5]([H:11])([C:4]([H:9])=[O+:3]1)[C:6]2([H:12])[H:13])[H:8],C1COCCO1,-0.77 +[C:1]1([H:7])([H:8])[O:2][C:3]12[C:4]([H:9])([H:10])[O:5][C:6]2([H:11])[H:12]>>[C:1]1([H:7])([H:8])[O+:2]=[C-:3][C:4]1([H:9])[H:10].[O:5]=[C:6]([H:11])[H:12],C1COCCO1,-4.77 +[C:1]([N:2]1[C:3]([H:10])=[C+:4][N:5]([H:11])[N-:6]1)([H:7])([H:8])[H:9]>>[C:1]([n:2]1[c:3]([H:10])[c:4]([H:11])[n:5][n:6]1)([H:7])([H:8])[H:9],C1COCCO1,1.74 +[C:1]([C:2](=[C:3]([H:11])[H:12])[H:10])(/[C:7](=[N:6]\[C:5](=[O:4])[H:13])[H:14])([H:8])[H:9]>>[C:1]1([H:8])([H:9])[C@@:2]2([H:10])[C:3]([H:11])([H:12])[O:4][C:5]([H:13])=[N:6][C@@:7]12[H:14],CCCCCCCCCC,-0.16 +[O:1]([C@@:2]1([H:7])[C:3]([H:8])([H:9])[C@:4]1([O:5][H:11])[H:10])[H:6]>>[C:4]([O:5][H:11])[H:10].[O:1]([C:2](=[C:3]([H:8])[H:9])[H:7])[H:6],CCCCCCCCCC,0.29 +[N:1]([c:2]1[n:3][n:4]([H:10])[c:5]([O:6][H:11])[n:7]1)([H:8])[H:9]>>[N:1]([C@@:5]([N:4]([N-:3])[H:10])([O:6][H:11])[N+:7]#[C:2])([H:8])[H:9],CCCCCCCCCC,-0.73 +[C:1](/[C:2](=[N:3]\[H:11])[H:9])([H:6])([H:7])[H:8].[C:4]=[N:5][H:10]>>[C:1]([C@@:2]([N:3]([H:10])[H:11])([C:4]#[N:5])[H:9])([H:6])([H:7])[H:8],ClC(Cl)=C(Cl)Cl,-6.67 +[O:1]([C:2]([C@@:3]([C:4]#[C:5][H:12])([C:6]#[N:7])[H:11])([H:9])[H:10])[H:8]>>[O:1]([C:2]([C@@:3]([C:5](=[C:4])[H:12])([C:6]#[N:7])[H:11])([H:9])[H:10])[H:8],ClC(Cl)=C(Cl)Cl,-0.59 +[O:1]=[C:2]([C:3]#[N+:4][N-:5][H:7])[H:6]>>[O-:1][C:2](=[C:3]([N+:4]#[N:5])[H:7])[H:6],ClC(Cl)=C(Cl)Cl,1.58 +[C:1]([C:2]([C:3]([C:4]([C:5]#[C:6][H:16])([H:14])[H:15])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]([H:12])[H:13])[H:10])([H:7])([H:8])[H:9].[C:4](=[C:5]=[C:6]([H:11])[H:16])([H:14])[H:15],CC(=O)N(C)C,0 +[O:1]=[C:2]([C@@:3]1([H:8])[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[O:6]1)[H:7]>>[O:1]=[C:2]([C:4]1([H:9])[C:3]([H:8])([H:10])[O:6][C:5]1([H:11])[H:12])[H:7],CC(=O)N(C)C,-5.48 +[O+:1](=[C-:2][H:7])[C:3]([O:4][C:5](=[O:6])[H:10])([H:8])[H:9]>>[O:1]=[C:2]([C:3]([O:4][C:5](=[O:6])[H:10])([H:8])[H:9])[H:7],CC(=O)N(C)C,-5.11 +[N+:1]([C:2]1=[C:6]=[N:5][N:4]([H:9])[N-:3]1)([H:7])([H:8])[H:10]>>[N:1]([c:2]1[n:3][n:4]([H:9])[n:5][c:6]1[H:10])([H:7])[H:8],CCC(C)CO,17.95 +[C-:1]1([H:9])[C@@:2]2([H:10])[C@@:3]3([H:11])[C:4]([H:12])([H:13])[C@:5]([H:14])([C:6]3([H:15])[H:16])[N+:7]12[H:8]>>[C:1]1([H:8])([H:9])[C@@:2]2([H:10])[C@@:3]3([H:11])[C:4]([H:12])([H:13])[C@:5]([H:14])([C:6]3([H:15])[H:16])[N:7]12,CCC(C)CO,7.75 +[N:1]([C+:2]1[C@@:3]([O:6][H:12])([H:10])[N:4]([H:11])[C-:5]=[C:7]1[H:13])([H:8])[H:9]>>[N:1]([c:2]1[c:3]([H:10])[n:4]([H:11])[c:5]([O:6][H:12])[c:7]1[H:13])([H:8])[H:9],CCC(C)CO,0.53 +[C:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C@:4]1([O:5][H:13])[H:12])([H:6])([H:7])[H:8]>>[C:1]([C:2]([C:3]([C:4][O:5][H:13])([H:10])[H:11])([H:9])[H:12])([H:6])([H:7])[H:8],NCCO,-2.95 
+[O:1]([C-:2](/[C:3](=[N:4]/[H:9])[H:8])[N+:6]#[N:5])[H:7]>>[O:1]([c:2]1[c:3]([H:8])[n:4]([H:9])[n:5][n:6]1)[H:7],NCCO,-0.23 +[O:1]([C@@:2]1([H:9])[C@@:3]2([H:10])[C@:4]3([H:11])[C:5]([H:12])([H:13])[N:6]2[C@:7]13[H:14])[H:8]>>[H:8][H:9].[O:1]=[C:2]1[C@@:3]2([H:10])[C@:4]3([H:11])[C:5]([H:12])([H:13])[N:6]2[C@:7]13[H:14],NCCO,-4.06 +[C:1]1([H:7])=[C:6]([H:10])[C:5]([H:9])=[N+:4]2[C@:2]1([H:8])[N-:3]2>>[c:1]1([H:7])[c:2]([H:8])[n:3][n:4][c:5]([H:9])[c:6]1[H:10],CCOC(C)=O,0.22 +[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C:4]([H:14])([H:15])[N:5]([H:16])[C:6]1=[O:7])([H:8])([H:9])[H:10]>>[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C:6]([N:5]([C-:4]([H:14])[H:15])[H:16])=[O+:7]1)([H:8])([H:9])[H:10],CCOC(C)=O,0.18 +[C:1]1([H:8])([H:9])[C:2]([H:10])=[C:7]([H:13])[O:6][C@@:5]2([H:12])[O:3][C@@:4]12[H:11]>>[C:1]1([H:8])([H:9])[C@:2]2([H:10])[O:3][C@@:4]1([H:11])[C@:5]1([H:12])[O:6][C@:7]21[H:13],CCOC(C)=O,-3.06 +[C:1](#[C:2][C:3]([C:4]1([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14])([H:8])[H:9])[H:7]>>[C:1](=[C:2]=[C:3]([H:8])[H:9])([C:6]([C:4](=[C:5]([H:11])[H:12])[H:10])([H:13])[H:14])[H:7],CCCCCCC,0.17 +[O+:1]1=[C:2][N:7]([H:10])[C:5](=[O:6])[N:4]([H:9])[C-:3]1[H:8]>>[O:1]=[C:2]1[C:3]([H:8])[N:4]([H:9])[C:5](=[O:6])[N:7]1[H:10],CCCCCCC,0.99 +[C:1]([C:2]([O:3]/[C:4](=[N:5]/[H:13])[C:6]#[N:7])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([O+:3]=[C-:4][N:5]([N+:7]#[C-:6])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10],CCCCCCC,-2.14 +[C:1]([C:2]([C:3]1([C:4]([H:12])([H:13])[H:14])[C:5]([H:15])([H:16])[C:6]1([H:17])[H:18])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1](=[C:2]([H:10])[H:11])([H:7])[H:8].[C:3]1([C:4]([H:12])([H:13])[H:14])=[C:6]([H:17])[C:5]1([H:15])[H:16].[H:9][H:18],CCCCOCCCC,-0.85 +[O:1]([C:2]([C+:3]([N:5]([C-:4]([H:10])[H:11])[H:12])[H:9])([H:7])[H:8])[H:6]>>[O:1]([C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11])[N:5]1[H:12])([H:7])[H:8])[H:6],CCCCOCCCC,0.91 +[C:1]1([H:8])([H:9])[C:2]([H:10])([H:16])[C:3]([H:11])([H:12])[C@:4]2([H:13])[C:5]([H:14])([H:15])[C:6][C@:7]12[H:17]>>[C:1]1([H:8])([H:9])[C@@:2]2([H:10])[C:3]([H:11])([H:12])[C@:4]3([H:13])[C:5]([H:14])([H:15])[C@@:6]2([H:16])[C@:7]13[H:17],CCCCOCCCC,0.56 +[C:1]([C@@:2]1([H:10])[C@:3]([C:4]([H:12])([H:13])[H:14])([H:11])[C@:5]1([O:6][H:16])[H:15])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]([C:3]([C:4]([H:12])([H:13])[H:14])([H:11])[H:15])([C:5][O:6][H:16])[H:10])([H:7])([H:8])[H:9],OCCCCCCCCC,-3.34 +[N:1](=[C:2]1[C+:3]([H:9])[N:4]([H:10])[N-:5][C:6]1([H:7])[H:11])[H:8]>>[N:1]([c:2]1[c:3]([H:9])[n:4]([H:10])[n:5][c:6]1[H:11])([H:7])[H:8],OCCCCCCCCC,-1.63 +[O:1]([C:2][C:3]([C:4]1([O:5][H:12])[C:6]([H:13])([H:14])[C:7]1([H:9])[H:15])([H:10])[H:11])[H:8]>>[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C@@:4]2([O:5][H:12])[C:6]([H:13])([H:14])[C@@:7]12[H:15])[H:8],OCCCCCCCCC,3.18 +[O:1]([C@@:2]1([H:8])[C:3]([H:9])([H:10])[N:4]([H:11])[C:5]1=[O:6])[H:7]>>[O:1]([C@@:2]1([H:8])[C:5]([N:4]([C-:3]([H:9])[H:10])[H:11])=[O+:6]1)[H:7],ClC=CCl,0.39 +[O:1]([C@@:2]1([H:8])[C:3]([H:9])([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])=[C:6]1[H:14])[H:7]>>[H:7][H:8].[O:1]=[C:2]1[C:3]([H:9])([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])=[C:6]1[H:14],ClC=CCl,-2.09 +[C-:2]([O+:3]=[C:4]([C:5]([O:6][C:7]([H:17])([H:18])[H:19])([H:15])[H:16])[H:13])([H:11])[H:12].[C:1]([H:8])([H:9])([H:10])[H:14]>>[C:1]([C:2]([O:3][C:4]([C:5]([O:6][C:7]([H:17])([H:18])[H:19])([H:15])[H:16])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],ClC=CCl,-0.53 
+[C:1]([N+:2]([C:3](=[C:4]=[N:5][H:11])[H:10])=[N-:6])([H:7])([H:8])[H:9]>>[C:1]([n:2]1[c:3]([H:10])[c:4]([H:11])[n:5][n:6]1)([H:7])([H:8])[H:9],C1CCCC1,-0.76 +[C:1]([N:2]([C:3](=[O:4])[C:5]([H:10])([H:11])[H:12])[H:9])([H:6])([H:7])[H:8]>>[C:1]([N:2]=[C:3]=[O:4])([H:6])([H:7])[H:8].[C:5]([H:9])([H:10])([H:11])[H:12],C1CCCC1,0.85 +[O:1]=[C:2]([C:3]([H:7])([H:8])[H:9])[O:6][C:5](=[C:4]([H:10])[H:11])[H:12]>>[O:1]=[C:2]([C:3]([C:4]([C:5](=[O:6])[H:12])([H:10])[H:11])([H:8])[H:9])[H:7],C1CCCC1,-0.95 +[C:1]([C@:2]1([H:14])[N:3]([H:11])[C@:4]1([C:5](=[C:6]=[O:7])[H:13])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C@:2]12[N:3]([H:11])[C@@:4]1([H:12])[C:5]([H:13])([H:14])[C:6]2=[O:7])([H:8])([H:9])[H:10],Fc1c(F)c(F)c(F)c(F)c1F,-2.49 +[C:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C-:4]=[C:5]([H:13])[C+:6]1[H:14])([H:7])([H:8])[H:12]>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C:3]([H:10])([H:11])[C:4]([H:12])=[C:5]([H:13])[C@@:6]12[H:14],Fc1c(F)c(F)c(F)c(F)c1F,1.84 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C:4](=[O:5])[N:6]1[H:13])([H:7])([H:8])[H:9]>>[C:1]([C-:2]([N:6]([C:4]1=[O+:5][C:3]1([H:11])[H:12])[H:13])[H:10])([H:7])([H:8])[H:9],Fc1c(F)c(F)c(F)c(F)c1F,1.63 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C@:4]2([C:5]([H:14])([H:15])[O:6]2)[C:7]1([H:16])[H:17])([H:8])([H:9])[H:10]>>[C:1]([C:2](=[C:3]([H:12])[H:13])[H:11])([H:8])([H:9])[H:10].[C:4]1(=[C:7]([H:16])[H:17])[C:5]([H:14])([H:15])[O:6]1,Fc1ccccc1,0.11 +[C:1]1([H:7])([H:8])[N:2]([H:9])[C:3]12[C:4]([H:10])([H:11])[O:5][C:6]2([H:12])[H:13]>>[C:1](=[N:2][H:9])([H:7])[H:8].[C:3]1([H:11])=[C:4]([H:10])[O:5][C:6]1([H:12])[H:13],Fc1ccccc1,-2.95 +[C:1]([C@@:2]([O:3][H:11])([C:4]([C:5](=[O:6])[H:14])([H:12])[H:13])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[O:3])[H:10])([H:7])([H:8])[H:9].[C:4](=[C:5]([O:6][H:11])[H:14])([H:12])[H:13],Fc1ccccc1,0.71 +[N:1]([C:2](=[O:3])[C:4]([N:5]1[C:6]([H:12])([H:13])[C:7]1([H:14])[H:15])([H:10])[H:11])([H:8])[H:9]>>[N:1](=[C:2](/[O:3][H:9])[C:4]([N:5]1[C:6]([H:12])([H:13])[C:7]1([H:14])[H:15])([H:10])[H:11])\[H:8],CCCCl,1.31 +[C:1]([C@:2]1([N:3]([H:11])[H:12])[C:4]([H:13])([H:14])[C:5]([H:8])([H:15])[C:6]([H:16])([H:17])[O:7]1)([H:9])[H:10]>>[C:1]([C@:2]1([N:3]([H:11])[H:12])[C:4]([H:13])([H:14])[C:5]([H:15])[C:6]([H:16])([H:17])[O:7]1)([H:8])([H:9])[H:10],CCCCl,0.41 +[C:1]([O:2][C@@:3]([C:4]([H:11])([H:12])[H:13])([C:5]([O:6][H:16])([H:14])[H:15])[H:10])([H:7])([H:8])[H:9]>>[C:1]([O:6][C:5]([C@@:3]([O:2][H:16])([C:4]([H:11])([H:12])[H:13])[H:10])([H:14])[H:15])([H:7])([H:8])[H:9],CCCCl,-0.54 +[C:1](=[C:2]([C@@:3]1([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]=[C:7]1[H:17])[H:11])([H:8])[H:10].[H:9][H:16]>>[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10],CCCCCCCl,-1.87 +[C:1]([C@@:2]([O+:3]=[C-:4][H:12])([C:5]([C:6]([H:14])([H:15])[H:16])([C:7]([H:17])([H:18])[H:19])[H:13])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[O:3][C:4]([H:12])([H:13])[C:5]1([C:6]([H:14])([H:15])[H:16])[C:7]([H:17])([H:18])[H:19])([H:8])([H:9])[H:10],CCCCCCCl,0.16 +[O:1]=[c:2]1[c:3]([H:8])[c:4]([H:9])[n:5]([H:10])[c:6]([H:11])[n:7]1>>[O+:1]1=[C:2]([H:8])[N:7]=[C:6]([H:11])[N:5]([H:10])[C:4]([H:9])=[C-:3]1,CCCCCCCl,1.66 +[C:1]([H:7])([H:8])([H:9])[H:12].[C:2]1([H:10])=[C:3]([H:11])[C:4]([H:13])([H:14])[C@:5]1([O:6][H:16])[H:15]>>[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C@:5]1([O:6][H:16])[H:15])([H:7])([H:8])[H:9],CCCCCCCCCCCCCCCC,-1.28 
+[C:1]([C:2]([C@:3]([N:4][H:14])([C:5](=[O:6])[O:7][H:15])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2](/[N+:4](=[C:3](\[C:5]([O-:6])[O:7][H:15])[H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],CCCCCCCCCCCCCCCC,0.54 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:13])([H:16])[O:7]1)([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:16])[O:7]1)([H:8])([H:9])[H:10],CCCCCCCCCCCCCCCC,0.28 +[C:1](/[C:2](=[C:6](/[C:5]1([H:15])[C:3]([H:11])([H:12])[C:4]1([H:13])[H:14])[H:16])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]1[H:16])([H:7])([H:8])[H:9],ClC(Cl)(Cl)Cl,0.11 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])[N:4]([H:11])[C:5]([H:12])([H:13])[C@@:6]1([O:7][H:15])[H:14])[H:8]>>[O:1]([C@@:2](/[C:3](=[N+:4](\[C-:5][H:13])[H:11])[H:10])([C:6]([O:7][H:15])([H:12])[H:14])[H:9])[H:8],ClC(Cl)(Cl)Cl,-0.84 +[C:1]([N:2]([C:3](=[O:4])[C:5]([H:10])([H:11])[H:12])[H:9])([H:6])([H:7])[H:8]>>[C:1]([O:4]/[C:3](=[N:2]/[H:9])[C:5]([H:10])([H:11])[H:12])([H:6])([H:7])[H:8],ClC(Cl)(Cl)Cl,1.28 +[N:1]1([H:7])[C@@:2]2([H:8])[C@:3]1([H:9])[C@@:4]1([H:10])[N:5]([H:11])[C@@:6]21[H:12]>>[N:1]1([H:7])[C@@:2]([C@@:6]2([H:12])[C:4][N:5]2[H:11])([H:8])[C:3]1([H:9])[H:10],OCC(O)CO,0.42 +[C:2]1([H:9])([H:10])[C:3]([H:11])([H:12])[C@@:4]2([H:13])[C@:5]1([H:15])[O:6][C:7]2([H:16])[H:17].[O:1]([H:8])[H:14]>>[O:1]([C:2]([C:3]([C:4]1([H:13])[C:5]([H:14])([H:15])[O:6][C:7]1([H:16])[H:17])([H:11])[H:12])([H:9])[H:10])[H:8],OCC(O)CO,2.25 +[O:1]([C:2]([C:3]1([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:8])[H:9])[H:7]>>[O:1]([C:2]([C@@:3]([C:4]([H:11])([H:12])[H:13])([C:6]([C:5][H:14])([H:15])[H:16])[H:10])([H:8])[H:9])[H:7],OCC(O)CO,-5.82 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[O:4][C:5]([H:12])=[N:6][C:7]1([H:13])[H:14])[H:8]>>[O:1]([C:2](=[C:3]([H:10])[H:11])[H:9])[H:8].[O:4]=[C:5]([N:6]=[C:7]([H:13])[H:14])[H:12],Cc1cccnc1C,2.19 +[C:1]([C:2]([C:3]1=[C:5]([C:6]([H:16])([H:17])[H:18])[C:4]1([H:14])[H:15])([H:11])[H:12])([H:8])([H:9])[H:10].[O:7]([H:13])[H:19]>>[C:1]([C:2]([C@@:3]1([H:13])[C:4]([H:14])([H:15])[C@:5]1([C:6]([H:16])([H:17])[H:18])[O:7][H:19])([H:11])[H:12])([H:8])([H:9])[H:10],Cc1cccnc1C,0.21 +[C:1]([O:2][C@@:3]1([H:11])[C:4]([H:12])([H:13])[C@:5]1([C:6]([O:7][H:17])([H:15])[H:16])[H:14])([H:8])([H:9])[H:10]>>[C:1]([O:2][C@@:3]1([H:11])[C:4]([H:12])([H:13])[C:5]1=[C:6]([H:15])[H:16])([H:8])([H:9])[H:10].[O:7]([H:14])[H:17],Cc1cccnc1C,-3.52 +[C:1](=[C:2]([H:11])[H:12])([H:9])[H:10].[C:3](=[C:4]([O:5][H:8])[C:6](=[O:7])[H:15])([H:13])[H:14]>>[C:1]([C:2]([C:3]([C:4](=[O:5])[C:6](=[O:7])[H:15])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],CCC(O)CC,3.87 +[C:1]([C:2]([N:3][N:4]([C:5]([H:13])([H:14])[H:15])[C:6]([C:7]([H:18])([H:19])[H:20])([H:16])[H:17])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([N:3]([N:4]([C:5]([H:13])([H:14])[H:15])[C:6]([C:7]([H:18])([H:19])[H:20])([H:16])[H:17])[H:12])[H:11])([H:8])([H:9])[H:10],CCC(O)CC,0.24 +[C:1]([C@@:2]1([O:3][H:11])[C:4]([H:12])([H:13])[N:5]2[C:6]([H:14])([H:15])[C@@:7]12[H:16])([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[C:7]2[N:5]([C:4]1([H:12])[H:13])[C:6]2([H:14])[H:15])([H:8])([H:9])[H:10].[O:3]([H:11])[H:16],CCC(O)CC,-8.86 +[O:1]([C:3](=[C:2]=[N:6][C:5](=[N:4][H:9])[H:10])[H:8])[H:7]>>[O:1]([c:2]1[c:3]([H:8])[n:4]([H:9])[c:5]([H:10])[n:6]1)[H:7],Ic1ccccc1,-0.1 
+[N:1]#[C:2][C:3]([C@@:4]1([H:9])[C:5]([H:10])([H:11])[O:6]1)([H:7])[H:8]>>[N:1]#[C:2][C:3]([C-:4]=[O+:6][C:5]([H:9])([H:10])[H:11])([H:7])[H:8],Ic1ccccc1,-0.96 +[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C@:4]1([C@@:5]1([H:14])[C:6]([H:15])([H:16])[O:7]1)[H:13])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[N:3]([H:12])[O:7][C:6]([H:15])([H:16])[C:5]([H:14])=[C:4]1[H:13])([H:8])([H:9])[H:10],Ic1ccccc1,-1.04 +[C:1]1([H:7])([H:8])[O:2][C@@:3]2([H:9])[C:4]([H:10])([H:11])[O:5][C@@:6]12[H:12]>>[C:1]1([H:7])([H:8])[O:2][C:3]([H:9])=[C:6]([H:12])[O:5][C:4]1([H:10])[H:11],CCCCC(C)=O,1.4 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C@:4]2([H:12])[C:5]([H:13])([H:14])[C@@:6]1([H:15])[O:7]2)[H:8]>>[C:3]1([H:10])([H:11])[C@@:4]2([H:12])[C:5]([H:13])([H:14])[C@:6]1([H:15])[O:7]2.[O:1]([C:2][H:9])[H:8],CCCCC(C)=O,-2.14 +[C:1]([C:2]([N:3]([H:11])[H:13])=[O:4])([H:8])([H:9])[H:10].[C:5]1([H:12])=[C:6]([H:14])[C:7]1([H:15])[H:16]>>[C:1](/[C:2](=[N:3]/[H:11])[O:4][C:5]1([H:12])[C:6]([H:13])([H:14])[C:7]1([H:15])[H:16])([H:8])([H:9])[H:10],CCCCC(C)=O,2.67 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4][C:6]([H:15])([H:16])[C@@:7]1([O:5][H:14])[H:17])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C@@:4]2([O:5][H:14])[C:6]([H:15])([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10],C=CCCCC,1.07 +[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C@:3]2([H:12])[C:4]([H:13])([H:14])[C@@:5]1([H:15])[C:6]([H:16])=[C:7]2[H:17])([H:8])([H:9])[H:10],C=CCCCC,-0.08 +[C:1]([C@@:2]1([H:11])[C:3](=[C:4]([H:12])[H:13])[N:5]([H:14])[C:6]1=[O:7])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])=[C:4]([H:13])[N:5]([H:14])[C:6]1=[O:7])([H:8])([H:9])[H:10],C=CCCCC,-4.27 +[O:1]([C:2][C:3]([O:4]/[C:5](=[N:6]\[C:7]([H:9])([H:13])[H:14])[H:12])([H:10])[H:11])[H:8]>>[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[O:4][C:5]([H:12])=[N:6][C:7]1([H:13])[H:14])[H:8],CCOCC,0.31 +[N+:1](#[C-:2])[C@:3]1([H:6])[C:4]([H:7])[C:5]1([H:8])[H:9]>>[N:1]#[C:2][C@:3]1([H:6])[C:4]([H:7])[C:5]1([H:8])[H:9],CCOCC,0.73 +[C:1](=[C:2]([H:11])[H:12])([H:8])[H:9].[C:3]1(=[C:6]=[C:7]([H:10])[H:15])[C:4]([H:13])([H:14])[O:5]1>>[C:1]([C:2]([C@@:3]1([C:6]#[C:7][H:15])[C:4]([H:13])([H:14])[O:5]1)([H:11])[H:12])([H:8])([H:9])[H:10],CCOCC,-0.55 +[O:1]([c:2]1[n:3][n:4]([H:8])[c:5]([H:9])[c:6]1[H:10])[H:7]>>[O:1]([C:2]1=[C:6]([H:10])[C@@:5]2([H:9])[N:3]1[N:4]2[H:8])[H:7],Nc1ccccc1,2.87 +[N:1]1([H:8])[C-:2]=[O+:7][N:6]=[C:5]([H:11])[C:4]([H:10])=[C:3]1[H:9]>>[N:1](=[c:2]1/[c:3]([H:9])[c:4]([H:10])[c:5]([H:11])[n:6][o:7]1)\[H:8],Nc1ccccc1,2.84 +[O:1]=[C:2]([c:3]1[n:4][c:5]([H:9])[c:6]([H:10])[o:7]1)[H:8]>>[O:1]=[C:2]([C:3]([N:4]=[C:5]=[C:6]([H:9])[H:10])=[O:7])[H:8],Nc1ccccc1,-1.75 +[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5]([O:6][H:14])([H:12])[H:13])[H:11])([H:7])([H:8])[H:9]>>[C:1]([H:7])([H:8])([H:9])[H:12].[C@@:2]12([H:10])[O:3][C@:4]1([H:11])[C@:5]2([O:6][H:14])[H:13],CCCC(=O)OC,-2.26 +[O:1]=[C:2]([C@@:3]1([H:9])[C@@:4]2([H:10])[C:5]([H:11])([H:12])[C:6]([H:13])([H:14])[N:7]12)[H:8]>>[O:1]=[C:2](/[C:3](=[N:7]/[C:4]1([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14])[H:9])[H:8],CCCC(=O)OC,-1.47 +[N:1]([c:2]1[n:3][n:4]([H:10])[c:5]([O:6][H:11])[n:7]1)([H:8])[H:9]>>[N:1]([c:2]1[n:3]([H:10])[n:4][c:5]([O:6][H:11])[n:7]1)([H:8])[H:9],CCCC(=O)OC,0.67 
+[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C:4]([O:5][C:7]([H:14])([H:15])[H:16])=[C:6]1[H:13])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C:4](=[O:5])[C@:6]1([C:7]([H:14])([H:15])[H:16])[H:13])([H:8])([H:9])[H:10],CCCCCOC(C)=O,-0.52 +[C:1]([C@@:2]1([H:10])[O:3][C:4]1=[C:5]([H:12])[H:13])([H:7])([H:8])[H:9].[O:6]([H:11])[H:14]>>[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5]([O:6][H:14])([H:12])[H:13])[H:11])([H:7])([H:8])[H:9],CCCCCOC(C)=O,-1 +[C:1]([O:2][H:12])([H:8])([H:9])[H:10].[C:3](=[O:4])=[C:5]([C:6]([O:7][H:15])([H:13])[H:14])[H:11]>>[C:1]([O:2][C:3](=[O:4])[C:5]([C:6]([O:7][H:15])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],CCCCCOC(C)=O,2.3 +[O:1]([C:2]1([H:9])[C:3]([H:10])([H:11])[N:4]([C:6](=[O:7])[H:14])[C:5]1([H:12])[H:13])[H:8]>>[C:2]1([H:9])=[C:3]([H:10])[N:4]([C:6](=[O:7])[H:14])[C:5]1([H:12])[H:13].[O:1]([H:8])[H:11],CCCCCC#N,0.16 +[C:1]1([H:4])([H:5])[C:2]([H:6])([H:7])[O:3]1>>[C:1]1([H:4])([H:5])[C-:2]=[O+:3]1.[H:6][H:7],CCCCCC#N,-0.34 +[C:1]([C@@:2]1([H:11])[N:3]([H:12])[N:6]([O:7][H:15])[C:5]1=[C:4]([H:13])[H:14])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[N:3]([H:12])[C:4]([H:13])([H:14])/[C:5]1=[N:6]\[O:7][H:15])([H:8])([H:9])[H:10],CCCCCC#N,-1.61 +[N:1](=[C:2]1/[C:3]([H:8])=[C:4]([C:5]([H:9])([H:10])[H:11])[O:6]1)\[H:7]>>[N:1](=[C:2]1/[C:3]([H:8])=[C:4]([H:9])[C:5]([H:10])([H:11])[O:6]1)\[H:7],CCO,-13.72 +[O:1]=[C:2]([C@:6]1([H:14])[C:5]([H:12])([H:13])[C@:4]2([H:11])[C:3]([H:8])([H:10])[N:7]21)[H:9]>>[O:1]([C@@:2]1([H:9])[C@@:3]2([H:10])[C@@:4]3([H:11])[C:5]([H:12])([H:13])[C@:6]1([H:14])[N:7]23)[H:8],CCO,-1.76 +[O:1]([C:2]([c:3]1[n:4][o:5][c:6]([H:11])[c:7]1[H:12])([H:9])[H:10])[H:8]>>[O:1]([C:2]([C:3]1=[C:7]=[C:6]([H:11])[O:5][N:4]1[H:12])([H:9])[H:10])[H:8],CCO,-6.14 +[C:1](/[C:4]([C:3]([C:2]([H:9])[H:10])([H:11])[H:12])=[N:5]\[H:13])([H:6])([H:7])[H:8]>>[C:1]([C:2]([C:3]([C:4]=[N:5][H:13])([H:11])[H:12])([H:9])[H:10])([H:6])([H:7])[H:8],CC(=O)O,1.77 +[N:1]([C:2]1=[N:7][N:6]2[C@@:3]1([H:10])[N:4]=[C:5]2[H:11])([H:8])[H:9]>>[N:1]([c:2]1[c:3]([H:10])[n:4][c:5]([H:11])[n:6][n:7]1)([H:8])[H:9],CC(=O)O,-0.89 +[c:1]1([H:7])[c:2]([H:8])[n:3][n:4][c:5]([H:9])[c:6]1[H:10]>>[C:1]1([H:7])=[C:6]=[C:5]([H:9])[N-:4][N:3]([H:10])[C+:2]1[H:8],CC(=O)O,4.45 +[C:1]([C@:2]1([H:11])[C@:5]([O+:4]=[C-:3][H:12])([H:13])[C@:6]1([O:7][H:15])[H:14])([H:8])([H:9])[H:10]>>[C:1]([C@:2]12[C:3]([H:11])([H:12])[O:4][C@@:5]1([H:13])[C@:6]2([O:7][H:15])[H:14])([H:8])([H:9])[H:10],CO,2.54 +[C:1]([C@@:2]([O:3][H:12])([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]#[N:7])[H:13])[H:11])([H:8])([H:9])[H:10]>>[C:1](/[C:2]([O:3][H:12])=[C:4](/[C:5]([H:14])([H:15])[H:16])[C:6]#[N:7])([H:8])([H:9])[H:10].[H:11][H:13],CO,-4.7 +[C:1]([C@@:2]([C@@:3]([C:4]([H:12])([H:13])[H:14])([O:6][H:16])[H:11])([C:5][H:15])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C@:3]([C:4]([H:12])([H:13])[H:14])([H:11])[C@:5]1([O:6][H:16])[H:15])([H:7])([H:8])[H:9],CO,2.07 +[C:1]([C@@:2]12[C:3]([H:11])([H:12])[C@:4]1([O:5][H:13])[C:6]([H:14])([H:15])[O:7]2)([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[O+:7][C-:6]([H:15])[C@:4]1([C:3]([H:11])([H:12])[H:14])[O:5][H:13])([H:8])([H:9])[H:10],CC(C)O,1.08 +[C:1]([C:2]1=[C:3]([H:11])[C:4]([H:12])([H:13])[C@@:5]2([H:14])[O:6][C@@:7]12[H:15])([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[C:7]2[C@@:5]([H:14])([C:4]([H:12])([H:13])[C:3]1([H:11])[H:15])[O:6]2)([H:8])([H:9])[H:10],CC(C)O,0.33 +[O:1]([C:4](=[C:3]=[C:2]([H:8])[H:9])[C:5]([O:6][H:12])([H:10])[H:11])[H:7]>>[O:1]([C:2]([C:3]#[C:4][C:5]([O:6][H:12])([H:10])[H:11])([H:8])[H:9])[H:7],CC(C)O,-9.44 
+[C:1]([N:2]([C:3](=[O:4])[N:5]([C-:6]=[N+:7]([H:13])[H:14])[H:12])[H:11])([H:8])([H:9])[H:10]>>[C:1]([N:2]([C:3](=[O:4])[N:5](/[C:6](=[N:7]/[H:14])[H:13])[H:12])[H:11])([H:8])([H:9])[H:10],CC(C)=O,3.89 +[O:1]([C:2]1([C:5](=[O:6])[H:12])[C:3]([H:8])([H:9])[C:4]1([H:10])[H:11])[H:7]>>[O:1]([C:2]([C:5]1([H:12])[C:3]([H:8])([H:9])[C:4]1([H:10])[H:11])=[O:6])[H:7],CC(C)=O,-5.58 +[N:1](=[C:2]=[C:3]([C@@:4]1([H:9])[C:5]([H:10])([H:11])[O:6]1)[H:8])[H:7]>>[N:1]#[C:2][C:3]([C@@:4]1([H:9])[C:5]([H:10])([H:11])[O:6]1)([H:7])[H:8],CC(C)=O,-2.45 +[C:1]([C:2]([C:4](=[O:5])[C:6](=[O:7])[H:15])([H:11])[H:13])([H:8])([H:9])[H:10].[O:3]([H:12])[H:14]>>[C:1]([C@@:2]([O:3][H:12])([C@@:4]([O:5][H:14])([C:6](=[O:7])[H:15])[H:13])[H:11])([H:8])([H:9])[H:10],ClC(Cl)Cl,-5.36 +[C:1]([C:2]([N:3]1[C:4]([H:11])([H:12])[C:5]1([H:13])[H:14])([H:9])[H:10])([H:6])([H:7])[H:8]>>[C:1]([C-:2]([N+:3]1=[C:5]([H:14])[C:4]1([H:11])[H:12])[H:10])([H:6])([H:7])[H:8].[H:9][H:13],ClC(Cl)Cl,-0.44 +[C:1]([C@@:2]12[C:3]([H:11])([H:12])[C@:4]1([O:5][H:13])[C:6]([H:14])([H:15])[O:7]2)([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[C:4]([O:5][H:13])[C:6]([H:14])([H:15])[O:7][C:3]1([H:11])[H:12])([H:8])([H:9])[H:10],ClC(Cl)Cl,-4.34 +[O:1]=[C:2]([C:3]#[C:4][C:5](=[O:6])[H:8])[H:7]>>[C-:3]#[C:4][C+:5]([O:6][H:7])[H:8].[O+:1]#[C-:2],CS(C)=O,0.36 +[C:1](=[C:2](/[C:3](=[N:4]\[C:5]([N:6]([H:12])[H:13])=[O:7])[H:11])[H:9])([H:8])[H:10]>>[C:1]([c:2]1[c:3]([H:11])[n:4][c:5]([N:6]([H:12])[H:13])[o:7]1)([H:8])([H:9])[H:10],CS(C)=O,-1.45 +[O:1]([N:2]=[C:3]=[C:4]([C:5]([C:6]([O:7][H:14])([H:9])[H:13])([H:11])[H:12])[H:10])[H:8]>>[O:1](/[N:2]=[C:3]1/[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[C@:6]1([O:7][H:14])[H:13])[H:8],CS(C)=O,1.13 +[C:1]([C@@:2]1([C:5](=[O:6])[H:13])[C:3]([H:10])([H:11])[N:4]1[H:12])([H:7])([H:8])[H:9]>>[C:1]([O:6]/[C:5](=[C:2]1/[C:3]([H:10])([H:11])[N:4]1[H:12])[H:13])([H:7])([H:8])[H:9],CN(C)C=O,-0.6 +[C:1]([C:2](=[O:3])[C:4]([C:5]([N:7]([H:18])[H:19])[H:14])([H:12])[H:13])([H:8])([H:9])[H:10].[C:6]([H:11])([H:15])([H:16])[H:17]>>[C:1]([C:2]([O:3][H:11])[C:4]([C@:5]([C:6]([H:15])([H:16])[H:17])([N:7]([H:18])[H:19])[H:14])([H:12])[H:13])([H:8])([H:9])[H:10],CN(C)C=O,-2.55 +[N-:1]=[C:2]=[C+:3][C:4]([C:5]([C:6]([H:7])([H:12])[H:13])([H:10])[H:11])([H:8])[H:9]>>[N:1]#[C:2][C:3]1([H:7])[C:4]([H:8])([H:9])[C:5]([H:10])([H:11])[C:6]1([H:12])[H:13],CN(C)C=O,-1.05 +[C:1]([C:2][C:3]([H:10])([H:11])[H:12])([H:7])([H:8])[H:9].[O:4]([C:5]([O:6][H:16])([H:14])[H:15])[H:13]>>[C:1]([C:2]([C:3]([H:10])([H:11])[H:12])([O:4][H:13])[C:5]([O:6][H:16])([H:14])[H:15])([H:7])([H:8])[H:9],CCCO,4.57 +[C:1]([C:2]1[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[N:5]=[C:6]([H:15])[O:7]1)([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([C:4]([H:13])[H:14])([H:11])[H:12])=[O:7])([H:8])([H:9])[H:10].[N:5]#[C:6][H:15],CCCO,-0.02 +[C:1]([N:2]1[C:3]([H:10])([H:11])[C:4]1([C:5]([H:12])([H:13])[H:14])[C:6]([H:15])([H:16])[H:17])([H:7])([H:8])[H:9]>>[C:1]([N+:2]1([H:10])[C-:3]([H:11])[C:4]1([C:5]([H:12])([H:13])[H:14])[C:6]([H:15])([H:16])[H:17])([H:7])([H:8])[H:9],CCCO,-3.9 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C@@:4]2([O:5][H:14])[C:6]([H:15])([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]1([H:8])([H:9])[C@@:2]2([H:11])[C:3]([H:12])([H:13])[C@@:4]([O:5][H:14])([C:6]([H:10])([H:15])[H:16])[C@@:7]12[H:17],CCCCO,-0.3 +[C:1]1([H:7])=[C:2]=[C:3]([H:8])[O:4][C@@:5]1([C:6]([H:10])[H:11])[H:9]>>[C:1](=[C:2]=[C:3]([O:4][C:5](=[C:6]([H:10])[H:11])[H:9])[H:8])[H:7],CCCCO,5.96 
+[C:1]([C@@:2]1([H:8])[C:3]([H:9])([H:10])[O:4]1)([H:5])([H:6])[H:7]>>[C:1](=[C:2]([H:6])[H:8])([H:5])[H:7].[C:3](=[O:4])([H:9])[H:10],CCCCO,-9.37 +[C:1]([C:2]([C:3]#[C:4][C:5]([O:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]=[C:4]([C:5]([O:6][H:14])([H:12])[H:13])[H:10])[H:11])([H:7])([H:8])[H:9],CCCCCO,-4.31 +[C:1]([C:2][C:3]([C:4]([C@@:5]1([H:15])[C:6]([H:16])([H:17])[O:7]1)([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C@:2]12[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C@@:5]1([H:15])[C:6]([H:16])([H:17])[O:7]2)([H:8])([H:9])[H:10],CCCCCO,1.86 +[C:1]([C:2]([C:3]([H:11])([H:12])[H:13])([C:4](=[O:5])[N:6]([H:14])[H:15])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([C:3]([H:11])([H:12])[H:13])[C@@:4]([N:6]([H:14])[H:15])([H:10])[O:5]1)([H:7])([H:8])[H:9],CCCCCO,-5.39 +[C:1]([C:2]([C@@:3]([O:4][H:14])([C@@:5]([C:6]([H:16])([H:17])[H:18])([O:7][H:19])[H:15])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2](/[C:3](=[C:5](\[C:6]([H:16])([H:17])[H:18])[O:7][H:19])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10].[O:4]([H:14])[H:15],c1ccccc1,-0.28 +[C:1]([N:2][C:3]([C:4]([N:5]([C:6]([H:15])([H:16])[H:17])[H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([N-:2][C:3]([C+:4][N:5]([C:6]([H:15])([H:16])[H:17])[H:14])([H:10])[H:11])([H:7])([H:8])[H:9].[H:12][H:13],c1ccccc1,-0.57 +[C:1]([O:6]/[C:5](=[C:2](\[C:3](=[O:4])[H:11])[H:10])[H:12])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3](=[O:4])[H:11])([C:5](=[O:6])[H:12])[H:10])([H:7])([H:8])[H:9],c1ccccc1,0.8 +[N:1]#[C:2][C@:3]1([H:6])[C:4]([H:7])[C:5]1([H:8])[H:9]>>[N:1]=[C:2]=[C:3]([C:4](=[C:5]([H:8])[H:9])[H:7])[H:6],CC(Cl)(Cl)Cl,-0.24 +[C:1]([O:2][C:3]([C@@:4]1([H:13])[C:5]([H:14])([H:15])[C@:6]1([O:7][H:17])[H:16])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([O:2][C:3]([C:4][C:5]([C:6]([O:7][H:17])([H:13])[H:16])([H:14])[H:15])([H:11])[H:12])([H:8])([H:9])[H:10],CC(Cl)(Cl)Cl,-0.72 +[C:1]1([H:6])=[C:2]=[C:3]([H:7])[C:4]([H:8])([H:9])[O:5]1>>[C:1](#[C:2][C@@:3]1([H:7])[C:4]([H:8])([H:9])[O:5]1)[H:6],CC(Cl)(Cl)Cl,3.17 +[C:1]([C@:2]([N:3]([H:11])[H:12])([C:4](=[C:5]([O:6])[H:14])[H:13])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@:2]([N:3][H:11])([C:4]([C:5](=[O:6])[H:14])([H:12])[H:13])[H:10])([H:7])([H:8])[H:9],CI,0.54 +[C:1]1([H:7])([H:8])[C:2]([H:9])([H:12])[C:6]1([C:5](=[C:4]=[C:3]([H:10])[H:11])[H:13])[H:14]>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C:3]([H:10])([H:11])[C:4]([H:12])=[C:5]([H:13])[C@@:6]12[H:14],CI,-4.55 +[O:1]([C@@:2]1([H:8])[N:3]([H:9])[C:4]([H:10])([H:11])[C:5]([H:12])=[C:6]=[C:7]1[H:14])[H:13]>>[O:1]=[C:2]([N:3]([C:4]([C:5]([C:6]#[C:7][H:14])([H:12])[H:13])([H:10])[H:11])[H:9])[H:8],CI,-0.46 +[C:1]([N:2]1[C:3]([H:10])([H:11])[C@:4]1([C:5](=[O:6])[H:13])[H:12])([H:7])([H:8])[H:9]>>[C:1](=[N+:2]1[C:3]([H:10])([H:11])[C:4]1=[C:5]([O-:6])[H:13])([H:7])[H:8].[H:9][H:12],CCBr,-0.57 +[C:1](/[C:2](=[N:3]/[H:11])[O:4][C:5]1([H:12])[C:6]([H:13])([H:14])[C:7]1([H:15])[H:16])([H:8])([H:9])[H:10]>>[C:1](/[C:2](=[N:3]/[H:11])[O:4][C@:5]([C:6][H:13])([C:7]([H:14])([H:15])[H:16])[H:12])([H:8])([H:9])[H:10],CCBr,-1.56 +[O:1]=[C:2]([c:3]1[c:4]([H:9])[n:5][c:6]([H:10])[n:7]1[H:11])[H:8]>>[O:1]=[C:2]([c:3]1[c-:4][n:5]([H:9])[c+:6]([H:10])[n:7]1[H:11])[H:8],CCBr,-1.03 +[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]12[C:4]([H:12])([H:13])[C@@:5]1([H:14])[O:6][C@@:7]21[H:15]>>[C:1]1([H:8])([H:9])[C:2]([H:10])([H:11])[C:3]1([C@@:7]1([H:15])[C@:5]([C:4][H:12])([H:14])[O:6]1)[H:13],CCC,-0.64 
+[N:1]([C:2](=[O:3])[C:4]([O:5]/[C:6](=[N:7]/[H:13])[H:12])([H:10])[H:11])([H:8])[H:9]>>[N:1]([C:2](=[O:3])[C:4]([N:7]([C:6](=[O:5])[H:12])[H:13])([H:10])[H:11])([H:8])[H:9],CCC,0.18 +[O:1]([C+:2]([N:3]([C:4](=[C:5]=[N-:6])[H:10])[H:8])[H:7])[H:9]>>[O:1]=[C:2]([N:3]([C:4]([C:5]#[N:6])([H:9])[H:10])[H:8])[H:7],CCC,-0.57 +[C:1](/[C:2]([C:3](=[C:4]([H:12])[H:13])[H:11])=[C:7](/[C:5](=[O:6])[H:14])[H:15])([H:8])([H:9])[H:10]>>[C:1]([C:2]1=[C:3]([H:11])[C:4]([H:12])([H:13])[C@@:5]2([H:14])[O:6][C@@:7]12[H:15])([H:8])([H:9])[H:10],CCI,-2.29 +[C:1]([C:2]([C@@:3]1([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])([H:16])[O:6]1)([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](/[C:3](=[C:4](/[C:5]([O:6][H:13])([H:15])[H:16])[H:14])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9],CCI,-0.43 +[C:1]([C@@:2]([O:3][H:11])([C@@:4]1([H:12])[C:5]([H:13])([H:14])[N:6]1[H:15])[H:10])([H:7])([H:8])[H:9]>>[C:1](/[C:2](=[C:4]1\[C:5]([H:13])([H:14])[N:6]1[H:15])[H:10])([H:7])([H:8])[H:9].[O:3]([H:11])[H:12],CCI,-0.85 +[C:1]([N:2]([C:3]([H:10])([H:11])[H:12])[C:4]([C:5]#[N:6])([H:13])[H:14])([H:7])([H:8])[H:9]>>[C:1]([N+:2]([C:3]([H:10])([H:11])[H:12])([C-:4]([C:5]#[N:6])[H:13])[H:14])([H:7])([H:8])[H:9],CCN,-2.78 +[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5]#[N:6])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]([O:3][C:5]#[N:6])([C:4][H:11])[H:10])([H:7])([H:8])[H:9],CCN,-2.71 +[C:1]([c:2]1[c:3]([H:10])[o:4][n:5][n:6]1)([H:7])([H:8])[H:9]>>[C:1]([C:2]1([C:3](=[O:4])[H:10])[N:5]=[N:6]1)([H:7])([H:8])[H:9],CCN,0.2 +[C:1]([c:2]1[n:3][c:4][c:5]([H:10])[n:6]1[H:11])([H:7])([H:8])[H:9]>>[C:1](/[C:2]([C:4]#[N:3])=[N+:6](/[C-:5][H:10])[H:11])([H:7])([H:8])[H:9],CC#N,0.07 +[C:1]([C:2]([N:3]([C:4](=[O:5])[H:12])[H:11])([H:9])[H:10])([H:6])([H:7])[H:8]>>[C:1]1([H:6])([H:7])[C:2]([H:9])([H:10])[N:3]([H:11])[C@@:4]1([O:5][H:8])[H:12],CC#N,-0.01 +[C:1]([C@:2]([O:3][H:11])([N:4]1[C:5]([H:12])[C:6]1([H:13])[H:14])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2]([O:3][H:11])[N:4]1[C:5]([H:10])([H:12])[C:6]1([H:13])[H:14])([H:7])([H:8])[H:9],CC#N,-1.94 +[C+:1](=[C:2]=[C-:3][H:9])[H:7].[C:4]1([H:8])([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14]>>[C:1](#[C:2][C:3]([C:4]1([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14])([H:8])[H:9])[H:7],CC=O,-0.75 +[C:1]([C@@:2]1([C:5]#[O+:6])[C:3]([H:10])([H:11])[C-:4]1[H:13])([H:7])([H:8])[H:9].[H:12][H:14]>>[C:1]([C:2]1([C:5](=[O:6])[H:14])[C:3]([H:10])([H:11])[C:4]1([H:12])[H:13])([H:7])([H:8])[H:9],CC=O,-1.16 +[C:1]([C:2](/[C:3](=[N:4]\[H:14])[C@@:5]([O:6])([O:7][H:15])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C@:3]([N:4][H:14])([C:5](=[O:6])[O:7][H:15])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10],CC=O,0.26 +[C:1](/[N:2]=[C:3](/[N:4]([C:5](=[O:6])[H:12])[H:11])[H:10])([H:7])([H:8])[H:9]>>[C-:5]#[O+:6].[C:1]([N:2](/[C:3](=[N:4]/[H:11])[H:10])[H:12])([H:7])([H:8])[H:9],ClCCl,-0.73 +[C:1]([C:2](=[O:3])[C:4](=[O:5])[N:6]([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[O:3])[H:10])([H:7])([H:8])[H:9].[C:4](=[O:5])=[N:6][H:11],ClCCl,0.94 +[C:1]([C:2]1([C:6]#[N:7])[C:4]([H:13])([H:14])[C:5]1([H:15])[H:16])([H:8])([H:9])[H:10].[C:3]([H:11])[H:12]>>[C:1]([C:2]1([C:6]#[N:7])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C:5]1([H:15])[H:16])([H:8])([H:9])[H:10],ClCCl,-0.68 +[O:1]=[C:2]1[C:3]([H:6])([H:7])[C:4]([H:8])([H:9])[C:5]1([H:10])[H:11]>>[O:1]([C:2]1=[C:5]([H:10])[C:4]([H:8])([H:9])[C:3]1([H:6])[H:7])[H:11],S=C=S,0.31 
+[C:1]([C:2]([C@@:3]1([H:13])[C:4]([H:14])([H:15])[C@@:5]2([H:16])[C:6]([H:17])([H:18])[C@@:7]12[H:19])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C@:2]1([H:11])[C@@:3]2([H:13])[C:4]([H:14])([H:15])[C@@:5]3([H:16])[C@:6]1([H:18])[C@@:7]23[H:19])([H:8])([H:9])[H:10].[H:12][H:17],S=C=S,-0.71 +[N:1]#[C:2]/[C:3](=[C:7](\[C:6]([C:5](=[O:4])[H:9])([H:10])[H:11])[H:12])[H:8]>>[N:1]#[C:2][C@@:3]1([H:8])[O:4][C@@:5]2([H:9])[C:6]([H:10])([H:11])[C@@:7]12[H:12],S=C=S,0.25 +[C:1](=[C:2]([C:3]([O:4][H:11])[C@:5]1([H:12])[C:6]([H:13])([H:14])[O:7]1)[H:10])([H:8])[H:9]>>[C:1](=[C:2](/[C:3]([O:4][H:11])=[C:5]1\[C:6]([H:13])([H:14])[O:7]1)[H:10])[H:8].[H:9][H:12],CSC,-0.7 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])=[C:4]([H:13])[N:5]([H:14])[C:6]1=[O:7])([H:8])([H:9])[H:10]>>[C:1](/[C:2](=[C:3]([C:4](=[N:5]/[H:14])/[H:13])\[H:12])[C:6](=[O:7])[H:11])([H:8])([H:9])[H:10],CSC,-3.01 +[C:1]([C@@:2]12[O:3][C@@:4]3([H:11])[C:5]([H:12])([H:13])[C@:6]1([H:14])[C@@:7]23[H:15])([H:8])([H:9])[H:10]>>[C:1]([C:2](=[O:3])[C@:6]1([H:14])[C:5]([H:12])([H:13])[C:4]([H:11])=[C:7]1[H:15])([H:8])([H:9])[H:10],CSC,-0.84 +[C:1]1([H:7])([H:8])[C@:2]2([H:9])[C@:3]3([H:10])[C:4]([H:11])([H:12])[C@@:5]1([H:13])[C@:6]23[H:14]>>[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C@:3]([C:4]([H:11])([H:12])[H:13])([H:10])[C@@:6]2([H:14])[C:5]1,BrC(Br)Br,-5.44 +[C:1](=[C:2]([C:3]([O:4][H:11])[C@:5]1([H:12])[C:6]([H:13])([H:14])[O:7]1)[H:10])([H:8])[H:9]>>[C:1](=[C:2]([C:3]([O:4][H:11])[C:5](=[C:6]([O:7][H:13])[H:14])[H:12])[H:10])([H:8])[H:9],BrC(Br)Br,-0.04 +[C:1]([C:2]([C:3]([C:4]([C:5]([H:16])([H:17])[H:18])([H:14])[H:15])([C:6](=[O:7])[H:19])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1](/[C:2](=[C:3](/[C:6](=[O:7])[H:19])[H:13])[H:12])([H:8])([H:9])[H:10].[C:4](=[C:5]([H:16])[H:18])([H:14])[H:15].[H:11][H:17],BrC(Br)Br,-1.92 +[C:1]([C:2]1([H:10])[C:3]([H:11])=[C:6]1[H:14])([H:7])([H:8])[H:9].[C:4](=[O:5])([H:12])[H:13]>>[C:1]([C@@:2]1([H:10])[C@@:3]2([H:11])[C:4]([H:12])([H:13])[O:5][C@@:6]12[H:14])([H:7])([H:8])[H:9],NC(C)C,-1.08 +[O:1]=[C:2]([C:3]1([H:8])[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[C:6]1([H:13])[H:14])[H:7]>>[O:1](/[C:2](=[C:3](\[C:6]1([H:14])[C:4]([H:9])([H:10])[C:5]1([H:11])[H:12])[H:8])[H:7])[H:13],NC(C)C,0.03 +[C:1](=[C:2]([C:6]([C:5]([C:4]([C:3]([H:7])([H:11])[H:12])([H:13])[H:14])([H:15])[H:16])([H:17])[H:18])[H:10])([H:8])[H:9]>>[C:1]([C:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])([H:16])[C:6]1([H:17])[H:18])([H:7])([H:8])[H:9],NC(C)C,-0.53 +[C:2]1([H:8])([H:9])[C:3]([H:10])([H:11])[O:4]/[C:5]1=[N:6]/[H:13].[O:1]([H:7])[H:12]>>[O:1]([C:2]([C:3]([O:4]/[C:5](=[N:6]/[H:13])[H:12])([H:10])[H:11])([H:8])[H:9])[H:7],CC(Cl)Cl,0.25 +[C:1]1([H:6])([H:7])[C:2]([H:8])([H:9])[C:3]([H:10])=[C:4]([H:11])[C:5]1([H:12])[H:13]>>[C:1]([C:2]([C:3](=[C:4])[H:10])([H:8])[H:9])([C:5]([H:11])([H:12])[H:13])([H:6])[H:7],CC(Cl)Cl,-1.76 +[C:1]([C:2]([C:3]([C:4]([N:5]([H:15])[H:16])=[C:6]=[N:7])([H:12])[H:13])([H:11])[H:14])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([C@:4]([N:5]([H:15])[H:16])([C:6]#[N:7])[H:14])([H:12])[H:13])[H:11])([H:8])([H:9])[H:10],CC(Cl)Cl,1.09 +[N:1]([C:2](=[C:3]([N+:4]#[N:5])[H:9])[N-:6][H:10])([H:7])[H:8]>>[N:1]([c:2]1[c:3]([H:9])[n:4][n:5][n:6]1[H:10])([H:7])[H:8],C[N+](=O)[O-],0.08 +[C:1]([N:2]([c:3]1[n:4]([H:12])[n:5][c:6]([H:13])[c:7]1[H:14])[H:11])([H:8])([H:9])[H:10]>>[C:1]([N:2]([c:3]1[n:4][n:5]([H:12])[c:6]([H:13])[c:7]1[H:14])[H:11])([H:8])([H:9])[H:10],C[N+](=O)[O-],2.07 
+[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[O:4][C:5]1=[C:6]=[C:7]([H:14])[H:15])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[O:4][C@:5]1([C:6]#[C:7][H:15])[H:14])([H:8])([H:9])[H:10],C[N+](=O)[O-],-5.36 +[O:1]([C:2]([C@@:3]1([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[O:6]1)([H:8])[H:9])[H:7]>>[O:1]([C:2](/[C:3](=[C:4](/[C:5]([O:6][H:12])([H:13])[H:14])[H:11])[H:10])([H:8])[H:9])[H:7],CC(C)(C)O,-0.13 +[C:1]([C:2]([N:3]([C:4](=[O:5])[C:6]([H:13])([H:14])[H:15])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([N:3]([C-:4]=[O+:5][C:6]([H:13])([H:14])[H:15])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9],CC(C)(C)O,1.45 +[C:1]1([H:7])([H:8])[O+:2]=[C-:3][C@@:4]2([H:11])[N:5]([H:12])[C@@:6]12[H:13].[H:9][H:10]>>[C:1]1([H:7])([H:8])[O:2][C:3]([H:9])([H:10])[C@@:4]2([H:11])[N:5]([H:12])[C@@:6]12[H:13],CC(C)(C)O,2.15 +[O:1]=[N:2][C:3]1=[C:6]([H:13])[C:5]([H:11])([H:12])[C:4]1([H:9])[H:10].[O:7]([H:8])[H:14]>>[O:1](/[N:2]=[C:3]1/[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[C@:6]1([O:7][H:14])[H:13])[H:8],CCC(C)C,-1.46 +[N+:1](=[C-:2][C:3]([N:4]=[C:5]([H:9])[H:10])([H:7])[H:8])=[C:6]([H:11])[H:12]>>[N:1]#[C:2][C:3]([N:4]1[C:5]([H:9])([H:10])[C:6]1([H:11])[H:12])([H:7])[H:8],CCC(C)C,-2.11 +[C:1](/[C:2]([C-:3]([N+:4]#[N:5])[H:10])=[N:6]\[H:11])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[c:3]([H:10])[n:4][n:5][n:6]1[H:11])([H:7])([H:8])[H:9],CCC(C)C,-0.08 +[C:1]([C@@:2]1([H:10])[O+:3]=[C-:4][C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9].[H:11][H:12]>>[C:1]([C@@:2]1([H:10])[O:3][C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9],CCC(O)C,2.84 +[C:1]1([H:7])([H:8])[C:2]([H:9])([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])=[C:5]([H:14])[C:6]1([H:15])[H:16]>>[C:1]1([H:7])([H:8])[C:2]([H:9])([H:10])[C:3]([H:11])([H:12])[C:4]1([C:5](=[C:6]([H:15])[H:16])[H:14])[H:13],CCC(O)C,0.34 +[C:1]([C@@:2]([O:3][H:11])([C+:4]=[C:5]([O-:6])[H:12])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5](=[O:6])[H:12])[H:11])([H:7])([H:8])[H:9],CCC(O)C,10.23 +[O:1]([C@@:2]12[C:3]([H:9])([H:10])[C@@:4]3([H:11])[C:5]([H:12])([H:13])[C@:6]1([H:14])[C@@:7]23[H:15])[H:8]>>[O:1](/[C:2](=[C:3](/[C@@:4]1([H:11])[C:5]([H:12])([H:13])[C:6]([H:14])=[C:7]1[H:15])[H:9])[H:10])[H:8],CCC(C)=O,-1.85 +[C:1]([C:2]([C:3]1=[C:6]=[C:7]([H:15])[C:4]([H:13])([H:14])[O:5]1)([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C@@:3]1([C:6]#[C:7][H:15])[C:4]([H:13])([H:14])[O:5]1)([H:11])[H:12])([H:8])([H:9])[H:10],CCC(C)=O,1.01 +[N:1]#[C:2][C:3]([C@@:4]1([H:9])[C:5]([H:10])([H:11])[N:6]1[H:12])([H:7])[H:8]>>[N:1]#[C:2][C:3]([C:4][N:6]([C:5]([H:9])([H:10])[H:11])[H:12])([H:7])[H:8],CCC(C)=O,-1.07 +[C:1]([N-:2][N+:7]#[C:6][H:15])([H:8])([H:9])[H:10].[C:3](#[C:4][C:5]([H:12])([H:13])[H:14])[H:11]>>[C:1]([n:2]1[c:3]([H:11])[c:4]([C:5]([H:12])([H:13])[H:14])[c:6]([H:15])[n:7]1)([H:8])([H:9])[H:10],COC(C)=O,0.2 +[O:1]=[C:2]1[C:3]([H:7])([H:8])[C@@:4]2([H:9])[N:5]([H:10])[C@@:6]12[H:11]>>[O:1]([C:2]1=[C:3]([H:8])[C@@:4]2([H:9])[N:5]([H:10])[C@@:6]12[H:11])[H:7],COC(C)=O,1.21 +[O-:1][C:2](=[C:3]=[O+:6][C:5]([C:4]([H:8])([H:9])[H:10])([H:11])[H:12])[H:7]>>[O:1]=[C:2]([C@@:3]1([H:8])[C:4]([H:9])([H:10])[C:5]([H:11])([H:12])[O:6]1)[H:7],COC(C)=O,-1.06 +[C:1]([C@:2]([N:3][H:11])([C:4]([C:5](=[O:6])[H:14])([H:12])[H:13])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@:2]([N-:3][H:11])([C:4]([C+:5][O:6][H:14])([H:12])[H:13])[H:10])([H:7])([H:8])[H:9],CC[N+](=O)[O-],1.24 
+[C:1]([C:2]([O:3][C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6](=[O:7])[H:17])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([H:8])([H:9])([H:10])[H:13].[C:2](=[O+:3][C:4]([C:5]([H:14])([H:15])[H:16])=[C:6]([O-:7])[H:17])([H:11])[H:12],CC[N+](=O)[O-],-1.25 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:16])[O:7]1)([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:9])([H:16])[O:7]1)([H:8])[H:10],CC[N+](=O)[O-],-0.46 +[C:1]([C:2]1([C:3]([O:4][H:12])([H:10])[H:11])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([O:4][H:12])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:7])([H:8])[H:9].[C:3]([H:10])[H:11],ClC(Cl)C(Cl)Cl,-0.46 +[C:1]([C:2]([N:3][N:4]([C:5]([H:13])([H:14])[H:15])[C:6]([C:7]([H:18])([H:19])[H:20])([H:16])[H:17])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([N:3]([C:2]([H:11])[H:12])[N:4]([C:5]([H:13])([H:14])[H:15])[C:6]([C:7]([H:18])([H:19])[H:20])([H:16])[H:17])([H:8])([H:9])[H:10],ClC(Cl)C(Cl)Cl,1.06 +[C:1]([C:2]([C:3](=[C:4]=[O:5])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10].[C:6](=[O:7])([H:14])[H:15]>>[C:1]([C:2]([C:3]([C:4](=[O:5])[C:6](=[O:7])[H:15])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],ClC(Cl)C(Cl)Cl,-4.59 +[C:1]([N:2]([C:3]([C:4]#[N:5])([H:10])[H:11])[H:9])([H:6])([H:7])[H:8]>>[C:1]([N:2]([C:3]([N+:5]#[C-:4])([H:10])[H:11])[H:9])([H:6])([H:7])[H:8],CC(C)[N+](=O)[O-],-3.83 +[C:1]([C+:2]1[N:3]([H:11])[C@:4]1([C-:5]=[N:6][H:10])[H:12])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[N:3]([H:11])[C@:4]1([C:5]#[N:6])[H:12])([H:7])([H:8])[H:9],CC(C)[N+](=O)[O-],0.54 +[O:1]([C:2]([C:3]([C:4]([C:5]#[C:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:8])[H:9])[H:7]>>[O:1]([C@:2]1([H:9])[C:3]([H:10])([H:11])[C:4]([H:12])([H:13])[C:5][C:6]1([H:8])[H:14])[H:7],CC(C)[N+](=O)[O-],-4.74 +[C:1]([C:2]([O:3][H:11])=[C:4]([H:12])[H:13])([H:8])([H:9])[H:10].[C:5](=[C:6]=[O:7])([H:14])[H:15]>>[C:1]([C@@:2]1([O:3][H:11])[C:4]([H:12])([H:13])[C:5]([H:14])([H:15])[C:6]1=[O:7])([H:8])([H:9])[H:10],c1ccc2ncccc2c1,1.23 +[C:1]([C:2]([C:3]([C@:4]([O:5][H:15])([C:6]([N:7]([H:18])[H:19])([H:16])[H:17])[H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([C:4]([O:5][H:15])[C:6]([N:7]([H:18])[H:19])([H:16])[H:17])([H:13])[H:14])([H:11])[H:12])([H:8])([H:9])[H:10],c1ccc2ncccc2c1,0.79 +[C-:1]([O+:2]=[C:3]([C:4]([C:5](=[O:6])[H:13])([H:11])[H:12])[H:10])([H:7])[H:9].[H:8][H:14]>>[C:1]([O:2][C@@:3]1([H:10])[C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9],c1ccc2ncccc2c1,-4.68 +[C:1]([C@@:2]([O:3][C:4](=[O:5])[H:12])([C:7](=[C:6])[H:13])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]([O:3][C:4](=[O:5])[H:12])([C:6]#[C:7][H:13])[H:11])([H:8])([H:9])[H:10],Cc1ccccc1C,0.41 +[C:1]([C@@:2]([C:3](=[O:4])[H:11])([C:5]#[N:6])[H:10])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]([C:3](=[O:4])[H:11])([N+:6]#[C-:5])[H:10])([H:7])([H:8])[H:9],Cc1ccccc1C,1.35 +[C:1]([C@@:2]1([C:3]([C:4]#[N:5])([H:11])[H:12])[C:6]([H:13])([H:14])[N:7]1[H:15])([H:8])([H:9])[H:10]>>[C:1](/[C:2]([C:3]([C:4]1=[N:5][C:6]1([H:13])[H:14])([H:11])[H:12])=[N:7]\[H:15])([H:8])([H:9])[H:10],Cc1ccccc1C,0.29 +[O:1]([N:2]1[C:3]([H:9])([H:10])[C:4]([H:11])=[C:5][C:6]([H:12])([H:13])[C:7]1([H:14])[H:15])[H:8]>>[H:13].[O:1]([N:2]1[C:3]([H:9])([H:10])[C:4]([H:11])=[C:5]=[C:6]([H:12])[C:7]1([H:14])[H:15])[H:8],Clc1ccccc1Cl,0.13 
+[C:1]([C@@:2]1([H:11])[O:3][C:4]([H:12])([H:13])[C@@:5]2([H:14])[C:6]([H:15])([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]1([H:9])([H:10])[C@@:2]2([H:11])[O:3][C:4]([H:12])([H:13])[C@:5]([C:6]([H:8])([H:15])[H:16])([H:14])[C@@:7]12[H:17],Clc1ccccc1Cl,-1.16 +[N:1]([C:2](=[O:3])[C:4]([O:5]/[C:6](=[N:7]/[H:13])[H:12])([H:10])[H:11])([H:8])[H:9]>>[N:1]([C:2]([O:3][C@@:6]1([H:12])[O:5][N:7]1[H:13])=[C:4]([H:10])[H:11])([H:8])[H:9],Clc1ccccc1Cl,-1.5 +[C:1]([C:2]([C@@:3]1([H:10])[C:4]([H:11])=[N:5]1)=[O:6])([H:7])([H:8])[H:9]>>[C:1]([c:2]1[c:3]([H:10])[c:4]([H:11])[n:5][o:6]1)([H:7])([H:8])[H:9],Cc1ccc(C)c(C)c1,-0.54 diff --git a/chemprop/tests/data/regression/rxn/descriptors.npz b/chemprop/tests/data/regression/rxn/descriptors.npz new file mode 100644 index 0000000000000000000000000000000000000000..c112116630a424986657dc26d88462556c2575f8 --- /dev/null +++ b/chemprop/tests/data/regression/rxn/descriptors.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b67d2f6e56f86a3de6f5756ba90119c648c8eacbe4796597fa2cf82f44312aba +size 1064 diff --git a/chemprop/tests/data/regression/rxn/rxn.csv b/chemprop/tests/data/regression/rxn/rxn.csv new file mode 100644 index 0000000000000000000000000000000000000000..9654358baadc8adbb104994c76f3cfc2703afdbd --- /dev/null +++ b/chemprop/tests/data/regression/rxn/rxn.csv @@ -0,0 +1,101 @@ +smiles,ea +[O:1]([C:2]([C:3]([C:4](=[O:5])[C:6]([O:7][H:15])([H:13])[H:14])([H:11])[H:12])([H:9])[H:10])[H:8]>>[C:3](=[C:4]=[O:5])([H:11])[H:12].[C:6]([O:7][H:15])([H:8])([H:13])[H:14].[O:1]=[C:2]([H:9])[H:10],8.89893350229384 +[C:1]1([H:8])([H:9])[O:2][C@@:3]2([H:10])[C@@:4]3([H:11])[O:5][C@:6]1([H:12])[C@@:7]23[H:13]>>[C:1]1([H:8])([H:9])[O:2][C:3]([H:10])=[C:7]([H:13])[C@:6]1([O+:5]=[C-:4][H:11])[H:12],5.464327694301 +[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])=[C:5]([H:15])[C:6]([H:16])=[C:7]1[H:17])([H:8])([H:9])[H:10],5.270552275670961 +[C:1]([O:2][C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C-:1]([O+:2]=[C:3]([C@@:4]([C:5]([H:14])([H:15])[H:16])([C:6]([O:7][H:19])([H:17])[H:18])[H:13])[H:12])([H:8])[H:10].[H:9][H:11],8.47300569018029 +[C:1]([C:2]#[C:3][C:4]([C:5](=[O:6])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]=[C:4]([H:10])[H:11])[C:5](=[O:6])[H:12])([H:7])([H:8])[H:9],5.579036955502979 +[O:1]=[c:2]1[n:3]([H:7])[c:4]([H:8])[n:5][o:6]1>>[N:3]([C:4]#[N:5])([H:7])[H:8].[O:1]=[C:2]=[O:6],5.87179986296395 +[C:1](/[C:2](=[N:3]\[O:4][H:10])[C:5]#[C:6][H:11])([H:7])([H:8])[H:9]>>[C:1]([C@:2]12[N:3]([O:4][H:10])[C@@:6]1([H:11])[C:5]2)([H:7])([H:8])[H:9],6.249862206930191 +[O:1]=[C:2]1[N:3]([H:7])[C:4]([H:8])([H:9])[C:5]([H:10])([H:11])[O:6]1>>[C:5](=[O:6])([H:10])[H:11].[O:1]=[C:2]1[N:3]([H:7])[C:4]1([H:8])[H:9],10.554549809087401 +[N:1]#[C:2][C@@:3]1([H:8])[C:4]([H:9])([H:10])[N:5]=[C:6]([H:11])[N:7]1[H:12]>>[N:1]1=[C:6]([H:11])[N:7]([H:12])[C@@:3]2([H:8])[C:2]1=[N:5][C:4]2([H:9])[H:10],12.321927083334499 +[N:1]([C:2]([C@@:3]([N:4]([H:12])[H:13])([C:5]#[N:6])[H:11])([H:9])[H:10])([H:7])[H:8]>>[N:1]([C:2]([C:3]([N:4]([H:12])[H:13])=[C:5]=[N:6][H:11])([H:9])[H:10])([H:7])[H:8],8.50568077785716 +[C:1]([C@@:2]([O:3][H:11])([C@@:4]1([H:12])[C:5]([H:13])([H:14])[O:6]1)[H:10])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[O:3])[C@@:4]1([H:12])[C:5]([H:13])([H:14])[O:6]1)([H:7])([H:8])[H:9].[H:10][H:11],8.63150208656758 
+[C:1]([C:2]([O:3][C@@:4]1([H:13])[C:5]([H:14])([H:15])[C@:6]1([C:7]([H:17])([H:18])[H:19])[H:16])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([O:3][H:19])([H:11])[H:12])([H:8])([H:9])[H:10].[C@@:4]12([H:13])[C:5]([H:14])([H:15])[C@:6]1([H:16])[C:7]2([H:17])[H:18],10.6849610584282 +[N:1]#[C:2][C:3]([C@@:4]1([H:9])[C:5]([H:10])([H:11])[N:6]1[H:12])([H:7])[H:8]>>[N+:1](#[C-:2])[C:3]([C@@:4]1([H:9])[C:5]([H:10])([H:11])[N:6]1[H:12])([H:7])[H:8],5.98241138069479 +[N:1]([c:2]1[c:3]([O:4][H:10])[c:5]([H:11])[c:6]([H:12])[n:7]1[H:13])([H:8])[H:9]>>[N:1]([C:2]1=[N:7][C:6]([H:12])([H:13])[C:5]([H:11])=[C:3]1[O:4][H:10])([H:8])[H:9],4.05263477722536 +[O:1]([N:2]=[C:3]1[C:4]([H:8])([H:9])[O:5][C:6]1([H:10])[H:11])[H:7]>>[O:1](/[N:2]=[C:3](/[C:4]([O+:5]=[C-:6][H:10])([H:8])[H:9])[H:11])[H:7],7.9198700949550584 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[O:5][C:6]1([H:15])[H:16])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3](=[C:4]([H:13])[H:14])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9].[O:5]=[C:6]([H:15])[H:16],9.885120970662388 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])([H:15])/[C:5]1=[N:6]\[O:7][H:16])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C:4]([H:14])([H:15])[C@@:5]1([N:6])[O:7][H:16])([H:8])([H:9])[H:10],6.842806303982721 +[C:1]([C@@:2]1([H:11])[O:3][C@@:4]2([H:12])[C:5]([H:13])([H:14])[C@@:6]12[O:7][H:15])([H:8])([H:9])[H:10]>>[C:1](=[C:2]([C@@:6]1([O:7][H:15])[C@:4]([O:3][H:8])([H:12])[C:5]1([H:13])[H:14])[H:11])([H:9])[H:10],6.17621139932046 +[C:1]([O:2][C:3](=[O:4])[C:5]([N:6]([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([O+:2]=[C-:3][O:4][H:12])([H:7])([H:8])[H:9].[C:5](=[N:6][H:13])([H:10])[H:11],6.90879668948285 +[O:1]([C:2]([C@@:3]1([H:11])[C:4]([H:12])([H:13])[C:5]([H:14])([H:15])[C:6]1=[O:7])([H:9])[H:10])[H:8]>>[C:4](=[C:5]([H:14])[H:15])([H:12])[H:13].[O:1]([C:2]([C@@:3]1([H:11])[C-:6]=[O+:7]1)([H:9])[H:10])[H:8],11.083881155868001 +[C:1]([C@@:2]1([H:10])[O:3][C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9]>>[C-:2]1=[O+:3][C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13].[C:1]([H:7])([H:8])([H:9])[H:10],8.76558117959312 +[N:1]([C:2](=[O:3])[C:4]([C:5]#[N:6])([H:9])[H:10])([H:7])[H:8]>>[N:1]([C:2](=[O:3])[C:4]([N+:6]#[C-:5])([H:9])[H:10])([H:7])[H:8],6.054472281944199 +[C:1]([C:2]([C:3](=[O:4])[C:5]#[C:6][H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([O+:4]=[C-:3][C:5]#[C:6][H:12])([H:10])[H:11])([H:7])([H:8])[H:9],11.0582607831632 +[C:1]([C:2]([O:3][C:4]([C:5]#[N:6])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([H:7])([H:8])([H:9])[H:13].[C:2](=[O+:3][C:4](=[C:5]=[N-:6])[H:12])([H:10])[H:11],8.939102673241631 +[N:1]([c+:2]1[n-:3][c:4]([H:9])[n:5][o:6]1)([H:7])[H:8]>>[N:1]([C:2]([N:3]1[C:4]([H:9])=[N:5]1)=[O:6])([H:7])[H:8],6.465258149913429 +[O:1]([N:2]=[C:3]1[C:4]([H:8])([H:9])[N:5]([H:10])[C:6]1([H:11])[H:12])[H:7]>>[O:1](/[N:2]=[C:3](/[C:4]([N:5]([C:6][H:12])[H:10])([H:8])[H:9])[H:11])[H:7],7.08729333341998 +[N:1]([C:2](=[O:3])[C:4]([O:5]/[C:6](=[N:7]/[H:13])[H:12])([H:10])[H:11])([H:8])[H:9]>>[N:1]([C:4]([C:2](=[O:3])[N:7]([C:6](=[O:5])[H:12])[H:13])([H:10])[H:11])([H:8])[H:9],6.8389747156693605 +[C:1]([C:2]([C:3]1([O:4][H:12])[C:5]([H:13])([H:14])[C:6]1([H:15])[H:16])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3]1=[C:5]([H:14])[C:6]1([H:15])[H:16])([H:10])[H:11])([H:7])([H:8])[H:9].[O:4]([H:12])[H:13],7.8402621209318415 
+[C:1]([C@@:2]1([H:10])[C@:3]([C:4]([H:12])([H:13])[H:14])([H:11])[N:5]1[C:6]([H:15])([H:16])[H:17])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C-:3]([H:11])[N+:5]1([C:4]([H:12])([H:13])[H:14])[C:6]([H:15])([H:16])[H:17])([H:7])([H:8])[H:9],10.753621206032602 +[N:1](=[C:2]1/[C:3]([H:9])([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[C:6]([H:15])([H:16])[O:7]1)\[H:8]>>[N:1]([C:2]1=[C:3]([H:10])[C:4]([H:11])([H:12])[C:5]([H:13])([H:14])[C:6]([H:15])([H:16])[O:7]1)([H:8])[H:9],6.19361281168903 +[C:1]([c:2]1[c:3]([H:10])[n:4]([H:11])[c:5]([H:12])[c:6]1[H:13])([H:7])([H:8])[H:9]>>[C:1]([C:2]1=[C-:6][C:5]([H:12])([H:13])[N:4]([H:11])[C+:3]1[H:10])([H:7])([H:8])[H:9],7.12814220907606 +[C:1]([C@@:2]1([H:10])[N:3]([H:11])[C@:4]1([C:5](=[O:6])[H:13])[H:12])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[N:3]([H:11])[C@:4]1([C:5][O:6][H:13])[H:12])([H:7])([H:8])[H:9],8.06324514779744 +[C:1]([C:2]([n:3]1[c:4]([H:13])[c:5]([H:14])[n:6][c:7]1[H:15])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]([N:3]1[C:4]([H:13])([H:14])[C-:5]=[N+:6]=[C:7]1[H:15])([H:11])[H:12])([H:8])([H:9])[H:10],7.644209569137141 +[C:1]([C:2]([C:3](=[O:4])[C:5]#[C:6][C:7]([H:13])([H:14])[H:15])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([H:8])([H:9])([H:10])[H:12].[C@:2]12([H:11])[C:3](=[O:4])[C:5]1=[C:6]2[C:7]([H:13])([H:14])[H:15],9.1644919104469 +[C:1]([C:2]([C:3]([C:4]([H:13])([H:14])[H:15])([C:5]([H:16])([H:17])[H:18])[C:6]#[N:7])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]1([H:8])([H:10])[C:2]([H:11])([H:12])[C:3]([C:4]([H:13])([H:14])[H:15])([C:5]([H:16])([H:17])[H:18])[C:6][N:7]1[H:9],9.56724442441322 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[N:4]([H:12])[C@:5]1([C:6](=[O:7])[H:14])[H:13])[H:8]>>[O:1]([C:3]([C@@:2]1([H:9])[N:4]([H:12])[C@:5]1([C:6](=[O:7])[H:14])[H:13])([H:10])[H:11])[H:8],9.558144644427909 +[C:1]([C@@:2]1([H:10])[N:3]([H:11])[C@:4]1([C:5]#[N:6])[H:12])([H:7])([H:8])[H:9]>>[C:1]([C:2]([N:3]([C+:4]=[C:5]=[N-:6])[H:11])([H:10])[H:12])([H:7])([H:8])[H:9],7.43925231218507 +[C:1](#[C:2][C:3]#[C:4][C@@:5]1([H:9])[C:6]([H:10])([H:11])[O:7]1)[H:8]>>[C:1]12([H:8])[C:2]3=[C:3]1[C:4]23[C@@:5]1([H:9])[C:6]([H:10])([H:11])[O:7]1,8.8875305817719 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[N:4]([H:12])[C:5]([H:13])([H:14])[C:6]1=[O:7])[H:8]>>[O:1]([O:7][C:6]1=[C:2]([H:9])[C:3]([H:10])([H:11])[N:4]([H:12])[C:5]1([H:13])[H:14])[H:8],10.939094127895299 +[C:1]([C:2]#[C:3][C:4]([H:8])([H:9])[H:10])([H:5])([H:6])[H:7]>>[C:1]([C:2](=[C:3]=[C:4]([H:8])[H:9])[H:10])([H:5])([H:6])[H:7],8.51817600177597 +[C:1]([C:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])([H:16])[C:6]1([H:17])[H:18])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:14])=[C:5]([H:15])[C:6]1([H:17])[H:18])([H:7])([H:8])[H:9].[H:13][H:16],11.890462046069599 +[O:1]=[C:2]1[C:3]([H:8])([H:9])[C:4]([H:10])([H:11])[C:5](=[O:6])[C:7]1([H:12])[H:13]>>[O:1]=[C:2]1[C:3]([H:8])([H:9])[C:4]([H:10])([H:11])[C:5]([O:6][H:13])=[C:7]1[H:12],6.4213837564242295 +[C:1]([C:2]([C:3](=[O:4])[N:5]([C:6]([H:13])([H:14])[H:15])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:3]([O-:4])[N:5]([C+:6]([H:13])[H:14])[H:12])[H:11])([H:7])([H:8])[H:9].[H:10][H:15],9.33923135580394 +[C:1]([C:2]#[C:3][C:4]([C@@:5]1([H:13])[C:6]([H:14])([H:15])[N:7]1[H:16])([H:11])[H:12])([H:8])([H:9])[H:10]>>[C:1]([C:2]/[C:3](=[C:4](\[C@@:5]1([H:13])[C:6]([H:14])([H:15])[N:7]1[H:16])[H:12])[H:11])([H:8])([H:9])[H:10],7.90054772381408 
+[C:1]([C:2]1([C:3]([H:10])([H:11])[H:12])[C:4]([H:13])([H:14])[C@:5]1([O:6][H:16])[H:15])([H:7])([H:8])[H:9]>>[C:1]([C:2]1([C:3]([H:10])([H:11])[H:12])[C:4]([H:13])=[C:5]1[H:15])([H:7])([H:8])[H:9].[O:6]([H:14])[H:16],8.05505987173428 +[C:1]([O:2][C:3]([C:4]([H:10])([H:11])[H:12])([C:5]([H:13])([H:14])[H:15])[H:9])([H:6])([H:7])[H:8]>>[C:1]([O:2][H:15])([H:6])([H:7])[H:8].[C:3]([C:4]([H:10])([H:11])[H:12])(=[C:5]([H:13])[H:14])[H:9],6.503993884214461 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[N:4]([H:13])[C:5]1=[O:6])([H:7])([H:8])[H:9]>>[C:1]([C@@:2]1([H:10])[C@@:5]2([C:3]([H:11])([H:12])[N:4]2[H:13])[O:6]1)([H:7])([H:8])[H:9],10.3092592578813 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[O:4][C:5]([H:12])=[N:6][C:7]1([H:13])[H:14])[H:8]>>[O:1]([C@@:2]1([C:3]([H:9])([H:10])[H:11])[N:6]([C:5](=[O:4])[H:12])[C:7]1([H:13])[H:14])[H:8],6.7021388750542 +[C:1]([N:2]([C@@:3]([C:4]([O:5][H:15])([H:13])[H:14])([C:6]#[N:7])[H:12])[H:11])([H:8])([H:9])[H:10]>>[C:1]([N:2]([C:3](=[C:6]=[N:7][H:15])[H:12])[H:11])([H:8])([H:9])[H:10].[C:4](=[O:5])([H:13])[H:14],5.05460912630239 +[C:1]([n:2]1[c:3]([H:11])[c:4]([H:12])[c:5]([N:6]([H:13])[H:14])[c:7]1[H:15])([H:8])([H:9])[H:10]>>[C-:1]([N+:2]1([H:8])[C:3]([H:11])=[C:4]([H:12])[C:5]([N:6]([H:13])[H:14])=[C:7]1[H:15])([H:9])[H:10],9.458794004936673 +[C:1]([N:2]1[C:3]([H:10])([H:11])[C@:4]1([C:5]#[N:6])[H:12])([H:7])([H:8])[H:9]>>[C:1]([N+:2]1([C:5]#[N:6])[C:3]([H:10])([H:11])[C-:4]1[H:12])([H:7])([H:8])[H:9],8.10652473175005 +[C:1]([C@@:2]([O:3][H:12])([C:4](=[O:5])[C:6]#[C:7][H:13])[H:11])([H:8])([H:9])[H:10]>>[C:1](=[C:2]([O:3][H:12])[H:11])([H:8])[H:10].[C:4](=[O:5])=[C:6]=[C:7]([H:9])[H:13],5.6522318507892795 +[C:1]([C:2]([C@@:3]([O:4][H:13])([C:5]([O:6][H:16])([H:14])[H:15])[H:12])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1](/[C:2](=[C:3](\[O:4][H:13])[H:12])[H:11])([H:7])([H:8])[H:9].[C:5](=[O:6])([H:14])[H:15].[H:10][H:16],8.905118208654372 +[C:1]([C:2](=[O:3])[N:4]([H:8])[H:9])([H:5])([H:6])[H:7]>>[C:1]([O+:3]=[C-:2][N:4]([H:8])[H:9])([H:5])([H:6])[H:7],10.6072245863762 +[C:1]1([H:7])([H:8])[C:2]([H:9])([H:10])[O:3][C:4]([H:11])=[N:5][C:6]1([H:12])[H:13]>>[C:1]([C:6](/[N:5]=[C:4](\[O+:3]=[C-:2][H:9])[H:11])([H:12])[H:13])([H:7])([H:8])[H:10],9.147762893239692 +[N:1]([c:2]1[c:3]([H:10])[o:4][n:5][c:6]1[N:7]([H:11])[H:12])([H:8])[H:9]>>[N-:1]([C:2]1=[C:3]([H:10])[O+:4]=[N:5][C@@:6]1([N:7]([H:11])[H:12])[H:8])[H:9],7.744038630612399 +[C:1]([n:2]1[c:3]([H:11])[c:4]([H:12])[c:5]([H:13])[c:6]1[O:7][H:14])([H:8])([H:9])[H:10]>>[C:1]([N:2]1[C+:3]([H:11])[C:4]([H:12])=[C-:5][C@@:6]1([O:7][H:14])[H:13])([H:8])([H:9])[H:10],7.03393272372824 +[C:1](/[N:2]=[C:3](\[N:4]([H:11])[H:12])[C:5](=[O:6])[N:7]([H:13])[H:14])([H:8])([H:9])[H:10]>>[C:1]([N:2]([C+:3]([N:4]([H:11])[H:12])[C+:5]([O-:6])[N-:7][H:14])[H:13])([H:8])([H:9])[H:10],2.012001777188 +[N:1](=[C:2]1\[O:3][C@@:4]2([H:9])[C:5]([H:10])([H:11])[C:6]([H:12])([H:13])[C@@:7]12[H:14])\[H:8]>>[N:1](=[C:2]1\[O:3][C@:4]([C:5]([C:6][H:12])([H:10])[H:11])([H:9])[C:7]1([H:13])[H:14])\[H:8],8.349162775142108 +[O:1]([c:2]1[c:3]([H:8])[c:4]([H:9])[n:5][n:6]1[H:10])[H:7]>>[O:1]([C@@:2]1([H:10])[C:3]([H:8])=[C:4]([H:9])[N:5]=[N:6]1)[H:7],5.40886416542282 +[C:1]1([H:8])([H:9])[C@@:2]2([H:10])[C@@:3]3([H:11])[O:4][C:5]([H:12])([H:13])[C@:6]1([H:14])[C@@:7]23[H:15]>>[C:1]1([H:8])([H:9])[C@:2]2([H:10])[C:3]([H:11])([H:15])[O:4][C:5]([H:12])([H:13])[C@@:6]1([H:14])[C:7]2,6.6811223736896395 
+[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]1[H:16])([H:7])([H:8])[H:9]>>[C:1]([C:2]1=[C:6]([H:16])[C:5]([H:10])([H:15])[C:4]([H:13])([H:14])[C:3]1([H:11])[H:12])([H:7])([H:8])[H:9],8.48233187657502 +[O:1]([N:2]=[C:3]1[C:4]([H:8])([H:9])[O:5][C:6]1([H:10])[H:11])[H:7]>>[O:1]([N:2]1[C:3](=[C:4]([H:8])[H:9])[C:6]([H:10])([H:11])[O:5]1)[H:7],8.162662393860531 +[C:1]([C@@:2]([O:3][H:12])([C@@:4]1([H:13])[C:5]([H:14])([H:15])[C:6]([H:16])([H:17])[O:7]1)[H:11])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]([O:3][H:12])([C:5]([C:6]([O+:7]=[C-:4][H:13])([H:16])[H:17])([H:14])[H:15])[H:11])([H:8])([H:9])[H:10],12.523488930972402 +[C:1]([C:2]([C:3]#[C:4][C:5]([O:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C:3](=[C:4])[C:5]([O:6][H:14])([H:12])[H:13])([H:10])[H:11])([H:7])([H:8])[H:9],5.14514691462105 +[C:1]([C@@:2]1([H:10])[O:3][C@:4]1([C:5]#[N:6])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[O+:3][C:4](=[C:5]=[N-:6])[H:11])[H:10])([H:7])([H:8])[H:9],5.5145468136589 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[C@:4]([O:5][H:14])([H:13])[C:6]1([H:15])[H:16])([H:7])([H:8])[H:9]>>[C:1]([C:2](=[C:6]([H:15])[H:16])[H:10])([H:7])([H:8])[H:9].[C:3](=[C:4]([O:5][H:14])[H:13])([H:11])[H:12],9.04772087578722 +[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C@:4]2([H:12])[C:5]([H:13])([H:14])[C@@:6]1([H:15])[O:7]2)[H:8]>>[O:1]([C@@:2]1([H:9])[C:3]([H:10])([H:11])[C-:4]=[O+:7][C@@:6]1([C:5]([H:12])([H:13])[H:14])[H:15])[H:8],7.487274734764421 +[C:1]([C:2]([C@@:3]1([C:4]([H:12])([H:13])[H:14])[C:5]([H:15])([H:16])[O:6]1)([H:10])[H:11])([H:7])([H:8])[H:9]>>[C:1]([C:2]([C@:3]([C:4]([H:12])([H:13])[H:14])([O+:6]=[C-:5][H:16])[H:15])([H:10])[H:11])([H:7])([H:8])[H:9],7.77205306076843 +[C:1]([C:2]([C@@:3]([C:4]([H:12])([H:13])[H:14])([O:5][H:15])[H:11])([H:9])[H:10])([H:6])([H:7])[H:8]>>[C:1](=[C:2]([H:9])[H:10])([H:7])[H:8].[C:3]([C:4]([H:12])([H:13])[H:14])(=[O:5])[H:11].[H:6][H:15],8.54804185198168 +[O:1]=[c:2]1[n:3]([H:7])[c:4]([H:8])[c:5]([H:9])[o:6]1>>[O:1]([c:2]1[n:3][c:4]([H:8])[c:5]([H:9])[o:6]1)[H:7],5.452475451259422 +[O:1]([C@@:2]1([H:8])[C:3]([H:9])([H:10])[N:4]2[C:5]([H:11])([H:12])[C@@:6]12[H:13])[H:7]>>[H:7][H:8].[O:1]=[C:2]1[C:3]([H:9])([H:10])[N:4]2[C:5]([H:11])([H:12])[C@@:6]12[H:13],8.423323392042459 +[O:1]=[C:2]1[C:3]([H:8])([H:9])[C:4]([H:10])=[C:5]([H:11])[C:6]1=[O:7]>>[O:1]=[C:2]1[C:3]([H:8])([H:9])[C@@:4]1([C:5](=[C:6]=[O:7])[H:11])[H:10],8.18779353216338 +[C:1]([N:2]1[C:3]([H:10])([H:11])[C:4]([H:12])([H:13])[C:5]1=[O:6])([H:7])([H:8])[H:9]>>[C:1](=[N:2][C:3]([C:4]([C:5](=[O:6])[H:7])([H:12])[H:13])([H:10])[H:11])([H:8])[H:9],8.16424839016317 +[C:1]1([H:7])([H:8])[C:2]([H:9])=[C:3]([H:10])[C@@:4]2([H:11])[O:5][C@@:6]12[H:12]>>[C:1]1([H:7])([H:8])[C:2][C:3]([H:9])([H:10])[C@@:4]2([H:11])[O:5][C@@:6]12[H:12],6.932872801210389 +[N:1]([c:2]1[n:3][o:4][c:5]([H:9])[n:6]1)([H:7])[H:8]>>[C:5](#[N:6])[H:9].[N:1]([C:2]#[N+:3][O-:4])([H:7])[H:8],7.26403614761611 +[O:1]([C:2]([C:3]([C:4](=[O:5])[H:11])([H:9])[H:10])([H:7])[H:8])[H:6]>>[C:2](=[C:3]([C:4](=[O:5])[H:11])[H:10])([H:7])[H:8].[O:1]([H:6])[H:9],5.672914243715029 +[O:1]([C:2]([C:3]([O:4][C:5](=[O:6])[H:12])([H:10])[H:11])([H:8])[H:9])[H:7]>>[C-:5]#[O+:6].[O:1]([C:2]([C:3]([O:4][H:12])([H:10])[H:11])([H:8])[H:9])[H:7],6.89586730282268 +[C:1]([C@@:2]([O:3][H:11])([C:4]([N:5]([H:14])[H:15])([H:12])[H:13])[C:6]#[N:7])([H:8])([H:9])[H:10]>>[C:1]([C@:2]([O:3][H:11])([C:6]#[N:7])[H:13])([H:8])([H:9])[H:10].[C:4]([N:5]([H:14])[H:15])[H:12],7.213900926218651 
+[O:1]([c:2]1[c:3]([H:8])[n:4]([H:9])[n:5][n:6]1)[H:7]>>[O:1]([c:2]1[c:3]([H:8])[n:4][n:5]([H:9])[n:6]1)[H:7],4.47025769519276 +[C:1]([C@@:2]1([O:3][H:10])[C:4]([H:11])([H:12])[C@:5]1([O:6][H:14])[H:13])([H:7])([H:8])[H:9]>>[C:1](=[C:2]([O:3][H:10])[C:4]([C:5](=[O:6])[H:13])([H:11])[H:12])([H:7])[H:9].[H:8][H:14],6.09028387277675 +[C:1]([C@:2]12[N:3]([H:11])[C@@:4]1([H:12])[C:5]([H:13])([H:14])[C:6]2=[O:7])([H:8])([H:9])[H:10]>>[C-:6]#[O+:7].[C:1](/[C:2](=[N:3]/[H:11])[C:4](=[C:5]([H:13])[H:14])[H:12])([H:8])([H:9])[H:10],7.338289583240521 +[N:1]#[C:2][C@@:3]1([H:8])[C:4]([H:9])([H:10])[C@@:5]2([H:11])[C:6]([H:12])([H:13])[N:7]12>>[N:1]#[C:2]/[C:3](=[N:7]\[C:6]([C:5](=[C:4]([H:9])[H:10])[H:11])([H:12])[H:13])[H:8],5.848991692576071 +[C:1]([C@@:2]1([H:11])[C:3]([H:12])([H:13])[C@:4]1([C:5]([C:6]#[C:7][H:17])([H:15])[H:16])[H:14])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]([C:4](=[C:3]([H:12])[H:13])[H:14])([C:7](=[C:6]=[C:5]([H:15])[H:16])[H:17])[H:11])([H:8])([H:9])[H:10],7.843525163692592 +[O:1]=[C:2]([c:3]1[c:4]([H:9])[n:5][c:6]([H:10])[n:7]1[H:11])[H:8]>>[O:1]=[C:2]([c:3]1[c-:4][n:5]([H:9])[c+:6]([H:10])[n:7]1[H:11])[H:8],8.29205506684849 +[C:1]([C@@:2]1([H:11])[O:3][C@@:4]2([H:12])[C:5]([H:13])([H:14])[C@@:6]12[C:7]([H:15])([H:16])[H:17])([H:8])([H:9])[H:10]>>[C:1](/[C:2](=[C:6](/[C@@:4]1([H:12])[O:3][C:5]1([H:13])[H:14])[C:7]([H:15])([H:16])[H:17])[H:11])([H:8])([H:9])[H:10],9.58527585055238 +[C:1]([C@@:2]1([H:10])[C:3]([H:11])([H:12])[N:4]([H:13])[C:5]1=[O:6])([H:7])([H:8])[H:9]>>[C-:1]([O+:6]=[C:5]1[C:2]([H:9])([H:10])[C:3]([H:11])([H:12])[N:4]1[H:13])([H:7])[H:8],10.7853171818489 +[C:1]1([H:7])([H:8])[C@@:2]2([H:9])[C@@:3]3([H:10])[O:4][C@:5]1([H:11])[C@@:6]23[H:12]>>[C:1]([C:2]1([H:9])[C:3]([H:10])=[C:6]1[H:12])([C:5](=[O:4])[H:11])([H:7])[H:8],4.03622656043101 +[O:1]([c+:2]1[n-:3][c:4]([H:8])[n:5][n:6]1[H:9])[H:7]>>[O:1]=[c:2]1[n:3]([H:7])[c:4]([H:8])[n:5][n:6]1[H:9],4.30770484353892 +[N:1]([C:2](=[O:3])[C:4]([C:5]([C:6]([O:7][H:16])([H:14])[H:15])([H:12])[H:13])([H:10])[H:11])([H:8])[H:9]>>[C:5]([C:6](=[O:7])[H:14])([H:12])([H:13])[H:15].[N:1]([C:2](=[O:3])[C:4]([H:10])([H:11])[H:16])([H:8])[H:9],9.57678082079487 +[O:1]=[C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11])[N:5]=[C:6]([H:12])[O:7]1)[H:8]>>[O:1]([C:2][C@@:3]1([H:9])[C:4]([H:10])([H:11])[N:5]=[C:6]([H:12])[O:7]1)[H:8],8.086546852034171 +[O:1]=[C:2]1[N:3]([H:8])[C:4]([H:9])([H:10])[C@@:5]2([H:11])[C:6]([H:12])([H:13])[N:7]12>>[C:4](=[C:5]([N:7]=[C:6]([H:12])[H:13])[H:11])([H:9])[H:10].[O:1]=[C:2]=[N:3][H:8],9.240672854865 +[C:1]([C:2]1([C:3]([H:10])([H:11])[H:12])[C:4]([H:13])([H:14])[C@:5]1([O:6][H:16])[H:15])([H:7])([H:8])[H:9]>>[C:1]([C:4]([C@:5]([C:2][C:3]([H:10])([H:11])[H:12])([O:6][H:16])[H:15])([H:13])[H:14])([H:7])([H:8])[H:9],9.630105332396308 +[N:1]#[C:2][c:3]1[c:4]([H:8])[o:5][c:6]([H:9])[c:7]1[H:10]>>[N:1]#[C:2][C:3]1=[C:4]([H:8])[O:5][C:7]1=[C:6]([H:9])[H:10],8.54712866156014 +[C:1](#[C:2][C:3]([C:4]([C:5]#[C:6][H:12])([H:10])[H:11])([H:8])[H:9])[H:7]>>[C:1](=[C:2]1[C:3]([H:8])([H:9])[C@@:4]1([C:5]#[C:6][H:12])[H:11])([H:7])[H:10],8.92566573467996 +[C:1]([C:2]([C:3]([H:12])([H:13])[H:14])([C:4]#[C:5][C:6](=[O:7])[H:15])[H:11])([H:8])([H:9])[H:10]>>[C:1]([C:2](=[C:4]=[C:5]([C:3]([H:12])([H:13])[H:14])[C:6](=[O:7])[H:15])[H:11])([H:8])([H:9])[H:10],9.29566511350831 +[O:1]=[C:2]([C@@:3]1([H:9])[C:4]([H:10])([H:11])[N:5]=[C:6]([H:12])[O:7]1)[H:8]>>[O:1]([C@@:2]1([H:8])[C@@:3]2([H:9])[C:4]([H:10])=[N:5][C@:6]1([H:12])[O:7]2)[H:11],7.75344158332724 
+[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])=[C:6]([H:16])[C@@:7]12[H:17])([H:8])([H:9])[H:10]>>[C:1]([C@@:2]1([H:11])[C@@:3]2([H:12])[C:4]([H:13])([H:14])[C:5]([H:15])([H:17])[C:6]([H:16])=[C:7]12)([H:8])([H:9])[H:10],10.650215451201401 +[C:1]1([H:8])([H:9])[C@@:2]2([H:10])[N:3]1[C:4]([H:11])([H:12])[C:5]21[C:6]([H:13])([H:14])[C:7]1([H:15])[H:16]>>[C:1]1([H:8])([H:9])[C@@:2]2([C:7]([H:10])([H:15])[H:16])[N:3]1[C:4]([H:11])([H:12])[C:5]2=[C:6]([H:13])[H:14],10.1389447353643 +[C:1]([C@@:2]1([C:3]([C:4]([O:5][H:15])([H:13])[H:14])([H:11])[H:12])[C:6]([H:16])([H:17])[O:7]1)([H:8])([H:9])[H:10]>>[C:1]([C:2]([C:3]([C:4]([O:5][H:15])([H:13])[H:14])([H:11])[H:12])=[O+:7][C-:6]([H:16])[H:17])([H:8])([H:9])[H:10],6.97993447045958 diff --git a/chemprop/tests/data/regression/test.npz b/chemprop/tests/data/regression/test.npz new file mode 100644 index 0000000000000000000000000000000000000000..65762d371dc586da86cebc785dfe9bb762657c02 --- /dev/null +++ b/chemprop/tests/data/regression/test.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f9b2d92f681644101bbb7172ebce2fd3901661c88604326132e0fcd31b42d5c +size 6663 diff --git a/chemprop/tests/data/regression/test_true.csv b/chemprop/tests/data/regression/test_true.csv new file mode 100644 index 0000000000000000000000000000000000000000..7d03a83f1ebe7e26bf342be35e58825874ebe8ea --- /dev/null +++ b/chemprop/tests/data/regression/test_true.csv @@ -0,0 +1,11 @@ +smiles,logSolubility +CCOP(=S)(OCC)SC(CCl)N2C(=O)c1ccccc1C2=O,-6.34 +Brc1ccc(Br)cc1,-4.07 +Cn2c(=O)on(c1ccc(Cl)c(Cl)c1)c2=O,-2.82 +Oc1ccc(cc1)c2ccccc2,-3.48 +CC1=C(CCCO1)C(=O)Nc2ccccc2,-2.56 +CCOC=C,-0.85 +CCC#C,-1.24 +COc1ncnc2nccnc12,-1.11 +CCCCC(C)(O)CC,-1.6 +Clc1ccc(Cl)cc1,-3.27 diff --git a/chemprop/tests/data/regression/weights.csv b/chemprop/tests/data/regression/weights.csv new file mode 100644 index 0000000000000000000000000000000000000000..e160ad7b5289db68deb464624745de23bee5754a --- /dev/null +++ b/chemprop/tests/data/regression/weights.csv @@ -0,0 +1,500 @@ +1 +2 +3 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 \ No 
newline at end of file diff --git a/chemprop/tests/data/smis.csv b/chemprop/tests/data/smis.csv new file mode 100644 index 0000000000000000000000000000000000000000..c96748bd2dd531384e7e4a79b9fef8126fbc1eb1 --- /dev/null +++ b/chemprop/tests/data/smis.csv @@ -0,0 +1,101 @@ +smiles +Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 +COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23 +COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl +OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3 +Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1 +OC1(CN2CCC1CC2)C#Cc3ccc(cc3)c4ccccc4 +COc1cc(OC)c(cc1NC(=O)CCC(=O)O)S(=O)(=O)NCc2ccccc2N3CCCCC3 +CNc1cccc(CCOc2ccc(C[C@H](NC(=O)c3c(Cl)cccc3Cl)C(=O)O)cc2C)n1 +COc1ccc(cc1)C2=COc3cc(OC)cc(OC)c3C2=O +Oc1ncnc2scc(c3ccsc3)c12 +CS(=O)(=O)c1ccc(Oc2ccc(cc2)C#C[C@]3(O)CN4CCC3CC4)cc1 +C[C@H](Nc1nc(Nc2cc(C)[nH]n2)c(C)nc1C#N)c3ccc(F)cn3 +O=C1CCCCCN1 +CCCSc1ncccc1C(=O)N2CCCC2c3ccncc3 +CC1CCCCC1NC(=O)c2cnn(c2NS(=O)(=O)c3ccc(C)cc3)c4ccccc4 +Nc1ccc(cc1)c2nc3ccc(O)cc3s2 +COc1ccc(cc1)N2CCN(CC2)C(=O)[C@@H]3CCCC[C@H]3C(=O)NCC#N +CCC(COC(=O)c1cc(OC)c(OC)c(OC)c1)(N(C)C)c2ccccc2 +COc1cc(ccc1N2CC[C@@H](O)C2)N3N=Nc4cc(sc4C3=O)c5ccc(Cl)cc5 +CO[C@H]1CN(CCN2C(=O)C=Cc3ccc(cc23)C#N)CC[C@H]1NCc4ccc5OCC(=O)Nc5n4 +CC(C)(CCCCCOCCc1ccccc1)NCCc2ccc(O)c3nc(O)sc23 +Clc1ccc(cc1)C(=O)Nc2oc(nn2)C(=O)Nc3ccc(cc3)N4CCOCC4 +COc1ccc(Oc2cccc(CN3CCCC(C3)N4C=C(C)C(=O)NC4=O)c2)cc1 +OC(=O)c1cccc(c1)N2CCC(CN3CCC(CC3)Oc4ccc(Cl)c(Cl)c4)CC2 +CNCC[C@@H](Oc1ccccc1C)c2ccccc2 +Clc1ccc(N2CCN(CC2)C(=O)CCCc3ccncc3)c(Cl)c1 +COc1cnc(nc1N(C)C)c2ccccn2 +C(CCCCNc1cc(nc2ccccc12)c3ccccc3)CCCNc4cc(nc5ccccc45)c6ccccc6 +CSc1c(cnn1c2ccc(cc2)C(=O)O)C(=O)NC3C4CC5CC(CC3C5)C4 +CNC1=Nc2ncccc2C(=NC1c3cccs3)c4occn4 +CS(=O)(=O)C1(CC1)c2cc(nc(n2)c3cccc4[nH]ccc34)N5CC6CCC(C5)O6 +CN([C@@H]1CCN(Cc2ccc(cc2)C(F)(F)F)C[C@@H]1F)C(=O)Cc3ccc(cc3)n4cnnn4 +CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C +CS(=O)(=O)c1ccccc1C(=O)NC[C@@H](O)CN2CCC(CC2)Oc3ccc(Cl)c(Cl)c3 +O=C(NCc1ccncc1)c2ccc(Oc3ccccc3C#N)cc2 +CN(C)c1ccnc2sc(C(=O)NCc3ccccc3)c(N)c12 +CN1CCN(CC1)c2ccc3N=CN(C(=O)c3c2)c4cc(NC(=O)c5cscn5)ccc4C +Cn1cncc1c2c3C(=O)N(CC4CC4)C(=O)N(CC5CC5)c3nn2Cc6ccnc7ccc(Cl)cc67 +COc1ccc2ncc(C#N)c(CCN3CCC(CC3)NCc4cc5SCOc5cn4)c2c1 +CNC(=O)C1(CCN(CC[C@H](CN(C)C(=O)c2c(OC)c(cc3ccccc23)C#N)c4ccc(Cl)c(Cl)c4)CC1)N5CCCCC5=O +OB1N(C(=O)Nc2ccccc12)c3ccccc3 +CC(C)N(CCC(C(=O)N)(c1ccccc1)c2ccccn2)C(C)C +NC(=NC#N)c1sc(Nc2ccccc2)nc1N +CCS(=O)(=O)c1ccc(c(C)c1)c2cc(ccc2O[C@H](C)C(=O)O)C(F)(F)F +OC(=O)COc1ccc(cc1c2cc(ccc2F)C#N)C(F)(F)F +COc1ccc(cn1)C2=Cc3c(C)nc(N)nc3N([C@@H]4CC[C@H](CC4)OCCO)C2=O +CC(Nc1ncnc2ccccc12)c3ccccc3 +CC(C)c1ccc2Oc3nc(N)c(cc3C(=O)c2c1)C(=O)O +O[C@@H](CNCCCOCCOCCc1cccc2ccccc12)c3ccc(O)c4NC(=O)Sc34 +COc1ccccc1Cn2c(C)nc3ccccc23 +OC(=O)c1ccc(NC(=O)c2cc(OCc3ccccc3F)cc(OCc4ccccc4F)c2)nc1 +NC(Cc1c[nH]c2ccccc12)C(=O)O +OC(=O)CCC[C@H]1[C@@H](Cc2ccccc12)NC(=O)c3cc4cc(F)ccc4[nH]3 +CCNC(=O)c1cc2c(c(cnc2[nH]1)c3cncc(c3)C(=O)O)n4ccc(n4)C(F)(F)F +C[C@H](NC(=O)c1c(C)nn(C2CCCC2)c1NS(=O)(=O)c3ccc(C)cc3)C(C)(C)C +N(c1ccccc1)c2cc(Nc3ccccc3)[nH]n2 +COCCNC(=O)c1cccc(Nc2ncc3cc(ccc3n2)c4ccncc4)c1 +CCC(CC)NC(=O)c1cnn(C)c1NS(=O)(=O)c2ccc(C)cc2 +NC(=O)c1cc(F)cc(O[C@H]2C[C@H]3CC[C@@H](C2)N3Cc4ccccc4)c1 +O=C1NC(=NC(=C1C#N)c2ccccc2)SCCc3ccccc3 +OC(C(=O)OC1CN2CCC1CC2)(c3ccccc3)c4ccccc4 +Cc1ccccc1NC(=O)CCS(=O)(=O)c2ccc(Br)s2 +CC(C)n1c(C)ncc1c2nc(Nc3ccc(cc3)C(=O)N(C)C)ncc2F +COc1cccc(c1)c2c[nH]c(n2)c3ccccc3 +O=C(COc1ccccc1)c2ccccc2 +COc1cc2ncc(C(=O)N)c(Nc3ccc(F)cc3F)c2cc1NCCN(C)C +CO[C@@H]1CC[C@@]2(CC1)Cc3ccc(OCC(C)C)cc3C24N=C(C)C(=N4)N 
+COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN4CCCC4 +O=C1CCOc2cc(COc3ccccc3)ccc12 +Clc1cccc2cn[nH]c12 +CNC(=O)c1ccc(CC(=O)N(C)C2CCN(Cc3ccc(cc3)C(F)(F)F)CC2)cc1 +COCCNCc1ccc(CCNC[C@H](O)c2ccc(O)c3NC(=O)Sc23)cc1 +Cn1cncc1c2c3C(=O)N(CC#C)C(=O)N(CC4CC4)c3nn2Cc5ccnc6ccc(Cl)cc56 +C[C@H](NC(=O)c1cccnc1Oc2ccccc2)c3ccccc3 +Clc1ccc(CN2CC3CNCC(C2)O3)cc1C(=O)NCC45CC6CC(CC(C6)C4)C5 +COc1cc(NS(=O)(=O)c2ccc(N)cc2)nc(OC)n1 +Cc1cc(CCC2CCN(CC2)S(=O)(=O)CC3(CCOCC3)N(O)C=O)c(C)cn1 +C[C@H](Nc1ncc(F)c(Nc2cc([nH]n2)C3CC3)n1)c4ncc(F)cn4 +CC(=O)Nc1ccc2c(c1)c(cn2CCCO)c3cc(NC4CC4)n5ncc(C#N)c5n3 +CC1COc2c(N3CCN(C)CC3)c(F)cc4C(=O)C(=CN1c24)C(=O)O +CC1(CC1)c2nc(ncc2C(=O)N[C@@H]3C4CC5CC3C[C@@](O)(C5)C4)N6CCOCC6 +COC(=O)c1ccc(C)c(NS(=O)(=O)c2ccc3N(C)SC(=O)c3c2)c1 +COc1ccc(cc1)C2=COc3cc(O)cc(O)c3C2=O +CNCCCC12CCC(c3ccccc13)c4ccccc24 +Oc1cc(nc2ccnn12)c3ccccc3 +Fc1cc(cc(F)c1C2=CCN(CC2)C=O)N3C[C@H](COc4ccon4)OC3=O +CC(C#C)N1C(=O)N(CC2CC2)c3nn(Cc4ccnc5ccc(Cl)cc45)c(c3C1=O)c6cncn6C +C[C@H]1CN(Cc2cc(Cl)ccc2OCC(=O)O)CCN1C(=O)Cc3ccccc3 +COc1cc(Nc2nc(N[C@@H](C)c3ncc(F)cn3)ncc2Br)n[nH]1 +Cc1nc(C)c(nc1C(=O)N)c2ccc([C@@H]3CC[C@@H](CC(=O)O)CC3)c(F)c2 +COc1ccnc(CCc2nc3c(C)ccnc3[nH]2)c1 +Cc1cc(CCCOc2c(Cl)cc(cc2Cl)C3=NCCO3)on1 +CN(C)C(=O)c1ccc(CN2CCc3cc4nc(N)sc4cc3CC2)cc1 +COC(=O)[C@H]1[C@@H](O)CC[C@H]2CN3CCc4c([nH]c5ccccc45)[C@@H]3C[C@H]12 +CCN1CCN(CC1)c2ccc(Nc3cc(ncn3)N(C)C(=O)Nc4c(Cl)c(OC)cc(OC)c4Cl)cc2 +CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C@H]([C@H](O)[C@@H]2O)n3cnc4c(N)ncnc34 +CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)c3)c2n1)c4c(Cl)c(OC)cc(OC)c4Cl +CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)C(=O)c13 +COc1ccc(Cc2c(N)n[nH]c2N)cc1 +CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(=O)Nc3cccnc3 diff --git a/chemprop/tests/data/spectra.csv b/chemprop/tests/data/spectra.csv new file mode 100644 index 0000000000000000000000000000000000000000..3fd81c6aad01de70fdaaac5ee2ffb836c047f6ad --- /dev/null +++ b/chemprop/tests/data/spectra.csv @@ -0,0 +1,201 @@ +"smiles",400,402,404,406,408,410 +"O=C(O)c1ccco1",0.001718021194011,0.001718021194011,0.001716797003396,0.001701030921568,0.001677361856277,0.001643664219237 +"O=C(O)c1ccco1",0.000814858567868,0.000814858567868,0.000814658731673,0.000821599292867,0.000841480209384,0.000869636808942 +"CCOP(=O)(OCC)C(O)[C@@H]1OC(C)(C)O[C@H]1[C@@H]1COC(C)(C)O1",0.00095975940077,0.00095975940077,0.00095869222154,0.000944127265653,0.000921543744199,0.000889734581559 +"CCOP(=O)(OCC)C(O)[C@@H]1OC(C)(C)O[C@H]1[C@@H]1COC(C)(C)O1",0.003490215139879,0.003490215139879,0.003489255010247,0.003467171609632,0.003426105091663,0.003371304442035 +"c1ccc(C2=NOC(c3ccccc3)C2)cc1",0.00052270050794,0.00052270050794,0.00052104532661,0.000525180733987,0.000558042717288,0.000607812473411 +"c1ccc(C2=NOC(c3ccccc3)C2)cc1",0.002354314549285,0.002354314549285,0.002352638290808,0.002313031156778,0.002241742882841,0.002152567395278 +"Clc1ccc(OCc2ccccc2)cc1",0.000810842444357,0.000810842444357,0.000809552075127,0.000797494015839,0.000787722398537,0.000781087910526 +"Clc1ccc(OCc2ccccc2)cc1",0.001320808947919,0.001320808947919,0.001318776307599,0.001303206221412,0.001293945404753,0.001287487770948 +"Cc1ccc(OCc2ccccc2)cc1",0.00065840172601,0.00065840172601,0.000658059716896,0.000660817032034,0.000672467742524,0.000689709782268 +"Cc1ccc(OCc2ccccc2)cc1",0.002311699163512,0.002311699163512,0.00230898717372,0.00228016095968,0.002246088363437,0.002203490933737 +"c1ccc(P(CCP(c2ccccc2)c2ccccc2)c2ccccc2)cc1",0.003332960946086,0.003332960946086,0.00332042146637,0.003217893403366,0.003133245997156,0.003033806612718 
+"c1ccc(P(CCP(c2ccccc2)c2ccccc2)c2ccccc2)cc1",0.001509156146077,0.001509156146077,0.001507610193323,0.001520821867592,0.001578102687275,0.001669170936526 +"CC(=O)Oc1ccc(C=O)cc1Br",0.000724402388643,0.000724402388643,0.000722504043091,0.000688800993576,0.000634126899912,0.000566945373484 +"CC(=O)Oc1ccc(C=O)cc1Br",0.001859697519601,0.001859697519601,0.001856053125476,0.001801014627982,0.00171752205043,0.001613157562645 +"CN=C(O)Oc1cccc(C(=O)O)c1",0.000833728357547,0.000833728357547,0.000833187241774,0.000831338884827,0.000834735160083,0.000841583837956 +"CN=C(O)Oc1cccc(C(=O)O)c1",0.002454620804169,0.002454620804169,0.002453797647828,0.002428621423434,0.002375550593143,0.00229918968797 +"CN=C(O)Oc1cccc(C(=O)OC)c1",0.000683722707528,0.000683722707528,0.000684397488895,0.000693601845895,0.000706472042492,0.000721941351579 +"CN=C(O)Oc1cccc(C(=O)OC)c1",0.001449498255641,0.001449498255641,0.001450402275959,0.00146115471988,0.001474975870809,0.001491953211684 +"COc1ccc(OCc2ccccc2)cc1",0.001168586792138,0.001168586792138,0.001167098893544,0.00116276184151,0.001170652662053,0.001180761792532 +"COc1ccc(OCc2ccccc2)cc1",0.00187424523919,0.00187424523919,0.001869902994179,0.00182610967031,0.001777420007798,0.001717747254148 +"COc1cc(/C=C/C(=O)O)ccc1OC(C)=O",0.000748475867829,0.000748475867829,0.00074777539367,0.000739575347073,0.00072915813677,0.000716381478054 +"COc1cc(/C=C/C(=O)O)ccc1OC(C)=O",0.001872254480516,0.001872254480516,0.001867656371957,0.001814070300071,0.00174291769209,0.001649459469235 +"CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)[C@@](C)(CO)[C@@H]5CC[C@]43C)[C@@H]2C1",0.000925660580061,0.000925660580061,0.000925149182147,0.000919817013521,0.000914304058597,0.000908836890005 +"CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)[C@@](C)(CO)[C@@H]5CC[C@]43C)[C@@H]2C1",0.003479867685431,0.003479867685431,0.0034699028362,0.003365943491687,0.003249261106792,0.003112495517379 +"CS(=O)(=O)OC1C2CC3CC(C2)C(=O)C1C3",0.000566294668051,0.000566294668051,0.000565900733051,0.000561280089324,0.000554703167765,0.000545142371854 +"CS(=O)(=O)OC1C2CC3CC(C2)C(=O)C1C3",0.000984637464029,0.000984637464029,0.000983018797351,0.000958989120183,0.000923439312541,0.000880071975387 +"O=C(O)[C@@H]1C[C@@H]2C=CC[C@H](C1)C2",0.000625654312298,0.000625654312298,0.000623982463689,0.000618667821929,0.000628416987333,0.000645396405794 +"O=C(O)[C@@H]1C[C@@H]2C=CC[C@H](C1)C2",0.0017941024604,0.0017941024604,0.001792234148276,0.001760999404394,0.001710831440675,0.001647138710888 +"OC1=NC2CC3CC(C2)CC1C3",0.001375942061624,0.001375942061624,0.001373785909499,0.001360153794491,0.00135670662078,0.001357301687633 +"OC1=NC2CC3CC(C2)CC1C3",0.001525946854972,0.001525946854972,0.001520763540364,0.001486197670848,0.001474704959552,0.001474739115898 +"ON=C1C2CC3CC(C2)CC1C3",0.001106720880744,0.001106720880744,0.001104863881011,0.001080706591626,0.001046051258337,0.001000516628517 +"ON=C1C2CC3CC(C2)CC1C3",0.003135448864042,0.003135448864042,0.00313439589494,0.003089116297134,0.002991609157676,0.002858281532411 +"OC1C2CC3CC(C2)CC1C3",0.001562789367383,0.001562789367383,0.001559532867229,0.001534658302208,0.001517257045199,0.001497967028673 +"OC1C2CC3CC(C2)CC1C3",0.002150075447414,0.002150075447414,0.002145521096006,0.002083811575201,0.001993555534015,0.001876469753293 +"O=C(O)C1CC2CCCC(C2)C1",0.001341037932348,0.001341037932348,0.001338867547583,0.001314578115846,0.001283442456079,0.001242423385961 
+"O=C(O)C1CC2CCCC(C2)C1",0.003744276484873,0.003744276484873,0.003743535751921,0.003723029510948,0.003683201519234,0.003630424961493 +"NC1C2CC3CC(C2)CC1C3",0.001199712916371,0.001199712916371,0.001199313817154,0.001190270315545,0.001173447219109,0.001150845414807 +"NC1C2CC3CC(C2)CC1C3",0.001467837890395,0.001467837890395,0.001464977162953,0.001440153362869,0.001414851371905,0.00137874257272 +"O=C(O)C12CC3CC(CC(C3)C1)C2",0.001152332162315,0.001152332162315,0.001152071621577,0.001143428667892,0.001126071710292,0.001103226286078 +"O=C(O)C12CC3CC(CC(C3)C1)C2",0.001615398037049,0.001615398037049,0.001615398037049,0.001597272715252,0.001572518098465,0.001548153095967 +"O=C(O)C12CC3CC(CC(C3)C1)C2",0.002134517520757,0.002134517520757,0.002133285602166,0.00212154199788,0.002110432292872,0.002099190727388 +"O/N=C1/CC2C[C@H]3CC1C[C@@H](C2)C3",0.001243662931256,0.001243662931256,0.001238035303857,0.001185792256904,0.001135315641163,0.001078919928777 +"O/N=C1/CC2C[C@H]3CC1C[C@@H](C2)C3",0.003137976147899,0.003137976147899,0.003134698735437,0.003084031520585,0.003000987961505,0.002886258043889 +"O=C(O)CC1C[C@H]2CCC[C@@H](C1)C2",0.000919363720206,0.000919363720206,0.000921039474642,0.000941929944071,0.000968723026689,0.000999237610327 +"O=C(O)CC1C[C@H]2CCC[C@@H](C1)C2",0.003252326473162,0.003252326473162,0.003242679122482,0.003153227070565,0.003061135781429,0.002945080771062 +"ClC(Cl)[C@]12C[C@@H]3C[C@H](C1)C[C@@](C(Cl)Cl)(C3)C2",0.00534845173677,0.00534845173677,0.005343782329951,0.005291911881391,0.005224859663568,0.00513414348467 +"ClC(Cl)[C@]12C[C@@H]3C[C@H](C1)C[C@@](C(Cl)Cl)(C3)C2",0.001945640809474,0.001945640809474,0.001942987714868,0.001919738862473,0.001899780873369,0.001879028550369 +"O=C(O)C12CC3CC(CC(CBr)(C3)C1)C2",0.000916370311921,0.000916370311921,0.000916482405903,0.000916772814012,0.000916250198849,0.000915929494363 +"O=C(O)C12CC3CC(CC(CBr)(C3)C1)C2",0.007858812308727,0.007858812308727,0.007852310122188,0.007762536593329,0.007634486461968,0.007476892821513 +"CC(O)=NC12CC3CC(CC(C3)C1)C2",0.00161158093703,0.00161158093703,0.001612359938931,0.001601063351429,0.001563688507875,0.001512924253318 +"CC(O)=NC12CC3CC(CC(C3)C1)C2",0.003741126095613,0.003741126095613,0.00373433803993,0.003651674846398,0.003538834963848,0.003391395319184 +"CC(O)=NC1(C)C2CC3CC(C2)CC1C3",0.002618478953991,0.002618478953991,0.002615725666243,0.002592869533172,0.002571911936553,0.002543254268616 +"CC(O)=NC1(C)C2CC3CC(C2)CC1C3",0.000893786366761,0.000893786366761,0.000893795311505,0.000893818480477,0.000893776781004,0.000893751181174 +"CCCCCCCCCC1CCCCC1",0.00334670096896,0.00334670096896,0.003343590820542,0.003296713317767,0.003224588616061,0.003131906766355 +"CCCCCCCCCCC1CCCCC1",0.006340314839409,0.006340314839409,0.006340498215315,0.006278594629684,0.006126584700986,0.005922089429307 +"c1ccc2c(c1)Cc1cc3ccccc3cc1-2",0.001114625848908,0.001114625848908,0.001112721022853,0.001101521731849,0.00110150219775,0.001108222616505 +"c1ccc2c(c1)Cc1cc3ccccc3cc1-2",0.003596873343085,0.003596873343085,0.003592651122822,0.003520922404186,0.003399436134198,0.003234602239806 +"O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1",0.00057790463162,0.00057790463162,0.000576775593398,0.000568041722842,0.000561798345277,0.000554927895698 +"O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1",0.000801589964393,0.000801589964393,0.00080186919873,0.000802975186025,0.000800929261023,0.000795683091934 +"Cc1ccc(/C=C/C(=O)O)o1",0.001737047861411,0.001737047861411,0.001734981293203,0.001714651911826,0.001693055636889,0.001667313852754 
+"Cc1ccc(/C=C/C(=O)O)o1",0.002403074396331,0.002403074396331,0.00240138076174,0.002372487682542,0.002325768587323,0.002266614549837 +"CCCCNc1ccccc1",0.001448557231584,0.001448557231584,0.00144780897147,0.001452239033618,0.001473340300037,0.001503993482388 +"CCCCNc1ccccc1",0.001395728285315,0.001395728285315,0.001396301467008,0.001395168959663,0.001382050527161,0.001357968830524 +"NCCNCCN",0.001301158513397,0.001301158513397,0.001299248800555,0.001280077319527,0.001259397847293,0.001235077097568 +"COC(=O)/C=C/c1ccco1",0.0006680060611,0.0006680060611,0.000667152636274,0.000658060221716,0.000648497505044,0.00063908627888 +"COC(=O)/C=C/c1ccco1",0.000495688605345,0.000495688605345,0.000494683202909,0.000486304382877,0.000478167807101,0.00046634033155 +"COC(=O)c1ccc(C(C)=O)o1",0.000437024622862,0.000437024622862,0.000435900468775,0.000431960427333,0.000439234183464,0.00045513935411 +"COC(=O)c1ccc(C(C)=O)o1",0.003290130195429,0.003290130195429,0.003289035887059,0.003285587606594,0.003291887745201,0.003302257448652 +"COC(=O)/C=C/c1ccc(C)o1",0.000355106417068,0.000355106417068,0.000355576192307,0.000363368185437,0.000375731528014,0.000391180201359 +"COC(=O)/C=C/c1ccc(C)o1",0.001408683538118,0.001408683538118,0.001406781729215,0.001389552782119,0.00137414614952,0.00135830917763 +"OCc1cccs1",0.001246981971416,0.001246981971416,0.001246430066127,0.001231250066406,0.001200760429801,0.001158371504403 +"Cc1ccc(C=O)s1",0.002489728664158,0.002489728664158,0.002487585792396,0.002494911596595,0.002540700165362,0.002605171776548 +"Cc1ccc(C=O)s1",0.000658200227859,0.000658200227859,0.000657930690997,0.000659913974168,0.000668579611328,0.000681328571521 +"Cc1ccc(C=O)s1",0.000875323720374,0.000875323720374,0.000875816823539,0.000889239356529,0.0009136490722,0.00094251224163 +"c1cscn1",0.000998241037688,0.000998241037688,0.000998427145578,0.000999186783385,0.000997778982712,0.000994009967793 +"C#CCCO",0.001643112620979,0.001643112620979,0.001640688282696,0.001612580628741,0.001577227307734,0.00153409570317 +"CC/C=C/CC",0.004916553689412,0.004916553689412,0.004911941961109,0.004861441768465,0.004800464218471,0.004724956106222 +"C/C=C/C(CC)CC",0.00274067330068,0.00274067330068,0.002741975036957,0.002789781185404,0.00289129940497,0.003033654734951 +"CCCCCC(C)C(C)C",0.004202068981141,0.004202068981141,0.004196401141147,0.004117398926314,0.004002498663441,0.003857989138471 +"CCCCCC(C)(C)CC",0.003472157975402,0.003472157975402,0.003462863433988,0.003377112873657,0.003291519489775,0.003187764407276 +"CC1(C)C(=O)[C@]2(C)CC[C@H]1C2",0.003577895131815,0.003577895131815,0.003581344870233,0.003608946910462,0.0036266480917,0.00363888982431 +"CCCCCCC(C)(C)C",0.005722584947602,0.005722584947602,0.005721429212805,0.005667275162823,0.005553997348974,0.005407709938593 +"CCC(C)CCCC(C)C",0.005745379979516,0.005745379979516,0.00573877598369,0.005613385765233,0.005398374012888,0.005120185452244 +"Cc1cc(C)c2ccccc2c1",0.002301083694001,0.002301083694001,0.002301523769017,0.002313828853115,0.002337856288365,0.002369894114871 +"C=CCCC(C)=O",0.000908943007738,0.000908943007738,0.000907540885199,0.000895325901391,0.000884913417855,0.000874061876574 +"C=CCCC(C)=O",0.000859837077208,0.000859837077208,0.000862324096253,0.000887498621312,0.000912852970538,0.000938220738353 +"OC/C=C/c1ccccc1",0.000252673524188,0.000252673524188,0.000252191760701,0.000245647412717,0.000236203694218,0.000224208876858 +"OC/C=C/c1ccccc1",0.001044371847333,0.001044371847333,0.001044709335114,0.001025652899677,0.000972238129473,0.000895359451555 
+"OC/C=C/c1ccccc1",0.000430908276566,0.000430908276566,0.000430101543328,0.000423122932184,0.000415358210613,0.000402981709727 +"OC/C=C/c1ccccc1",0.002683496806127,0.002683496806127,0.002683496806127,0.002618518763381,0.00249045810214,0.002333891535566 +"C/C=C/C=O",0.001692808975099,0.001692808975099,0.001691644113279,0.001670698975495,0.001634382188318,0.001584953843554 +"C/C=C/C=O",0.001414324753441,0.001414324753441,0.001410311637082,0.001356654807834,0.001282861991468,0.001194902945053 +"Clc1cc(Cl)cc(Cl)c1",0.001110120979772,0.001110120979772,0.001110370012591,0.001117979858102,0.001133072649557,0.001153078165939 +"Clc1cc(Cl)cc(Cl)c1",0.001100341638378,0.001100341638378,0.00110462622788,0.001153042176604,0.001212993584033,0.001287639803331 +"Clc1cc(Cl)cc(Cl)c1",0.001095249777361,0.001095249777361,0.001094503807592,0.001086137647633,0.001075831201719,0.001063078978437 +"C#CCO",0.00058441750236,0.00058441750236,0.000583636109991,0.00057694925639,0.000571384658625,0.000565545499676 +"C#CCO",0.001177642618512,0.001177642618512,0.001176739898586,0.001162562139579,0.0011391844742,0.001106895214881 +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1",0.001258785286516,0.001258785286516,0.001258526554426,0.001265999002663,0.001286263405937,0.001311340606229 +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1",0.000885758280606,0.000885758280606,0.000885316721161,0.000900572377555,0.000943338429253,0.001001806787228 +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1",0.002390863978396,0.002390863978396,0.002389385075432,0.002345663608308,0.002260096760039,0.002148284885808 +"BrC(Br)c1cccc(C(Br)Br)c1",0.000590317533026,0.000590317533026,0.000591686331479,0.000602879275115,0.000611499237075,0.000620744191319 +"BrC(Br)c1cccc(C(Br)Br)c1",0.000636209163384,0.000636209163384,0.000636006843913,0.000635050894586,0.000636836585259,0.000642513976296 +"BrC(Br)c1cccc(C(Br)Br)c1",0.004042107521661,0.004042107521661,0.004036783885137,0.00397427922163,0.003896694215722,0.003805833716808 +"CCSC#N",0.003910657833755,0.003910657833755,0.003905707289248,0.003828503427476,0.00370523952941,0.003542392466382 +"CCOC(=O)CC(C(=O)OCC)C(C(=O)OCC)C(=O)OCC",0.002277592362436,0.002277592362436,0.002276414796252,0.002261461377192,0.002240773249748,0.002214276421924 +"CCCCCCCCCCCCCCCCCCCCCC",0.004994984462008,0.004994984462008,0.004989466382052,0.004923812713151,0.004836282746394,0.004722534573558 +"CCCCCCCCCCCCCCCCCCCCCC",0.003929900476866,0.003929900476866,0.003929739664487,0.003910543024829,0.00386756188552,0.003812486739059 +"CC(C)CCC#N",0.000969720221234,0.000969720221234,0.000967864269898,0.000961642510016,0.000973083625161,0.000995616079035 +"CC(C)CCC#N",0.003244411988843,0.003244411988843,0.003232110486425,0.003324855043373,0.003610427694565,0.00386829697764 +"C=CCOC(=O)CCC(=O)OCC=C",0.001470167487509,0.001470167487509,0.001467587142741,0.001436342580937,0.001394808566463,0.001342346141865 +"C=CC1CC=CCC1",0.000562749956281,0.000562749956281,0.000563970290969,0.000568396694315,0.000560271748922,0.000542604792027 +"C=CC1CC=CCC1",0.000767878383041,0.000767878383041,0.000765940433071,0.000753520725764,0.000752016816537,0.000759027582971 +"CCc1ccccn1",0.004409861887012,0.004409861887012,0.004400725560892,0.004281393167948,0.004111545227854,0.003892314902875 +"CCCCCCCC/C=C\CCCCCCCC(=O)OCCCC",0.000776615776034,0.000776615776034,0.000778643756685,0.000795913000439,0.00080879429736,0.000819325343349 +"C#CC(O)c1ccccc1",0.000620778333046,0.000620778333046,0.000620428076424,0.000608552149248,0.000583924153737,0.000550343998713 
+"CC(N)CN",0.001924668669902,0.001924668669902,0.001923010622307,0.001900711893441,0.001867862171346,0.001824175756107 +"CCOC(=O)C(=O)C(C)C(=O)OCC",0.000805514213129,0.000805514213129,0.000805136190559,0.000799862414442,0.000792436353711,0.0007836011027 +"O=S(=O)(Cl)c1ccccc1",0.000464934288403,0.000464934288403,0.000465605340513,0.000472154420445,0.000478509094289,0.000484924890707 +"C=CCc1ccc(O)c(OC)c1",0.001850196251208,0.001850196251208,0.001851533135608,0.001875871520544,0.001916496246382,0.001968326004411 +"C=CCc1ccc(O)c(OC)c1",0.001543725936885,0.001543725936885,0.00154178843564,0.0015190564819,0.00149085229164,0.00145781239583 +"ClCC(Cl)CCl",0.002701306496333,0.002701306496333,0.002699788526744,0.002690416137517,0.002688403842426,0.002689004776899 +"C=CCc1ccc(OC(C)=O)c(OC)c1",0.000623804770522,0.000623804770522,0.000622247524325,0.000606862816259,0.000588980919232,0.00056474208899 +"CCc1ccccc1[N+](=O)[O-]",0.001123020103993,0.001123020103993,0.001120753642306,0.001086114064694,0.001034173086507,0.000971041005104 +"CCc1ccccc1[N+](=O)[O-]",0.002548625192808,0.002548625192808,0.002546804920817,0.002507940984867,0.002439432724395,0.002352405155428 +"Cc1ccccc1",0.003027615937978,0.003027615937978,0.003020276589253,0.002926049354785,0.002791305316685,0.002613708788309 +"CC(=O)OC1CCCCC1",0.000508351036366,0.000508351036366,0.000509099010623,0.000515766117555,0.000521588169948,0.00052762642869 +"CC1(C)CC(O)CC(C)(C)N1[O]",0.001456558638968,0.001456558638968,0.001453106489428,0.001420316444776,0.001383219017534,0.001331604967575 +"CC1(C)CC(O)CC(C)(C)N1[O]",0.002062916952092,0.002062916952092,0.002061752091391,0.002037043066483,0.001992084392317,0.001931917938831 +"C/C=C/C=C/C(=O)O",0.000638912210565,0.000638912210565,0.000637998034308,0.000639559146041,0.000656612557764,0.000683754033542 +"C/C=C/C=C/C(=O)O",0.001408764499532,0.001408764499532,0.001407706097217,0.001394786530306,0.001377306077887,0.001354719573875 +"O=C(O)CCN1C(=O)c2ccccc2C1=O",0.000955020306053,0.000955020306053,0.000957064647584,0.000996924937896,0.001066609421572,0.001158227169969 +"O=C(O)CCN1C(=O)c2ccccc2C1=O",0.00066013516606,0.00066013516606,0.000662304048604,0.000687102131517,0.000719663662809,0.000763932700238 +"CCCCCCCCCCCCCCCCC(=O)OC",0.00408949853518,0.00408949853518,0.004082324203529,0.003969672203026,0.003792776989075,0.003566511282459 +"CCCCCCCCCCCCCCCCC(=O)OC",0.001457749049853,0.001457749049853,0.001455477357252,0.001419446005857,0.001359380382703,0.001276393968015 +"O=C1c2ccccc2C(=O)c2cc(Cl)ccc21",0.00082210696046,0.00082210696046,0.000819278493129,0.0008045293702,0.000807011029494,0.00081658486468 +"O=C1c2ccccc2C(=O)c2cc(Cl)ccc21",0.003643428769101,0.003643428769101,0.003641126761025,0.003597628739648,0.003524758073951,0.003433401932937 +"O=C1c2ccccc2C(=O)c2ccccc21",0.002393759466363,0.002393759466363,0.002387549137707,0.002314532589091,0.002221070981982,0.002106251577641 +"O=C1c2ccccc2C(=O)c2ccccc21",0.001363319981965,0.001363319981965,0.001361544242838,0.00133751527733,0.001302973122143,0.001259086181017 +"CCCCCCc1c2ccccc2cc2ccccc12",0.004935719541282,0.004935719541282,0.004926118043482,0.004838101990824,0.004749489588371,0.004638510548077 +"CCCCCCc1c2ccccc2cc2ccccc12",0.000447649326779,0.000447649326779,0.00045031624782,0.000468960155242,0.000479136882499,0.000491236088445 +"FC1(F)C(F)(F)C(F)(F)C2(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C2(F)C1(F)F",0.001506751653308,0.001506751653308,0.001506562979692,0.001498098357858,0.001478181337091,0.001447701012395 
+"CSC1=CC(=O)C=CC1=O",0.000680334744945,0.000680334744945,0.000682601528949,0.000706206461363,0.000732086948856,0.00076153828672 +"NCc1ccccc1",0.001092425906411,0.001092425906411,0.001091860026198,0.001083129051778,0.001069798314772,0.001053195682325 +"NCc1ccccc1",0.000108653789486,0.000108653789486,0.000108761935631,0.000107854165763,0.000104037227273,9.84311719646078E-05 +"N=c1nc(O)c2[nH]c(=O)cnc2[nH]1",0.001099641038101,0.001099641038101,0.001099641038101,0.001092135969085,0.001071222889446,0.00104393405244 +"N=c1nc(O)c2[nH]c(=O)cnc2[nH]1",0.000624460599789,0.000624460599789,0.000623718467253,0.000627242458678,0.000645762124857,0.000672352650184 +"CCCCCCCCCCCc1c2ccccc2cc2ccccc12",0.002934491741835,0.002934491741835,0.002929721576638,0.002864769498483,0.002770435423108,0.002649543747599 +"CCCCCCCCCCCc1c2ccccc2cc2ccccc12",0.002238921272485,0.002238921272485,0.002240783746441,0.002237826941303,0.002203289309605,0.002148993733298 +"CC(CC(=O)O)c1ccccc1",0.000652429788979,0.000652429788979,0.000651998751927,0.000639206908178,0.000614009842144,0.00058083283661 +"CC(CC(=O)O)c1ccccc1",0.001482328962704,0.001482328962704,0.001480589679768,0.001450931277174,0.001403469486115,0.00134441324605 +"CC(CC(=O)O)c1ccccc1",0.002659047324208,0.002659047324208,0.002655461742804,0.002602062168189,0.002520710964502,0.00241691668491 +"COC(=O)[C@@H]1CCC[C@H]1C(=O)OC",0.001001450445642,0.001001450445642,0.001000736314585,0.00098937284165,0.000970544362528,0.000944584651821 +"CC(=O)n1cnc(/C=C2\N=C(c3ccccc3)OC2=O)c1",0.00135240208531,0.00135240208531,0.001349499873797,0.001322716564314,0.001294392861261,0.001256649074795 +"CC(=O)n1cnc(/C=C2\N=C(c3ccccc3)OC2=O)c1",0.004171883538026,0.004171883538026,0.004165772091148,0.004092425579288,0.00399345740674,0.003864014917082 +"COC(=O)/C=C/c1ccc([N+](=O)[O-])s1",0.001201276992511,0.001201276992511,0.001198042710525,0.0011546269286,0.001089022564254,0.000999812579256 +"COC(=O)/C=C/c1ccc([N+](=O)[O-])s1",0.003582717070019,0.003582717070019,0.003579002460625,0.003521023742011,0.003431539030957,0.003319251424274 +"COC(=O)/C=C/c1ccc(Br)s1",0.000935722616009,0.000935722616009,0.000934810776445,0.000936175414041,0.000952615255936,0.000978553209962 +"COC(=O)/C=C/c1ccc(Br)s1",0.007763918244485,0.007763918244485,0.00775576965242,0.007639252563432,0.00747176203788,0.007270127390359 +"O=C(O)/C=C/c1ccc([N+](=O)[O-])s1",0.000620214389512,0.000620214389512,0.000618581387968,0.000598902496643,0.000573369091056,0.000542270705958 +"O=C(O)/C=C/c1ccc([N+](=O)[O-])s1",0.001873009041489,0.001873009041489,0.001865665095909,0.001798360571644,0.001731497194345,0.001650531358074 +"O=C(O)/C=C/c1ccc(Cl)s1",0.002575923417334,0.002575923417334,0.002572500301067,0.002521705204504,0.002439830527684,0.002326258705307 +"O=C(O)/C=C/c1ccc(Cl)s1",0.004765302802949,0.004765302802949,0.004757410728739,0.004683418332195,0.004611057513713,0.004529205487703 +"O=C(O)C=Cc1ccc(Br)s1",0.002159911677497,0.002159911677497,0.00215562438673,0.002098904538199,0.002017060157172,0.00191053927471 +"O=C(O)C=Cc1ccc(Br)s1",0.001711612555351,0.001711612555351,0.001711622994992,0.001698446770849,0.001665894396997,0.001620968157293 +"C/C=C/C(=O)OC(C)(C)C",0.000545599990522,0.000545599990522,0.000544310639453,0.000526270055461,0.000498540940207,0.000461140224687 +"O=C(O)[C@@H]1CC[C@H]1C(=O)O",0.001258387118705,0.001258387118705,0.001258497202538,0.001271927290944,0.001303903445908,0.00134906368119 
+"O=C(O)[C@@H]1CC[C@H]1C(=O)O",0.001347510688857,0.001347510688857,0.001348113796452,0.001350182047046,0.001346387333535,0.001338789705495 +"Cc1ccc(C=NO)s1",0.001951859605456,0.001951859605456,0.001949151324343,0.0019265135138,0.001905590642066,0.001877206867095 +"Cc1ccc(C=NO)s1",0.001206283257257,0.001206283257257,0.001205416539329,0.001193600173009,0.001177158981928,0.001157526274622 +"Cc1ccc(Br)s1",0.002958940397577,0.002958940397577,0.002955725089221,0.002887856137041,0.002767603160027,0.002613051533751 +"O=[N+]([O-])c1cccs1",0.00121976090843,0.00121976090843,0.001219451392425,0.001204299650943,0.001172339055533,0.001130890798451 +"CCCCCCCCCCC(C)C",0.003679026398294,0.003679026398294,0.00367470646763,0.003623919127817,0.003556715308429,0.003469161729441 +"CCCCCCCC1CCCCC1",0.00523019493678,0.00523019493678,0.005224737390558,0.005159683858506,0.005076219349197,0.004974621655032 +"c1ccc(CC2CCNCC2)cc1",0.002003307668452,0.002003307668452,0.002001778671835,0.001974037345974,0.001927542088939,0.001867731340188 +"C/C=C\Cl",0.000256313554212,0.000256313554212,0.000255960825317,0.000250821597679,0.000243158438802,0.000233527124157 +"N#Cc1ccc2c(Cl)cccc2n1",0.001825744162353,0.001825744162353,0.001824845405623,0.001811574061474,0.001792363320881,0.001769694961224 +"N#Cc1ccc2c(Cl)cccc2n1",0.002192530310623,0.002192530310623,0.002186500405607,0.002139620093867,0.002110795281417,0.002090768614637 +"Cc1nc(-c2ccccc2)c(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1",0.004719585457034,0.004719585457034,0.004714412622066,0.004628797677538,0.004484975890453,0.004288701315043 +"Cc1nc(-c2ccccc2)c(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1",0.000644355887184,0.000644355887184,0.000644979958832,0.000647129789641,0.000643187548324,0.000635278356872 +"O=c1c(O[C@@H]2O[C@@H](CO)[C@H](O)[C@H]2O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12",0.000146866708032,0.000146866708032,0.000147734787553,0.000161598561242,0.000184279585597,0.000214835127367 +"O=c1c(O[C@@H]2O[C@@H](CO)[C@H](O)[C@H]2O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12",0.000813923805172,0.000813923805172,0.000813649790502,0.000818602335527,0.000834283226656,0.000856039816226 +"CC1(C)CC1C(=O)OCC(=O)c1ccc(-c2ccccc2)cc1",0.001600427562802,0.001600427562802,0.001600256641187,0.001589812946977,0.001567339982101,0.001538327116728 +"CC1(C)CC1C(=O)OCC(=O)c1ccc(-c2ccccc2)cc1",0.003495395407337,0.003495395407337,0.003488279343857,0.003407485665784,0.003300844256072,0.003157242089638 +"CCOC(=O)CCc1ccccc1",0.001801679591141,0.001801679591141,0.001798456757132,0.001760929958604,0.001713657476351,0.001656223795507 +"CCCN(CCC)C(=O)c1ccccc1",0.002921779086415,0.002921779086415,0.002916724536848,0.00287884440293,0.002857322696033,0.002842301414084 +"O=C(Cl)CCl",0.000442319724511,0.000442319724511,0.000444359085458,0.000465648171886,0.000489471109914,0.000517689492641 +"O=C(Cl)CCl",0.001028435476674,0.001028435476674,0.001025172672375,0.000979074868873,0.000909751498723,0.000820260607825 +"O=C(Cl)CCl",0.000234633280642,0.000234633280642,0.000234122469318,0.000229617919318,0.000226696275458,0.00022597057443 +"O=C(Cl)CCl",0.000514462517725,0.000514462517725,0.000512457728144,0.000490162362369,0.000460856892773,0.000421270375364 +"Oc1cccnc1O",0.000523957679607,0.000523957679607,0.000525146683315,0.000543381074787,0.000571826906485,0.000608765153379 +"Oc1cccnc1O",0.001376451598925,0.001376451598925,0.001376090840527,0.00137473855941,0.001377235383874,0.001383186710387 +"Oc1cccnc1O",0.001572651018218,0.001572651018218,0.001566950854991,0.001496223625128,0.001399299531239,0.00127587134396 
+"CCC=C(CC)CC",0.004331797155989,0.004331797155989,0.004321571254328,0.004213072228435,0.004088980725559,0.003942602777491 +"CCCC(CC)CCC",0.003998977942541,0.003998977942541,0.003996853114573,0.003967274317822,0.003925669539396,0.00387614831022 +"CCC(CC)C(CC)CC",0.004792548774896,0.004792548774896,0.004793293237769,0.004785947419391,0.004756865263184,0.004713649987006 +"CCC(C)CCC(C)CC",0.002001793736694,0.002001793736694,0.001996255606332,0.001956546171054,0.001931924288507,0.001903482293456 +"CCCCC(C)(C)CCC",0.002778685707643,0.002778685707643,0.002774251771334,0.002744298378059,0.002725097737185,0.002694624214078 +"CCCCC(C)(CC)CC",0.00332617481953,0.00332617481953,0.003327885281865,0.003365976144788,0.003432510276172,0.003513539894214 +"CCCCCCCC(C)C",0.00781520215437,0.00781520215437,0.00779797128071,0.007621110676782,0.007430400486176,0.00721619563018 +"CCCCCCC(C)CC",0.004129425306313,0.004129425306313,0.004126485602405,0.004076343290657,0.003995268174461,0.00389260968071 diff --git a/chemprop/tests/data/spectra/exclusions.csv b/chemprop/tests/data/spectra/exclusions.csv new file mode 100644 index 0000000000000000000000000000000000000000..dd962fa410db0652116349bde25712e982dfe41d --- /dev/null +++ b/chemprop/tests/data/spectra/exclusions.csv @@ -0,0 +1,201 @@ +"smiles",400,402,404,406,408,410 +"O=C(O)c1ccco1",0.001718021194011,,0.001716797003396,0.001701030921568,0.001677361856277,0.001643664219237 +"O=C(O)c1ccco1",0.000814858567868,0.000814858567868,,0.000821599292867,0.000841480209384,0.000869636808942 +"CCOP(=O)(OCC)C(O)[C@@H]1OC(C)(C)O[C@H]1[C@@H]1COC(C)(C)O1",0.00095975940077,0.00095975940077,,0.000944127265653,0.000921543744199,0.000889734581559 +"CCOP(=O)(OCC)C(O)[C@@H]1OC(C)(C)O[C@H]1[C@@H]1COC(C)(C)O1",0.003490215139879,0.003490215139879,,0.003467171609632,0.003426105091663,0.003371304442035 +"c1ccc(C2=NOC(c3ccccc3)C2)cc1",0.00052270050794,,0.00052104532661,0.000525180733987,0.000558042717288,0.000607812473411 +"c1ccc(C2=NOC(c3ccccc3)C2)cc1",0.002354314549285,,0.002352638290808,0.002313031156778,0.002241742882841,0.002152567395278 +"Clc1ccc(OCc2ccccc2)cc1",0.000810842444357,,0.000809552075127,0.000797494015839,0.000787722398537,0.000781087910526 +"Clc1ccc(OCc2ccccc2)cc1",0.001320808947919,0.001320808947919,,0.001303206221412,0.001293945404753,0.001287487770948 +"Cc1ccc(OCc2ccccc2)cc1",0.00065840172601,0.00065840172601,0.000658059716896,,0.000672467742524,0.000689709782268 +"Cc1ccc(OCc2ccccc2)cc1",0.002311699163512,0.002311699163512,0.00230898717372,,0.002246088363437,0.002203490933737 +"c1ccc(P(CCP(c2ccccc2)c2ccccc2)c2ccccc2)cc1",0.003332960946086,0.003332960946086,0.00332042146637,,0.003133245997156,0.003033806612718 +"c1ccc(P(CCP(c2ccccc2)c2ccccc2)c2ccccc2)cc1",0.001509156146077,0.001509156146077,0.001507610193323,,0.001578102687275,0.001669170936526 +"CC(=O)Oc1ccc(C=O)cc1Br",0.000724402388643,0.000724402388643,,0.000688800993576,0.000634126899912,0.000566945373484 +"CC(=O)Oc1ccc(C=O)cc1Br",0.001859697519601,0.001859697519601,,0.001801014627982,0.00171752205043,0.001613157562645 +"CN=C(O)Oc1cccc(C(=O)O)c1",0.000833728357547,0.000833728357547,0.000833187241774,0.000831338884827,,0.000841583837956 +"CN=C(O)Oc1cccc(C(=O)O)c1",0.002454620804169,0.002454620804169,0.002453797647828,0.002428621423434,,0.00229918968797 +"CN=C(O)Oc1cccc(C(=O)OC)c1",0.000683722707528,0.000683722707528,0.000684397488895,0.000693601845895,0.000706472042492, +"CN=C(O)Oc1cccc(C(=O)OC)c1",0.001449498255641,0.001449498255641,0.001450402275959,0.00146115471988,0.001474975870809, 
+"COc1ccc(OCc2ccccc2)cc1",0.001168586792138,0.001168586792138,0.001167098893544,0.00116276184151,0.001170652662053, +"COc1ccc(OCc2ccccc2)cc1",0.00187424523919,0.00187424523919,0.001869902994179,0.00182610967031,0.001777420007798, +"COc1cc(/C=C/C(=O)O)ccc1OC(C)=O",0.000748475867829,0.000748475867829,0.00074777539367,0.000739575347073,0.00072915813677, +"COc1cc(/C=C/C(=O)O)ccc1OC(C)=O",0.001872254480516,0.001872254480516,0.001867656371957,0.001814070300071,0.00174291769209, +"CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)[C@@](C)(CO)[C@@H]5CC[C@]43C)[C@@H]2C1",0.000925660580061,0.000925660580061,0.000925149182147,0.000919817013521,0.000914304058597, +"CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)[C@@](C)(CO)[C@@H]5CC[C@]43C)[C@@H]2C1",0.003479867685431,0.003479867685431,0.0034699028362,0.003365943491687,0.003249261106792, +"CS(=O)(=O)OC1C2CC3CC(C2)C(=O)C1C3",0.000566294668051,0.000566294668051,0.000565900733051,0.000561280089324,0.000554703167765, +"CS(=O)(=O)OC1C2CC3CC(C2)C(=O)C1C3",0.000984637464029,0.000984637464029,0.000983018797351,0.000958989120183,0.000923439312541, +"O=C(O)[C@@H]1C[C@@H]2C=CC[C@H](C1)C2",0.000625654312298,0.000625654312298,0.000623982463689,0.000618667821929,0.000628416987333, +"O=C(O)[C@@H]1C[C@@H]2C=CC[C@H](C1)C2",0.0017941024604,0.0017941024604,0.001792234148276,0.001760999404394,0.001710831440675, +"OC1=NC2CC3CC(C2)CC1C3",0.001375942061624,0.001375942061624,0.001373785909499,0.001360153794491,0.00135670662078, +"OC1=NC2CC3CC(C2)CC1C3",0.001525946854972,0.001525946854972,0.001520763540364,0.001486197670848,0.001474704959552, +"ON=C1C2CC3CC(C2)CC1C3",0.001106720880744,0.001106720880744,0.001104863881011,0.001080706591626,0.001046051258337, +"ON=C1C2CC3CC(C2)CC1C3",0.003135448864042,0.003135448864042,0.00313439589494,0.003089116297134,0.002991609157676, +"OC1C2CC3CC(C2)CC1C3",0.001562789367383,0.001562789367383,0.001559532867229,0.001534658302208,0.001517257045199, +"OC1C2CC3CC(C2)CC1C3",0.002150075447414,0.002150075447414,0.002145521096006,0.002083811575201,0.001993555534015, +"O=C(O)C1CC2CCCC(C2)C1",0.001341037932348,0.001341037932348,0.001338867547583,0.001314578115846,,0.001242423385961 +"O=C(O)C1CC2CCCC(C2)C1",0.003744276484873,0.003744276484873,0.003743535751921,0.003723029510948,,0.003630424961493 +"NC1C2CC3CC(C2)CC1C3",0.001199712916371,0.001199712916371,0.001199313817154,0.001190270315545,,0.001150845414807 +"NC1C2CC3CC(C2)CC1C3",0.001467837890395,0.001467837890395,0.001464977162953,0.001440153362869,,0.00137874257272 +"O=C(O)C12CC3CC(CC(C3)C1)C2",0.001152332162315,0.001152332162315,0.001152071621577,0.001143428667892,,0.001103226286078 +"O=C(O)C12CC3CC(CC(C3)C1)C2",0.001615398037049,0.001615398037049,0.001615398037049,0.001597272715252,,0.001548153095967 +"O=C(O)C12CC3CC(CC(C3)C1)C2",0.002134517520757,0.002134517520757,0.002133285602166,0.00212154199788,,0.002099190727388 +"O/N=C1/CC2C[C@H]3CC1C[C@@H](C2)C3",0.001243662931256,0.001243662931256,0.001238035303857,0.001185792256904,,0.001078919928777 +"O/N=C1/CC2C[C@H]3CC1C[C@@H](C2)C3",0.003137976147899,,0.003134698735437,0.003084031520585,0.003000987961505,0.002886258043889 +"O=C(O)CC1C[C@H]2CCC[C@@H](C1)C2",0.000919363720206,,0.000921039474642,0.000941929944071,0.000968723026689,0.000999237610327 +"O=C(O)CC1C[C@H]2CCC[C@@H](C1)C2",0.003252326473162,,0.003242679122482,0.003153227070565,0.003061135781429,0.002945080771062 +"ClC(Cl)[C@]12C[C@@H]3C[C@H](C1)C[C@@](C(Cl)Cl)(C3)C2",0.00534845173677,,0.005343782329951,0.005291911881391,0.005224859663568,0.00513414348467 
+"ClC(Cl)[C@]12C[C@@H]3C[C@H](C1)C[C@@](C(Cl)Cl)(C3)C2",0.001945640809474,,0.001942987714868,0.001919738862473,0.001899780873369,0.001879028550369 +"O=C(O)C12CC3CC(CC(CBr)(C3)C1)C2",0.000916370311921,,0.000916482405903,0.000916772814012,0.000916250198849,0.000915929494363 +"O=C(O)C12CC3CC(CC(CBr)(C3)C1)C2",0.007858812308727,,0.007852310122188,0.007762536593329,0.007634486461968,0.007476892821513 +"CC(O)=NC12CC3CC(CC(C3)C1)C2",0.00161158093703,,0.001612359938931,0.001601063351429,0.001563688507875,0.001512924253318 +"CC(O)=NC12CC3CC(CC(C3)C1)C2",0.003741126095613,,0.00373433803993,0.003651674846398,0.003538834963848,0.003391395319184 +"CC(O)=NC1(C)C2CC3CC(C2)CC1C3",0.002618478953991,,0.002615725666243,0.002592869533172,0.002571911936553,0.002543254268616 +"CC(O)=NC1(C)C2CC3CC(C2)CC1C3",0.000893786366761,,0.000893795311505,0.000893818480477,0.000893776781004,0.000893751181174 +"CCCCCCCCCC1CCCCC1",0.00334670096896,,0.003343590820542,0.003296713317767,0.003224588616061,0.003131906766355 +"CCCCCCCCCCC1CCCCC1",0.006340314839409,,0.006340498215315,0.006278594629684,0.006126584700986,0.005922089429307 +"c1ccc2c(c1)Cc1cc3ccccc3cc1-2",0.001114625848908,,0.001112721022853,0.001101521731849,0.00110150219775,0.001108222616505 +"c1ccc2c(c1)Cc1cc3ccccc3cc1-2",0.003596873343085,,0.003592651122822,0.003520922404186,0.003399436134198,0.003234602239806 +"O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1",0.00057790463162,,0.000576775593398,0.000568041722842,0.000561798345277,0.000554927895698 +"O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1",0.000801589964393,,0.00080186919873,0.000802975186025,0.000800929261023,0.000795683091934 +"Cc1ccc(/C=C/C(=O)O)o1",0.001737047861411,,0.001734981293203,0.001714651911826,0.001693055636889,0.001667313852754 +"Cc1ccc(/C=C/C(=O)O)o1",0.002403074396331,,0.00240138076174,0.002372487682542,0.002325768587323,0.002266614549837 +"CCCCNc1ccccc1",0.001448557231584,,0.00144780897147,0.001452239033618,0.001473340300037,0.001503993482388 +"CCCCNc1ccccc1",0.001395728285315,,0.001396301467008,0.001395168959663,0.001382050527161,0.001357968830524 +"NCCNCCN",,0.001301158513397,0.001299248800555,0.001280077319527,0.001259397847293,0.001235077097568 +"COC(=O)/C=C/c1ccco1",,0.0006680060611,0.000667152636274,0.000658060221716,0.000648497505044,0.00063908627888 +"COC(=O)/C=C/c1ccco1",,0.000495688605345,0.000494683202909,0.000486304382877,0.000478167807101,0.00046634033155 +"COC(=O)c1ccc(C(C)=O)o1",,0.000437024622862,0.000435900468775,0.000431960427333,0.000439234183464,0.00045513935411 +"COC(=O)c1ccc(C(C)=O)o1",,0.003290130195429,0.003289035887059,0.003285587606594,0.003291887745201,0.003302257448652 +"COC(=O)/C=C/c1ccc(C)o1",,0.000355106417068,0.000355576192307,0.000363368185437,0.000375731528014,0.000391180201359 +"COC(=O)/C=C/c1ccc(C)o1",,0.001408683538118,0.001406781729215,0.001389552782119,0.00137414614952,0.00135830917763 +"OCc1cccs1",,0.001246981971416,0.001246430066127,0.001231250066406,0.001200760429801,0.001158371504403 +"Cc1ccc(C=O)s1",,0.002489728664158,0.002487585792396,0.002494911596595,0.002540700165362,0.002605171776548 +"Cc1ccc(C=O)s1",,0.000658200227859,0.000657930690997,0.000659913974168,0.000668579611328,0.000681328571521 +"Cc1ccc(C=O)s1",,0.000875323720374,0.000875816823539,0.000889239356529,0.0009136490722,0.00094251224163 +"c1cscn1",,0.000998241037688,0.000998427145578,0.000999186783385,0.000997778982712,0.000994009967793 +"C#CCCO",,0.001643112620979,0.001640688282696,0.001612580628741,0.001577227307734,0.00153409570317 
+"CC/C=C/CC",,0.004916553689412,0.004911941961109,0.004861441768465,0.004800464218471,0.004724956106222 +"C/C=C/C(CC)CC",,0.00274067330068,0.002741975036957,0.002789781185404,0.00289129940497,0.003033654734951 +"CCCCCC(C)C(C)C",,0.004202068981141,0.004196401141147,0.004117398926314,0.004002498663441,0.003857989138471 +"CCCCCC(C)(C)CC",,0.003472157975402,0.003462863433988,0.003377112873657,0.003291519489775,0.003187764407276 +"CC1(C)C(=O)[C@]2(C)CC[C@H]1C2",0.003577895131815,0.003577895131815,,0.003608946910462,0.0036266480917,0.00363888982431 +"CCCCCCC(C)(C)C",0.005722584947602,0.005722584947602,,0.005667275162823,0.005553997348974,0.005407709938593 +"CCC(C)CCCC(C)C",0.005745379979516,0.005745379979516,,0.005613385765233,0.005398374012888,0.005120185452244 +"Cc1cc(C)c2ccccc2c1",0.002301083694001,0.002301083694001,,0.002313828853115,0.002337856288365,0.002369894114871 +"C=CCCC(C)=O",0.000908943007738,0.000908943007738,,0.000895325901391,0.000884913417855,0.000874061876574 +"C=CCCC(C)=O",0.000859837077208,0.000859837077208,,0.000887498621312,0.000912852970538,0.000938220738353 +"OC/C=C/c1ccccc1",0.000252673524188,0.000252673524188,,0.000245647412717,0.000236203694218,0.000224208876858 +"OC/C=C/c1ccccc1",0.001044371847333,0.001044371847333,,0.001025652899677,0.000972238129473,0.000895359451555 +"OC/C=C/c1ccccc1",0.000430908276566,0.000430908276566,,0.000423122932184,0.000415358210613,0.000402981709727 +"OC/C=C/c1ccccc1",0.002683496806127,0.002683496806127,,0.002618518763381,0.00249045810214,0.002333891535566 +"C/C=C/C=O",0.001692808975099,0.001692808975099,,0.001670698975495,0.001634382188318,0.001584953843554 +"C/C=C/C=O",0.001414324753441,0.001414324753441,,0.001356654807834,0.001282861991468,0.001194902945053 +"Clc1cc(Cl)cc(Cl)c1",0.001110120979772,0.001110120979772,,0.001117979858102,0.001133072649557,0.001153078165939 +"Clc1cc(Cl)cc(Cl)c1",0.001100341638378,0.001100341638378,,0.001153042176604,0.001212993584033,0.001287639803331 +"Clc1cc(Cl)cc(Cl)c1",0.001095249777361,0.001095249777361,,0.001086137647633,0.001075831201719,0.001063078978437 +"C#CCO",0.00058441750236,0.00058441750236,,0.00057694925639,0.000571384658625,0.000565545499676 +"C#CCO",0.001177642618512,0.001177642618512,,0.001162562139579,0.0011391844742,0.001106895214881 +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1",0.001258785286516,0.001258785286516,,0.001265999002663,0.001286263405937,0.001311340606229 +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1",0.000885758280606,0.000885758280606,,0.000900572377555,0.000943338429253,0.001001806787228 +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1",0.002390863978396,0.002390863978396,,0.002345663608308,0.002260096760039,0.002148284885808 +"BrC(Br)c1cccc(C(Br)Br)c1",0.000590317533026,0.000590317533026,0.000591686331479,,0.000611499237075,0.000620744191319 +"BrC(Br)c1cccc(C(Br)Br)c1",0.000636209163384,0.000636209163384,0.000636006843913,,0.000636836585259,0.000642513976296 +"BrC(Br)c1cccc(C(Br)Br)c1",0.004042107521661,0.004042107521661,0.004036783885137,,0.003896694215722,0.003805833716808 +"CCSC#N",0.003910657833755,0.003910657833755,0.003905707289248,,0.00370523952941,0.003542392466382 +"CCOC(=O)CC(C(=O)OCC)C(C(=O)OCC)C(=O)OCC",0.002277592362436,0.002277592362436,0.002276414796252,,0.002240773249748,0.002214276421924 +"CCCCCCCCCCCCCCCCCCCCCC",0.004994984462008,0.004994984462008,0.004989466382052,,0.004836282746394,0.004722534573558 +"CCCCCCCCCCCCCCCCCCCCCC",0.003929900476866,0.003929900476866,0.003929739664487,,0.00386756188552,0.003812486739059 
+"CC(C)CCC#N",0.000969720221234,0.000969720221234,0.000967864269898,,0.000973083625161,0.000995616079035 +"CC(C)CCC#N",0.003244411988843,0.003244411988843,0.003232110486425,,0.003610427694565,0.00386829697764 +"C=CCOC(=O)CCC(=O)OCC=C",0.001470167487509,0.001470167487509,0.001467587142741,,0.001394808566463,0.001342346141865 +"C=CC1CC=CCC1",0.000562749956281,0.000562749956281,0.000563970290969,,0.000560271748922,0.000542604792027 +"C=CC1CC=CCC1",0.000767878383041,0.000767878383041,0.000765940433071,,0.000752016816537,0.000759027582971 +"CCc1ccccn1",0.004409861887012,0.004409861887012,0.004400725560892,,0.004111545227854,0.003892314902875 +"CCCCCCCC/C=C\CCCCCCCC(=O)OCCCC",0.000776615776034,0.000776615776034,0.000778643756685,,0.00080879429736,0.000819325343349 +"C#CC(O)c1ccccc1",0.000620778333046,0.000620778333046,0.000620428076424,,0.000583924153737,0.000550343998713 +"CC(N)CN",0.001924668669902,0.001924668669902,0.001923010622307,,0.001867862171346,0.001824175756107 +"CCOC(=O)C(=O)C(C)C(=O)OCC",0.000805514213129,0.000805514213129,0.000805136190559,,0.000792436353711,0.0007836011027 +"O=S(=O)(Cl)c1ccccc1",0.000464934288403,0.000464934288403,0.000465605340513,,0.000478509094289,0.000484924890707 +"C=CCc1ccc(O)c(OC)c1",0.001850196251208,0.001850196251208,0.001851533135608,,0.001916496246382,0.001968326004411 +"C=CCc1ccc(O)c(OC)c1",0.001543725936885,0.001543725936885,0.00154178843564,,0.00149085229164,0.00145781239583 +"ClCC(Cl)CCl",0.002701306496333,0.002701306496333,0.002699788526744,,0.002688403842426,0.002689004776899 +"C=CCc1ccc(OC(C)=O)c(OC)c1",0.000623804770522,0.000623804770522,0.000622247524325,,0.000588980919232,0.00056474208899 +"CCc1ccccc1[N+](=O)[O-]",0.001123020103993,0.001123020103993,0.001120753642306,,0.001034173086507,0.000971041005104 +"CCc1ccccc1[N+](=O)[O-]",0.002548625192808,,0.002546804920817,0.002507940984867,0.002439432724395,0.002352405155428 +"Cc1ccccc1",0.003027615937978,,0.003020276589253,0.002926049354785,0.002791305316685,0.002613708788309 +"CC(=O)OC1CCCCC1",0.000508351036366,,0.000509099010623,0.000515766117555,0.000521588169948,0.00052762642869 +"CC1(C)CC(O)CC(C)(C)N1[O]",0.001456558638968,,0.001453106489428,0.001420316444776,0.001383219017534,0.001331604967575 +"CC1(C)CC(O)CC(C)(C)N1[O]",0.002062916952092,,0.002061752091391,0.002037043066483,0.001992084392317,0.001931917938831 +"C/C=C/C=C/C(=O)O",0.000638912210565,,0.000637998034308,0.000639559146041,0.000656612557764,0.000683754033542 +"C/C=C/C=C/C(=O)O",0.001408764499532,,0.001407706097217,0.001394786530306,0.001377306077887,0.001354719573875 +"O=C(O)CCN1C(=O)c2ccccc2C1=O",0.000955020306053,,0.000957064647584,0.000996924937896,0.001066609421572,0.001158227169969 +"O=C(O)CCN1C(=O)c2ccccc2C1=O",0.00066013516606,,0.000662304048604,0.000687102131517,0.000719663662809,0.000763932700238 +"CCCCCCCCCCCCCCCCC(=O)OC",0.00408949853518,,0.004082324203529,0.003969672203026,0.003792776989075,0.003566511282459 +"CCCCCCCCCCCCCCCCC(=O)OC",0.001457749049853,,0.001455477357252,0.001419446005857,0.001359380382703,0.001276393968015 +"O=C1c2ccccc2C(=O)c2cc(Cl)ccc21",0.00082210696046,,0.000819278493129,0.0008045293702,0.000807011029494,0.00081658486468 +"O=C1c2ccccc2C(=O)c2cc(Cl)ccc21",0.003643428769101,,0.003641126761025,0.003597628739648,0.003524758073951,0.003433401932937 +"O=C1c2ccccc2C(=O)c2ccccc21",0.002393759466363,,0.002387549137707,0.002314532589091,0.002221070981982,0.002106251577641 +"O=C1c2ccccc2C(=O)c2ccccc21",0.001363319981965,,0.001361544242838,0.00133751527733,0.001302973122143,0.001259086181017 
+"CCCCCCc1c2ccccc2cc2ccccc12",0.004935719541282,,0.004926118043482,0.004838101990824,0.004749489588371,0.004638510548077 +"CCCCCCc1c2ccccc2cc2ccccc12",0.000447649326779,,0.00045031624782,0.000468960155242,0.000479136882499,0.000491236088445 +"FC1(F)C(F)(F)C(F)(F)C2(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C2(F)C1(F)F",0.001506751653308,,0.001506562979692,0.001498098357858,0.001478181337091,0.001447701012395 +"CSC1=CC(=O)C=CC1=O",0.000680334744945,,0.000682601528949,0.000706206461363,0.000732086948856,0.00076153828672 +"NCc1ccccc1",0.001092425906411,,0.001091860026198,0.001083129051778,0.001069798314772,0.001053195682325 +"NCc1ccccc1",0.000108653789486,,0.000108761935631,0.000107854165763,0.000104037227273,9.84311719646078E-05 +"N=c1nc(O)c2[nH]c(=O)cnc2[nH]1",0.001099641038101,,0.001099641038101,0.001092135969085,0.001071222889446,0.00104393405244 +"N=c1nc(O)c2[nH]c(=O)cnc2[nH]1",0.000624460599789,,0.000623718467253,0.000627242458678,0.000645762124857,0.000672352650184 +"CCCCCCCCCCCc1c2ccccc2cc2ccccc12",,0.002934491741835,0.002929721576638,0.002864769498483,0.002770435423108,0.002649543747599 +"CCCCCCCCCCCc1c2ccccc2cc2ccccc12",,0.002238921272485,0.002240783746441,0.002237826941303,0.002203289309605,0.002148993733298 +"CC(CC(=O)O)c1ccccc1",,0.000652429788979,0.000651998751927,0.000639206908178,0.000614009842144,0.00058083283661 +"CC(CC(=O)O)c1ccccc1",,0.001482328962704,0.001480589679768,0.001450931277174,0.001403469486115,0.00134441324605 +"CC(CC(=O)O)c1ccccc1",,0.002659047324208,0.002655461742804,0.002602062168189,0.002520710964502,0.00241691668491 +"COC(=O)[C@@H]1CCC[C@H]1C(=O)OC",,0.001001450445642,0.001000736314585,0.00098937284165,0.000970544362528,0.000944584651821 +"CC(=O)n1cnc(/C=C2\N=C(c3ccccc3)OC2=O)c1",,0.00135240208531,0.001349499873797,0.001322716564314,0.001294392861261,0.001256649074795 +"CC(=O)n1cnc(/C=C2\N=C(c3ccccc3)OC2=O)c1",,0.004171883538026,0.004165772091148,0.004092425579288,0.00399345740674,0.003864014917082 +"COC(=O)/C=C/c1ccc([N+](=O)[O-])s1",,0.001201276992511,0.001198042710525,0.0011546269286,0.001089022564254,0.000999812579256 +"COC(=O)/C=C/c1ccc([N+](=O)[O-])s1",,0.003582717070019,0.003579002460625,0.003521023742011,0.003431539030957,0.003319251424274 +"COC(=O)/C=C/c1ccc(Br)s1",,0.000935722616009,0.000934810776445,0.000936175414041,0.000952615255936,0.000978553209962 +"COC(=O)/C=C/c1ccc(Br)s1",,0.007763918244485,0.00775576965242,0.007639252563432,0.00747176203788,0.007270127390359 +"O=C(O)/C=C/c1ccc([N+](=O)[O-])s1",,0.000620214389512,0.000618581387968,0.000598902496643,0.000573369091056,0.000542270705958 +"O=C(O)/C=C/c1ccc([N+](=O)[O-])s1",,0.001873009041489,0.001865665095909,0.001798360571644,0.001731497194345,0.001650531358074 +"O=C(O)/C=C/c1ccc(Cl)s1",,0.002575923417334,0.002572500301067,0.002521705204504,0.002439830527684,0.002326258705307 +"O=C(O)/C=C/c1ccc(Cl)s1",,0.004765302802949,0.004757410728739,0.004683418332195,0.004611057513713,0.004529205487703 +"O=C(O)C=Cc1ccc(Br)s1",,0.002159911677497,0.00215562438673,0.002098904538199,0.002017060157172,0.00191053927471 +"O=C(O)C=Cc1ccc(Br)s1",,0.001711612555351,0.001711622994992,0.001698446770849,0.001665894396997,0.001620968157293 +"C/C=C/C(=O)OC(C)(C)C",,0.000545599990522,0.000544310639453,0.000526270055461,0.000498540940207,0.000461140224687 +"O=C(O)[C@@H]1CC[C@H]1C(=O)O",,0.001258387118705,0.001258497202538,0.001271927290944,0.001303903445908,0.00134906368119 +"O=C(O)[C@@H]1CC[C@H]1C(=O)O",,0.001347510688857,0.001348113796452,0.001350182047046,0.001346387333535,0.001338789705495 
+"Cc1ccc(C=NO)s1",,0.001951859605456,0.001949151324343,0.0019265135138,0.001905590642066,0.001877206867095 +"Cc1ccc(C=NO)s1",,0.001206283257257,0.001205416539329,0.001193600173009,0.001177158981928,0.001157526274622 +"Cc1ccc(Br)s1",,0.002958940397577,0.002955725089221,0.002887856137041,0.002767603160027,0.002613051533751 +"O=[N+]([O-])c1cccs1",0.00121976090843,0.00121976090843,,0.001204299650943,0.001172339055533,0.001130890798451 +"CCCCCCCCCCC(C)C",0.003679026398294,0.003679026398294,,0.003623919127817,0.003556715308429,0.003469161729441 +"CCCCCCCC1CCCCC1",0.00523019493678,0.00523019493678,,0.005159683858506,0.005076219349197,0.004974621655032 +"c1ccc(CC2CCNCC2)cc1",0.002003307668452,0.002003307668452,,0.001974037345974,0.001927542088939,0.001867731340188 +"C/C=C\Cl",0.000256313554212,0.000256313554212,,0.000250821597679,0.000243158438802,0.000233527124157 +"N#Cc1ccc2c(Cl)cccc2n1",0.001825744162353,0.001825744162353,,0.001811574061474,0.001792363320881,0.001769694961224 +"N#Cc1ccc2c(Cl)cccc2n1",0.002192530310623,0.002192530310623,,0.002139620093867,0.002110795281417,0.002090768614637 +"Cc1nc(-c2ccccc2)c(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1",0.004719585457034,0.004719585457034,,0.004628797677538,0.004484975890453,0.004288701315043 +"Cc1nc(-c2ccccc2)c(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1",0.000644355887184,0.000644355887184,,0.000647129789641,0.000643187548324,0.000635278356872 +"O=c1c(O[C@@H]2O[C@@H](CO)[C@H](O)[C@H]2O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12",0.000146866708032,0.000146866708032,,0.000161598561242,0.000184279585597,0.000214835127367 +"O=c1c(O[C@@H]2O[C@@H](CO)[C@H](O)[C@H]2O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12",0.000813923805172,0.000813923805172,,0.000818602335527,0.000834283226656,0.000856039816226 +"CC1(C)CC1C(=O)OCC(=O)c1ccc(-c2ccccc2)cc1",0.001600427562802,0.001600427562802,,0.001589812946977,0.001567339982101,0.001538327116728 +"CC1(C)CC1C(=O)OCC(=O)c1ccc(-c2ccccc2)cc1",0.003495395407337,0.003495395407337,,0.003407485665784,0.003300844256072,0.003157242089638 +"CCOC(=O)CCc1ccccc1",0.001801679591141,0.001801679591141,,0.001760929958604,0.001713657476351,0.001656223795507 +"CCCN(CCC)C(=O)c1ccccc1",0.002921779086415,0.002921779086415,,0.00287884440293,0.002857322696033,0.002842301414084 +"O=C(Cl)CCl",0.000442319724511,0.000442319724511,,0.000465648171886,0.000489471109914,0.000517689492641 +"O=C(Cl)CCl",0.001028435476674,0.001028435476674,,0.000979074868873,0.000909751498723,0.000820260607825 +"O=C(Cl)CCl",0.000234633280642,0.000234633280642,,0.000229617919318,0.000226696275458,0.00022597057443 +"O=C(Cl)CCl",0.000514462517725,0.000514462517725,,0.000490162362369,0.000460856892773,0.000421270375364 +"Oc1cccnc1O",0.000523957679607,0.000523957679607,,0.000543381074787,0.000571826906485,0.000608765153379 +"Oc1cccnc1O",0.001376451598925,0.001376451598925,,0.00137473855941,0.001377235383874,0.001383186710387 +"Oc1cccnc1O",0.001572651018218,0.001572651018218,,0.001496223625128,0.001399299531239,0.00127587134396 +"CCC=C(CC)CC",0.004331797155989,0.004331797155989,,0.004213072228435,0.004088980725559,0.003942602777491 +"CCCC(CC)CCC",0.003998977942541,0.003998977942541,,0.003967274317822,0.003925669539396,0.00387614831022 +"CCC(CC)C(CC)CC",0.004792548774896,0.004792548774896,,0.004785947419391,0.004756865263184,0.004713649987006 +"CCC(C)CCC(C)CC",0.002001793736694,0.002001793736694,,0.001956546171054,0.001931924288507,0.001903482293456 +"CCCCC(C)(C)CCC",0.002778685707643,0.002778685707643,,0.002744298378059,0.002725097737185,0.002694624214078 
+"CCCCC(C)(CC)CC",0.00332617481953,0.00332617481953,,0.003365976144788,0.003432510276172,0.003513539894214 +"CCCCCCCC(C)C",0.00781520215437,0.00781520215437,,0.007621110676782,0.007430400486176,0.00721619563018 +"CCCCCCC(C)CC",0.004129425306313,0.004129425306313,,0.004076343290657,0.003995268174461,0.00389260968071 diff --git a/chemprop/tests/data/spectra/features.csv b/chemprop/tests/data/spectra/features.csv new file mode 100644 index 0000000000000000000000000000000000000000..e33bad8a862a5cc55c16d2abb30c7cd5a263f5ca --- /dev/null +++ b/chemprop/tests/data/spectra/features.csv @@ -0,0 +1,201 @@ +"gas","liquid","KBr","nujol mull","CCl4" +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,1,0,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,1,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,0,1,0,0 +0,0,0,1,0 +0,0,1,0,0 +0,0,0,0,1 +0,0,1,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,1,0,0 +0,0,0,0,1 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,1,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,1,0,0,0 +0,0,0,1,0 +0,1,0,0,0 +0,0,1,0,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,1,0,0,0 +0,0,0,1,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,0,1,0,0 +0,0,0,1,0 +0,0,0,0,1 +0,0,0,1,0 +0,0,0,0,1 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 +0,1,0,0,0 diff --git a/chemprop/tests/data/spectra/mask.csv b/chemprop/tests/data/spectra/mask.csv new file mode 100644 index 0000000000000000000000000000000000000000..516a569756824d0d81a398e6feca2118d8f5b634 --- /dev/null +++ b/chemprop/tests/data/spectra/mask.csv @@ -0,0 +1,6 @@ +,400,402,404,406,408,410 +"gas",1,1,1,1,1,1 +"liquid",1,1,1,1,1,1 +"KBr",0,0,1,1,1,1 +"nujol mull",1,1,1,1,0,0 +"CCl4",1,1,0,0,1,1 diff --git a/chemprop/tests/data/spectra/test_smiles.csv b/chemprop/tests/data/spectra/test_smiles.csv new file mode 100644 index 0000000000000000000000000000000000000000..c7b41df8cf1c6ccaa5a4a5b8a28bed4f2b29d4d1 --- /dev/null +++ b/chemprop/tests/data/spectra/test_smiles.csv @@ -0,0 +1,201 @@ +"smiles" 
+"O=C(O)c1ccco1" +"O=C(O)c1ccco1" +"CCOP(=O)(OCC)C(O)[C@@H]1OC(C)(C)O[C@H]1[C@@H]1COC(C)(C)O1" +"CCOP(=O)(OCC)C(O)[C@@H]1OC(C)(C)O[C@H]1[C@@H]1COC(C)(C)O1" +"c1ccc(C2=NOC(c3ccccc3)C2)cc1" +"c1ccc(C2=NOC(c3ccccc3)C2)cc1" +"Clc1ccc(OCc2ccccc2)cc1" +"Clc1ccc(OCc2ccccc2)cc1" +"Cc1ccc(OCc2ccccc2)cc1" +"Cc1ccc(OCc2ccccc2)cc1" +"c1ccc(P(CCP(c2ccccc2)c2ccccc2)c2ccccc2)cc1" +"c1ccc(P(CCP(c2ccccc2)c2ccccc2)c2ccccc2)cc1" +"CC(=O)Oc1ccc(C=O)cc1Br" +"CC(=O)Oc1ccc(C=O)cc1Br" +"CN=C(O)Oc1cccc(C(=O)O)c1" +"CN=C(O)Oc1cccc(C(=O)O)c1" +"CN=C(O)Oc1cccc(C(=O)OC)c1" +"CN=C(O)Oc1cccc(C(=O)OC)c1" +"COc1ccc(OCc2ccccc2)cc1" +"COc1ccc(OCc2ccccc2)cc1" +"COc1cc(/C=C/C(=O)O)ccc1OC(C)=O" +"COc1cc(/C=C/C(=O)O)ccc1OC(C)=O" +"CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)[C@@](C)(CO)[C@@H]5CC[C@]43C)[C@@H]2C1" +"CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)[C@@](C)(CO)[C@@H]5CC[C@]43C)[C@@H]2C1" +"CS(=O)(=O)OC1C2CC3CC(C2)C(=O)C1C3" +"CS(=O)(=O)OC1C2CC3CC(C2)C(=O)C1C3" +"O=C(O)[C@@H]1C[C@@H]2C=CC[C@H](C1)C2" +"O=C(O)[C@@H]1C[C@@H]2C=CC[C@H](C1)C2" +"OC1=NC2CC3CC(C2)CC1C3" +"OC1=NC2CC3CC(C2)CC1C3" +"ON=C1C2CC3CC(C2)CC1C3" +"ON=C1C2CC3CC(C2)CC1C3" +"OC1C2CC3CC(C2)CC1C3" +"OC1C2CC3CC(C2)CC1C3" +"O=C(O)C1CC2CCCC(C2)C1" +"O=C(O)C1CC2CCCC(C2)C1" +"NC1C2CC3CC(C2)CC1C3" +"NC1C2CC3CC(C2)CC1C3" +"O=C(O)C12CC3CC(CC(C3)C1)C2" +"O=C(O)C12CC3CC(CC(C3)C1)C2" +"O=C(O)C12CC3CC(CC(C3)C1)C2" +"O/N=C1/CC2C[C@H]3CC1C[C@@H](C2)C3" +"O/N=C1/CC2C[C@H]3CC1C[C@@H](C2)C3" +"O=C(O)CC1C[C@H]2CCC[C@@H](C1)C2" +"O=C(O)CC1C[C@H]2CCC[C@@H](C1)C2" +"ClC(Cl)[C@]12C[C@@H]3C[C@H](C1)C[C@@](C(Cl)Cl)(C3)C2" +"ClC(Cl)[C@]12C[C@@H]3C[C@H](C1)C[C@@](C(Cl)Cl)(C3)C2" +"O=C(O)C12CC3CC(CC(CBr)(C3)C1)C2" +"O=C(O)C12CC3CC(CC(CBr)(C3)C1)C2" +"CC(O)=NC12CC3CC(CC(C3)C1)C2" +"CC(O)=NC12CC3CC(CC(C3)C1)C2" +"CC(O)=NC1(C)C2CC3CC(C2)CC1C3" +"CC(O)=NC1(C)C2CC3CC(C2)CC1C3" +"CCCCCCCCCC1CCCCC1" +"CCCCCCCCCCC1CCCCC1" +"c1ccc2c(c1)Cc1cc3ccccc3cc1-2" +"c1ccc2c(c1)Cc1cc3ccccc3cc1-2" +"O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1" +"O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1" +"Cc1ccc(/C=C/C(=O)O)o1" +"Cc1ccc(/C=C/C(=O)O)o1" +"CCCCNc1ccccc1" +"CCCCNc1ccccc1" +"NCCNCCN" +"COC(=O)/C=C/c1ccco1" +"COC(=O)/C=C/c1ccco1" +"COC(=O)c1ccc(C(C)=O)o1" +"COC(=O)c1ccc(C(C)=O)o1" +"COC(=O)/C=C/c1ccc(C)o1" +"COC(=O)/C=C/c1ccc(C)o1" +"OCc1cccs1" +"Cc1ccc(C=O)s1" +"Cc1ccc(C=O)s1" +"Cc1ccc(C=O)s1" +"c1cscn1" +"C#CCCO" +"CC/C=C/CC" +"C/C=C/C(CC)CC" +"CCCCCC(C)C(C)C" +"CCCCCC(C)(C)CC" +"CC1(C)C(=O)[C@]2(C)CC[C@H]1C2" +"CCCCCCC(C)(C)C" +"CCC(C)CCCC(C)C" +"Cc1cc(C)c2ccccc2c1" +"C=CCCC(C)=O" +"C=CCCC(C)=O" +"OC/C=C/c1ccccc1" +"OC/C=C/c1ccccc1" +"OC/C=C/c1ccccc1" +"OC/C=C/c1ccccc1" +"C/C=C/C=O" +"C/C=C/C=O" +"Clc1cc(Cl)cc(Cl)c1" +"Clc1cc(Cl)cc(Cl)c1" +"Clc1cc(Cl)cc(Cl)c1" +"C#CCO" +"C#CCO" +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1" +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1" +"O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1" +"BrC(Br)c1cccc(C(Br)Br)c1" +"BrC(Br)c1cccc(C(Br)Br)c1" +"BrC(Br)c1cccc(C(Br)Br)c1" +"CCSC#N" +"CCOC(=O)CC(C(=O)OCC)C(C(=O)OCC)C(=O)OCC" +"CCCCCCCCCCCCCCCCCCCCCC" +"CCCCCCCCCCCCCCCCCCCCCC" +"CC(C)CCC#N" +"CC(C)CCC#N" +"C=CCOC(=O)CCC(=O)OCC=C" +"C=CC1CC=CCC1" +"C=CC1CC=CCC1" +"CCc1ccccn1" +"CCCCCCCC/C=C\CCCCCCCC(=O)OCCCC" +"C#CC(O)c1ccccc1" +"CC(N)CN" +"CCOC(=O)C(=O)C(C)C(=O)OCC" +"O=S(=O)(Cl)c1ccccc1" +"C=CCc1ccc(O)c(OC)c1" +"C=CCc1ccc(O)c(OC)c1" +"ClCC(Cl)CCl" +"C=CCc1ccc(OC(C)=O)c(OC)c1" +"CCc1ccccc1[N+](=O)[O-]" +"CCc1ccccc1[N+](=O)[O-]" +"Cc1ccccc1" +"CC(=O)OC1CCCCC1" +"CC1(C)CC(O)CC(C)(C)N1[O]" +"CC1(C)CC(O)CC(C)(C)N1[O]" +"C/C=C/C=C/C(=O)O" 
+"C/C=C/C=C/C(=O)O" +"O=C(O)CCN1C(=O)c2ccccc2C1=O" +"O=C(O)CCN1C(=O)c2ccccc2C1=O" +"CCCCCCCCCCCCCCCCC(=O)OC" +"CCCCCCCCCCCCCCCCC(=O)OC" +"O=C1c2ccccc2C(=O)c2cc(Cl)ccc21" +"O=C1c2ccccc2C(=O)c2cc(Cl)ccc21" +"O=C1c2ccccc2C(=O)c2ccccc21" +"O=C1c2ccccc2C(=O)c2ccccc21" +"CCCCCCc1c2ccccc2cc2ccccc12" +"CCCCCCc1c2ccccc2cc2ccccc12" +"FC1(F)C(F)(F)C(F)(F)C2(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C2(F)C1(F)F" +"CSC1=CC(=O)C=CC1=O" +"NCc1ccccc1" +"NCc1ccccc1" +"N=c1nc(O)c2[nH]c(=O)cnc2[nH]1" +"N=c1nc(O)c2[nH]c(=O)cnc2[nH]1" +"CCCCCCCCCCCc1c2ccccc2cc2ccccc12" +"CCCCCCCCCCCc1c2ccccc2cc2ccccc12" +"CC(CC(=O)O)c1ccccc1" +"CC(CC(=O)O)c1ccccc1" +"CC(CC(=O)O)c1ccccc1" +"COC(=O)[C@@H]1CCC[C@H]1C(=O)OC" +"CC(=O)n1cnc(/C=C2\N=C(c3ccccc3)OC2=O)c1" +"CC(=O)n1cnc(/C=C2\N=C(c3ccccc3)OC2=O)c1" +"COC(=O)/C=C/c1ccc([N+](=O)[O-])s1" +"COC(=O)/C=C/c1ccc([N+](=O)[O-])s1" +"COC(=O)/C=C/c1ccc(Br)s1" +"COC(=O)/C=C/c1ccc(Br)s1" +"O=C(O)/C=C/c1ccc([N+](=O)[O-])s1" +"O=C(O)/C=C/c1ccc([N+](=O)[O-])s1" +"O=C(O)/C=C/c1ccc(Cl)s1" +"O=C(O)/C=C/c1ccc(Cl)s1" +"O=C(O)C=Cc1ccc(Br)s1" +"O=C(O)C=Cc1ccc(Br)s1" +"C/C=C/C(=O)OC(C)(C)C" +"O=C(O)[C@@H]1CC[C@H]1C(=O)O" +"O=C(O)[C@@H]1CC[C@H]1C(=O)O" +"Cc1ccc(C=NO)s1" +"Cc1ccc(C=NO)s1" +"Cc1ccc(Br)s1" +"O=[N+]([O-])c1cccs1" +"CCCCCCCCCCC(C)C" +"CCCCCCCC1CCCCC1" +"c1ccc(CC2CCNCC2)cc1" +"C/C=C\Cl" +"N#Cc1ccc2c(Cl)cccc2n1" +"N#Cc1ccc2c(Cl)cccc2n1" +"Cc1nc(-c2ccccc2)c(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1" +"Cc1nc(-c2ccccc2)c(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1" +"O=c1c(O[C@@H]2O[C@@H](CO)[C@H](O)[C@H]2O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12" +"O=c1c(O[C@@H]2O[C@@H](CO)[C@H](O)[C@H]2O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12" +"CC1(C)CC1C(=O)OCC(=O)c1ccc(-c2ccccc2)cc1" +"CC1(C)CC1C(=O)OCC(=O)c1ccc(-c2ccccc2)cc1" +"CCOC(=O)CCc1ccccc1" +"CCCN(CCC)C(=O)c1ccccc1" +"O=C(Cl)CCl" +"O=C(Cl)CCl" +"O=C(Cl)CCl" +"O=C(Cl)CCl" +"Oc1cccnc1O" +"Oc1cccnc1O" +"Oc1cccnc1O" +"CCC=C(CC)CC" +"CCCC(CC)CCC" +"CCC(CC)C(CC)CC" +"CCC(C)CCC(C)CC" +"CCCCC(C)(C)CCC" +"CCCCC(C)(CC)CC" +"CCCCCCCC(C)C" +"CCCCCCC(C)CC" diff --git a/chemprop/tests/data/test_smiles.csv b/chemprop/tests/data/test_smiles.csv new file mode 100644 index 0000000000000000000000000000000000000000..fae67e410bbe2d62968b7af8174ac4c083536782 --- /dev/null +++ b/chemprop/tests/data/test_smiles.csv @@ -0,0 +1,11 @@ +smiles +CCOP(=S)(OCC)SC(CCl)N2C(=O)c1ccccc1C2=O +Brc1ccc(Br)cc1 +Cn2c(=O)on(c1ccc(Cl)c(Cl)c1)c2=O +Oc1ccc(cc1)c2ccccc2 +CC1=C(CCCO1)C(=O)Nc2ccccc2 +CCOC=C +CCC#C +COc1ncnc2nccnc12 +CCCCC(C)(O)CC +Clc1ccc(Cl)cc1 diff --git a/chemprop/tests/integration/conftest.py b/chemprop/tests/integration/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..707132223318188dc7f46fdc33043bd7085b720a --- /dev/null +++ b/chemprop/tests/integration/conftest.py @@ -0,0 +1,74 @@ +import warnings + +import pytest + +from chemprop import models, nn +from chemprop.models import multi + +warnings.filterwarnings("ignore", module=r"lightning.*", append=True) + + +@pytest.fixture(scope="session") +def mpnn(request): + message_passing, agg = request.param + ffn = nn.RegressionFFN() + + return models.MPNN(message_passing, agg, ffn, True) + + +@pytest.fixture(scope="session") +def regression_mpnn_mve(request): + agg = nn.SumAggregation() + ffn = nn.MveFFN() + + return models.MPNN(request.param, agg, ffn, True) + + +@pytest.fixture(scope="session") +def regression_mpnn_evidential(request): + agg = nn.SumAggregation() + ffn = nn.EvidentialFFN() + + return models.MPNN(request.param, agg, ffn, True) + + +@pytest.fixture(scope="session") +def classification_mpnn_dirichlet(request): + 
agg = nn.SumAggregation() + ffn = nn.BinaryDirichletFFN() + + return models.MPNN(request.param, agg, ffn, True) + + +@pytest.fixture(scope="session") +def classification_mpnn(request): + agg = nn.SumAggregation() + ffn = nn.BinaryClassificationFFN() + + return models.MPNN(request.param, agg, ffn, True) + + +@pytest.fixture(scope="session") +def classification_mpnn_multiclass(request): + agg = nn.SumAggregation() + ffn = nn.MulticlassClassificationFFN(n_classes=3) + + return models.MPNN(request.param, agg, ffn, True) + + +@pytest.fixture(scope="session") +def classification_mpnn_multiclass_dirichlet(request): + agg = nn.SumAggregation() + ffn = nn.MulticlassDirichletFFN(n_classes=3) + + return models.MPNN(request.param, agg, ffn, True) + + +@pytest.fixture(scope="session") +def mcmpnn(request): + blocks, n_components, shared = request.param + mcmp = nn.MulticomponentMessagePassing(blocks, n_components, shared=shared) + agg = nn.SumAggregation() + ffn = nn.RegressionFFN(input_dim=mcmp.output_dim) + + return multi.MulticomponentMPNN(mcmp, agg, ffn, True) diff --git a/chemprop/tests/integration/test_classification_mol.py b/chemprop/tests/integration/test_classification_mol.py new file mode 100644 index 0000000000000000000000000000000000000000..7052602a4f03581f7598a9eee2dd20d6318aac14 --- /dev/null +++ b/chemprop/tests/integration/test_classification_mol.py @@ -0,0 +1,134 @@ +"""This integration test is designed to ensure that the chemprop model can _overfit_ the training +data. A small enough dataset should be memorizable by even a moderately sized model, so this test +should generally pass.""" + +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader +from torchmetrics import functional as F + +from chemprop import nn +from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch + + +@pytest.fixture +def data(mol_classification_data): + smis, Y = mol_classification_data + + return [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, Y)] + + +@pytest.fixture +def dataloader(data): + dset = MoleculeDataset(data) + + return DataLoader(dset, 32, collate_fn=collate_batch) + + +@pytest.mark.parametrize( + "classification_mpnn", [nn.BondMessagePassing(), nn.AtomMessagePassing()], indirect=True +) +@pytest.mark.integration +def test_quick(classification_mpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(classification_mpnn, dataloader, None) + + +@pytest.mark.parametrize( + "classification_mpnn_dirichlet", + [nn.BondMessagePassing(), nn.AtomMessagePassing()], + indirect=True, +) +@pytest.mark.integration +def test_dirichlet_quick(classification_mpnn_dirichlet, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(classification_mpnn_dirichlet, dataloader, None) + + +@pytest.mark.parametrize( + "classification_mpnn", [nn.BondMessagePassing(), nn.AtomMessagePassing()], indirect=True +) +@pytest.mark.integration +def test_overfit(classification_mpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(classification_mpnn, 
dataloader) + + predss = [] + targetss = [] + masks = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = classification_mpnn(bmg) + not_nan = ~targets.isnan() + predss.append(preds) + targetss.append(targets) + masks.append(not_nan) + + preds = torch.cat(predss) + targets = torch.cat(targetss) + mask = torch.cat(masks) + auroc = F.auroc(preds[mask], targets[mask].long(), task="binary") + assert auroc >= 0.99 + + +@pytest.mark.parametrize( + "classification_mpnn_dirichlet", + [nn.BondMessagePassing(), nn.AtomMessagePassing()], + indirect=True, +) +@pytest.mark.integration +def test_dirichlet_overfit(classification_mpnn_dirichlet, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=200, + overfit_batches=1.00, + ) + trainer.fit(classification_mpnn_dirichlet, dataloader) + + predss = [] + targetss = [] + masks = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = classification_mpnn_dirichlet(bmg) + not_nan = ~targets.isnan() + predss.append(preds) + targetss.append(targets) + masks.append(not_nan) + + preds = torch.cat(predss)[..., 0] + targets = torch.cat(targetss) + mask = torch.cat(masks) + auroc = F.auroc(preds[mask], targets[mask].long(), task="binary") + assert auroc >= 0.99 diff --git a/chemprop/tests/integration/test_classification_mol_multiclass.py b/chemprop/tests/integration/test_classification_mol_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..0f9724e0873eea669f8d81f8c39f18651203ca34 --- /dev/null +++ b/chemprop/tests/integration/test_classification_mol_multiclass.py @@ -0,0 +1,136 @@ +"""This integration test is designed to ensure that the chemprop model can _overfit_ the training +data. 
A small enough dataset should be memorizable by even a moderately sized model, so this test +should generally pass.""" + +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader +import torchmetrics + +from chemprop import nn +from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch + + +@pytest.fixture +def data(mol_classification_data_multiclass): + smis, Y = mol_classification_data_multiclass + + return [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, Y)] + + +@pytest.fixture +def dataloader(data): + dset = MoleculeDataset(data) + + return DataLoader(dset, 32, collate_fn=collate_batch) + + +@pytest.mark.parametrize( + "classification_mpnn_multiclass", + [nn.BondMessagePassing(), nn.AtomMessagePassing()], + indirect=True, +) +@pytest.mark.integration +def test_quick(classification_mpnn_multiclass, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(classification_mpnn_multiclass, dataloader, None) + + +@pytest.mark.parametrize( + "classification_mpnn_multiclass_dirichlet", + [nn.BondMessagePassing(), nn.AtomMessagePassing()], + indirect=True, +) +@pytest.mark.integration +def test_dirichlet_quick(classification_mpnn_multiclass_dirichlet, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(classification_mpnn_multiclass_dirichlet, dataloader, None) + + +@pytest.mark.parametrize( + "classification_mpnn_multiclass", + [nn.BondMessagePassing(), nn.AtomMessagePassing()], + indirect=True, +) +@pytest.mark.integration +def test_overfit(classification_mpnn_multiclass, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(classification_mpnn_multiclass, dataloader) + + predss = [] + targetss = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = classification_mpnn_multiclass(bmg) + preds = preds.transpose(1, 2) + predss.append(preds) + targetss.append(targets) + + preds = torch.cat(predss) + targets = torch.cat(targetss) + accuracy = torchmetrics.functional.accuracy( + preds, targets.long(), task="multiclass", num_classes=3 + ) + assert accuracy >= 0.99 + + +@pytest.mark.parametrize( + "classification_mpnn_multiclass_dirichlet", + [nn.BondMessagePassing(), nn.AtomMessagePassing()], + indirect=True, +) +@pytest.mark.integration +def test_dirichlet_overfit(classification_mpnn_multiclass_dirichlet, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=200, + overfit_batches=1.00, + ) + trainer.fit(classification_mpnn_multiclass_dirichlet, dataloader) + + predss = [] + targetss = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = classification_mpnn_multiclass_dirichlet(bmg) + preds = preds.transpose(1, 2) + predss.append(preds) + targetss.append(targets) + + preds = torch.cat(predss) + targets = torch.cat(targetss) + accuracy = torchmetrics.functional.accuracy( + preds, targets.long(), task="multiclass", num_classes=3 + ) + assert accuracy >= 0.99 
diff --git a/chemprop/tests/integration/test_output_transform.py b/chemprop/tests/integration/test_output_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..7890e3aa08fcaa3009311dc773bc01bbca150f82 --- /dev/null +++ b/chemprop/tests/integration/test_output_transform.py @@ -0,0 +1,61 @@ +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader + +from chemprop import nn +from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch +from chemprop.models import MPNN + + +@pytest.fixture +def data(mol_regression_data): + smis, Y = mol_regression_data + + return [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, Y)] + + +def test_output_transform(data): + train_dset = MoleculeDataset(data) + output_scaler = train_dset.normalize_targets() + train_loader = DataLoader(train_dset, 32, collate_fn=collate_batch) + + test_dset = MoleculeDataset(data) + test_loader = DataLoader(test_dset, 32, collate_fn=collate_batch, shuffle=False) + + output_transform = nn.UnscaleTransform.from_standard_scaler(output_scaler) + ffn = nn.RegressionFFN(output_transform=output_transform) + mpnn = MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), ffn) + + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(mpnn, train_loader) + + mpnn.train() + predss = [] + for batch in train_loader: + bmg, _, _, targets, *_ = batch + preds = mpnn(bmg) + predss.append(preds) + + preds = torch.cat(predss) + std, mean = torch.std_mean(preds, dim=0) + + assert torch.allclose(std, torch.ones_like(std), atol=0.1) + assert torch.allclose(mean, torch.zeros_like(mean), atol=0.1) + + predss = trainer.predict(mpnn, test_loader) + preds = torch.cat(predss) + std, mean = torch.std_mean(preds, dim=0) + y_std, y_mean = torch.std_mean(torch.from_numpy(test_dset.Y).float(), dim=0) + + assert torch.allclose(std, y_std, atol=0.1) + assert torch.allclose(mean, y_mean, atol=0.1) diff --git a/chemprop/tests/integration/test_regression_mol+mol.py b/chemprop/tests/integration/test_regression_mol+mol.py new file mode 100644 index 0000000000000000000000000000000000000000..22a811ae26ed3677cf7f9c17e5df5d5830571d1f --- /dev/null +++ b/chemprop/tests/integration/test_regression_mol+mol.py @@ -0,0 +1,87 @@ +"""This integration test is designed to ensure that the chemprop model can _overfit_ the training +data. 
A small enough dataset should be memorizable by even a moderately sized model, so this test +should generally pass.""" + +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader + +from chemprop import nn +from chemprop.data import ( + MoleculeDatapoint, + MoleculeDataset, + MulticomponentDataset, + collate_multicomponent, +) + +N_COMPONENTS = 2 +pytestmark = [ + pytest.mark.parametrize( + "mcmpnn", + [ + ([nn.BondMessagePassing() for _ in range(N_COMPONENTS)], N_COMPONENTS, False), + ([nn.AtomMessagePassing() for _ in range(N_COMPONENTS)], N_COMPONENTS, False), + ([nn.BondMessagePassing()], N_COMPONENTS, True), + ], + indirect=True, + ), + pytest.mark.integration, +] + + +@pytest.fixture +def datas(mol_mol_regression_data): + smis1, smis2, Y = mol_mol_regression_data + + return [ + [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis1, Y)], + [MoleculeDatapoint.from_smi(smi) for smi in smis2], + ] + + +@pytest.fixture +def dataloader(datas): + dsets = [MoleculeDataset(data) for data in datas] + mcdset = MulticomponentDataset(dsets) + mcdset.normalize_targets() + + return DataLoader(mcdset, 32, collate_fn=collate_multicomponent) + + +def test_quick(mcmpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(mcmpnn, dataloader) + + +def test_overfit(mcmpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(mcmpnn, dataloader) + + errors = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = mcmpnn(bmg) + errors.append(preds - targets) + + errors = torch.cat(errors) + mse = errors.square().mean().item() + + assert mse <= 0.05 diff --git a/chemprop/tests/integration/test_regression_mol.py b/chemprop/tests/integration/test_regression_mol.py new file mode 100644 index 0000000000000000000000000000000000000000..3d17277de9cad01e01d35298a37f2a99701ae187 --- /dev/null +++ b/chemprop/tests/integration/test_regression_mol.py @@ -0,0 +1,118 @@ +"""This integration test is designed to ensure that the chemprop model can _overfit_ the training +data. 
A small enough dataset should be memorizable by even a moderately sized model, so this test +should generally pass.""" + +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader + +from chemprop import nn +from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch + + +@pytest.fixture +def data(mol_regression_data): + smis, Y = mol_regression_data + + return [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, Y)] + + +@pytest.fixture +def dataloader(data): + dset = MoleculeDataset(data) + dset.normalize_targets() + + return DataLoader(dset, 32, collate_fn=collate_batch) + + +@pytest.mark.parametrize( + "mpnn", + [ + (nn.BondMessagePassing(), nn.MeanAggregation()), + (nn.AtomMessagePassing(), nn.SumAggregation()), + (nn.BondMessagePassing(), nn.NormAggregation()), + ], + indirect=True, +) +@pytest.mark.integration +def test_quick(mpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(mpnn, dataloader, None) + + +@pytest.mark.parametrize( + "mpnn", + [ + (nn.BondMessagePassing(), nn.MeanAggregation()), + (nn.AtomMessagePassing(), nn.SumAggregation()), + (nn.BondMessagePassing(), nn.NormAggregation()), + ], + indirect=True, +) +@pytest.mark.integration +def test_overfit(mpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(mpnn, dataloader) + + errors = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = mpnn(bmg) + errors.append(preds - targets) + + errors = torch.cat(errors) + mse = errors.square().mean().item() + + assert mse <= 0.05 + + +@pytest.mark.parametrize( + "regression_mpnn_mve", [nn.BondMessagePassing(), nn.AtomMessagePassing()], indirect=True +) +@pytest.mark.integration +def test_mve_quick(regression_mpnn_mve, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(regression_mpnn_mve, dataloader, None) + + +@pytest.mark.parametrize( + "regression_mpnn_evidential", [nn.BondMessagePassing(), nn.AtomMessagePassing()], indirect=True +) +@pytest.mark.integration +def test_evidential_quick(regression_mpnn_evidential, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(regression_mpnn_evidential, dataloader, None) diff --git a/chemprop/tests/integration/test_regression_rxn+mol.py b/chemprop/tests/integration/test_regression_rxn+mol.py new file mode 100644 index 0000000000000000000000000000000000000000..c128db0957e8a3a5cbc46517f3e8437515d55703 --- /dev/null +++ b/chemprop/tests/integration/test_regression_rxn+mol.py @@ -0,0 +1,90 @@ +"""This integration test is designed to ensure that the chemprop model can _overfit_ the training +data. 
A small enough dataset should be memorizable by even a moderately sized model, so this test +should generally pass.""" + +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader + +from chemprop import nn +from chemprop.data import ( + MoleculeDatapoint, + MoleculeDataset, + MulticomponentDataset, + ReactionDatapoint, + ReactionDataset, + collate_multicomponent, +) +from chemprop.featurizers.molgraph import CondensedGraphOfReactionFeaturizer + +N_COMPONENTS = 2 +SHAPE = CondensedGraphOfReactionFeaturizer().shape +pytestmark = [ + pytest.mark.parametrize( + "mcmpnn", + [ + ([nn.BondMessagePassing(*SHAPE), nn.BondMessagePassing()], N_COMPONENTS, False), + ([nn.AtomMessagePassing(*SHAPE), nn.AtomMessagePassing()], N_COMPONENTS, False), + ], + indirect=True, + ), + pytest.mark.integration, +] + + +@pytest.fixture +def datas(rxn_mol_regression_data): + rxns, smis, Y = rxn_mol_regression_data + + return [ + [ReactionDatapoint.from_smi(smi, y) for smi, y in zip(rxns, Y)], + [MoleculeDatapoint.from_smi(smi) for smi in smis], + ] + + +@pytest.fixture +def dataloader(datas): + dsets = [ReactionDataset(datas[0]), MoleculeDataset(datas[1])] + dset = MulticomponentDataset(dsets) + dset.normalize_targets() + + return DataLoader(dset, 32, collate_fn=collate_multicomponent) + + +def test_quick(mcmpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(mcmpnn, dataloader) + + +def test_overfit(mcmpnn, dataloader): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(mcmpnn, dataloader) + + errors = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = mcmpnn(bmg) + errors.append(preds - targets) + + errors = torch.cat(errors) + mse = errors.square().mean().item() + + assert mse <= 0.05 diff --git a/chemprop/tests/integration/test_regression_rxn.py b/chemprop/tests/integration/test_regression_rxn.py new file mode 100644 index 0000000000000000000000000000000000000000..46e2e899442b8d088e093563e8b35fcbfc5a2193 --- /dev/null +++ b/chemprop/tests/integration/test_regression_rxn.py @@ -0,0 +1,77 @@ +"""This integration test is designed to ensure that the chemprop model can _overfit_ the training +data. 
A small enough dataset should be memorizable by even a moderately sized model, so this test +should generally pass.""" + +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader + +from chemprop import nn +from chemprop.data import ReactionDatapoint, ReactionDataset, collate_batch +from chemprop.featurizers.molgraph import CondensedGraphOfReactionFeaturizer + +SHAPE = CondensedGraphOfReactionFeaturizer().shape +pytestmark = pytest.mark.parametrize( + "mpnn", + [ + (nn.BondMessagePassing(*SHAPE), nn.MeanAggregation()), + (nn.AtomMessagePassing(*SHAPE), nn.SumAggregation()), + (nn.BondMessagePassing(*SHAPE), nn.NormAggregation()), + ], + indirect=True, +) + + +@pytest.fixture +def data(rxn_regression_data): + smis, Y = rxn_regression_data + + return [ReactionDatapoint.from_smi(smi, y) for smi, y in zip(smis, Y)] + + +@pytest.fixture +def dataloader(data): + dset = ReactionDataset(data) + dset.normalize_targets() + + return DataLoader(dset, 32, collate_fn=collate_batch) + + +def test_quick(dataloader, mpnn): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + fast_dev_run=True, + ) + trainer.fit(mpnn, dataloader) + + +def test_overfit(dataloader, mpnn): + trainer = pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + devices=1, + max_epochs=100, + overfit_batches=1.00, + ) + trainer.fit(mpnn, dataloader) + + with torch.inference_mode(): + errors = [] + for batch in dataloader: + bmg, _, _, targets, *_ = batch + preds = mpnn(bmg) + errors.append(preds - targets) + + errors = torch.cat(errors) + mse = errors.square().mean().item() + + assert mse <= 0.01 diff --git a/chemprop/tests/regenerate_models.sh b/chemprop/tests/regenerate_models.sh new file mode 100644 index 0000000000000000000000000000000000000000..942e46a123466aa19b86b9068f2c8b5d8d597b99 --- /dev/null +++ b/chemprop/tests/regenerate_models.sh @@ -0,0 +1,105 @@ +#!/bin/bash -l + +CHEMPROP_ENV=$1 +CHEMPROP_PATH=$2 + +if [ -z "${CHEMPROP_ENV}" ] || [ -z "${CHEMPROP_PATH}" ]; then + echo "Usage: regenerate_models.sh " + exit 1 +fi + +conda activate $CHEMPROP_ENV + +# test_cli_classification_mol + +rm -rf test_cli_classification_mol + +chemprop train -i $CHEMPROP_PATH/tests/data/classification/mol.csv --accelerator cpu --epochs 3 --num-workers 0 --task-type classification --save-dir test_cli_classification_mol + +cp -L test_cli_classification_mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_classification_mol.pt + +# test_cli_classification_mol_multiclass + +rm -rf test_cli_classification_mol_multiclass + +chemprop train -i $CHEMPROP_PATH/tests/data/classification/mol_multiclass.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_classification_mol_multiclass --task-type multiclass + +cp -L test_cli_classification_mol_multiclass/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_classification_mol_multiclass.pt + +# test_cli_regression_mol+mol + +rm -rf test_cli_regression_mol+mol + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/mol+mol/mol+mol.csv --accelerator cpu --epochs 3 --num-workers 0 --smiles-columns smiles solvent --save-dir test_cli_regression_mol+mol + +cp -L test_cli_regression_mol+mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_mol+mol.pt + +cp -L test_cli_regression_mol+mol/model_0/checkpoints/best*.ckpt 
$CHEMPROP_PATH/tests/data/example_model_v2_regression_mol+mol.ckpt + +# test_cli_regression_mol + +rm -rf test_cli_regression_mol + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/mol/mol.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_regression_mol + +cp -L test_cli_regression_mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_mol.pt + +cp -L test_cli_regression_mol/model_0/checkpoints/best*.ckpt $CHEMPROP_PATH/tests/data/example_model_v2_regression_mol.ckpt + +# test_cli_regression_mol_multitask + +rm -rf test_cli_regression_mol_multitask + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/mol_multitask.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_regression_mol_multitask + +cp -L test_cli_regression_mol_multitask/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_mol_multitask.pt + +# test_cli_regression_rxn+mol + +rm -rf test_cli_regression_rxn+mol + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/rxn+mol/rxn+mol.csv --accelerator cpu --epochs 3 --num-workers 0 --reaction-columns rxn_smiles --smiles-columns solvent_smiles --save-dir test_cli_regression_rxn+mol + +cp -L test_cli_regression_rxn+mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_rxn+mol.pt + +# test_cli_regression_rxn + +rm -rf test_cli_regression_rxn + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/rxn/rxn.csv --accelerator cpu --epochs 3 --num-workers 0 --reaction-columns smiles --save-dir test_cli_regression_rxn + +cp -L test_cli_regression_rxn/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_rxn.pt + +cp -L test_cli_regression_rxn/model_0/checkpoints/best*.ckpt $CHEMPROP_PATH/tests/data/example_model_v2_regression_rxn.ckpt + +# test_cli_regression_mve_mol + +rm -rf test_cli_regression_mve_mol + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/mol/mol.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_regression_mve_mol --task-type regression-mve + +cp -L test_cli_regression_mve_mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_mve_mol.pt + +# test_cli_regression_evidential_mol + +rm -rf test_cli_regression_evidential_mol + +chemprop train -i $CHEMPROP_PATH/tests/data/regression/mol/mol.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_regression_evidential_mol --task-type regression-evidential + +cp -L test_cli_regression_evidential_mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_regression_evidential_mol.pt + +# test_cli_classification_dirichlet_mol + +rm -rf test_cli_classification_dirichlet_mol + +chemprop train -i $CHEMPROP_PATH/tests/data/classification/mol.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_classification_dirichlet_mol --task-type classification-dirichlet + +cp -L test_cli_classification_dirichlet_mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_classification_dirichlet_mol.pt + +# test_cli_multiclass_dirichlet_mol + +rm -rf test_cli_multiclass_dirichlet_mol + +chemprop train -i $CHEMPROP_PATH/tests/data/classification/mol_multiclass.csv --accelerator cpu --epochs 3 --num-workers 0 --save-dir test_cli_multiclass_dirichlet_mol --task-type multiclass-dirichlet + +cp -L test_cli_multiclass_dirichlet_mol/model_0/best.pt $CHEMPROP_PATH/tests/data/example_model_v2_multiclass_dirichlet_mol.pt \ No newline at end of file diff --git a/chemprop/tests/unit/data/test_data_utils.py b/chemprop/tests/unit/data/test_data_utils.py new
file mode 100644 index 0000000000000000000000000000000000000000..b857bf9e49a4c80494e8eaed4d7683e278ef21a3 --- /dev/null +++ b/chemprop/tests/unit/data/test_data_utils.py @@ -0,0 +1,158 @@ +from astartes import train_val_test_split +from astartes.utils.warnings import NormalizationWarning +import numpy as np +import pytest +from rdkit import Chem + +from chemprop.data.splitting import _unpack_astartes_result, make_split_indices + + +@pytest.fixture(params=[["C", "CC", "CCC", "CN", "CCN", "CCCN", "CCCCN", "CO", "CCO", "CCCO"]]) +def mol_data(request): + """A dataset with single molecules""" + return [Chem.MolFromSmiles(smi) for smi in request.param] + + +@pytest.fixture(params=[["C", "CC", "CN", "CN", "CO", "C"]]) +def mol_data_with_repeated_mols(request): + """A dataset with repeated single molecules""" + return [Chem.MolFromSmiles(smi) for smi in request.param] + + +@pytest.fixture(params=[["C", "CC", "CCC", "C1CC1", "C1CCC1"]]) +def molecule_dataset_with_rings(request): + """A dataset with rings (for scaffold splitting)""" + return [Chem.MolFromSmiles(smi) for smi in request.param] + + +def test_splits_sum1_warning(mol_data): + """Testing that the splits are normalized to 1, for overspecified case.""" + with pytest.warns(NormalizationWarning): + make_split_indices(mols=mol_data, sizes=(0.4, 0.6, 0.2)) + + +def test_splits_sum2_warning(mol_data): + """Testing that the splits are normalized to 1, for underspecified case.""" + with pytest.warns(NormalizationWarning): + make_split_indices(mols=mol_data, sizes=(0.1, 0.1, 0.1)) + + +def test_three_splits_provided(mol_data): + """Testing that three splits are provided""" + with pytest.raises(ValueError): + make_split_indices(mols=mol_data, sizes=(0.8, 0.2)) + + +def test_seed0(mol_data): + """ + Testing that make_split_indices can get expected output using astartes as backend for random split with seed 0. + Note: the behaviour of randomness for data splitting is not controlled by chemprop but by the chosen backend. + """ + train, val, test = make_split_indices(mols=mol_data, seed=0) + train_astartes, val_astartes, test_astartes = _unpack_astartes_result( + train_val_test_split(np.arange(len(mol_data)), sampler="random", random_state=0), True + ) + assert set(train[0]) == set(train_astartes) + assert set(val[0]) == set(val_astartes) + assert set(test[0]) == set(test_astartes) + + +def test_seed100(mol_data): + """ + Testing that make_split_indices can get expected output using astartes as backend for random split with seed 100. + Note: the behaviour of randomness for data splitting is not controlled by chemprop but by the chosen backend. 
+ """ + train, val, test = make_split_indices(mols=mol_data, seed=100) + train_astartes, val_astartes, test_astartes = _unpack_astartes_result( + train_val_test_split(np.arange(len(mol_data)), sampler="random", random_state=100), True + ) + assert set(train[0]) == set(train_astartes) + assert set(val[0]) == set(val_astartes) + assert set(test[0]) == set(test_astartes) + + +def test_split_4_4_2(mol_data): + """Testing the random split with changed sizes""" + train, val, test = make_split_indices(mols=mol_data, sizes=(0.4, 0.4, 0.2)) + train_astartes, val_astartes, test_astartes = _unpack_astartes_result( + train_val_test_split( + np.arange(len(mol_data)), + sampler="random", + train_size=0.4, + val_size=0.4, + test_size=0.2, + random_state=0, + ), + True, + ) + assert set(train[0]) == set(train_astartes) + assert set(val[0]) == set(val_astartes) + assert set(test[0]) == set(test_astartes) + + +def test_split_empty_validation_set(mol_data): + """Testing the random split with an empty validation set""" + train, val, test = make_split_indices(mols=mol_data, sizes=(0.4, 0, 0.6)) + assert set(val[0]) == set([]) + + +def test_random_split(mol_data_with_repeated_mols): + """ + Testing if random split yield expected results. + Note: This test mainly serves as a red flag. Test failure strongly indicates unexpected change of data splitting backend that needs attention. + """ + split_type = "random" + train, val, test = make_split_indices( + mols=mol_data_with_repeated_mols, sizes=(0.4, 0.4, 0.2), split=split_type + ) + + assert train[0] == [2, 1] + + +def test_repeated_smiles(mol_data_with_repeated_mols): + """ + Testing if random split with repeated smiles yield expected results. + Note: This test mainly serves as a red flag. Test failure strongly indicates unexpected change of data splitting backend that needs attention. + """ + split_type = "random_with_repeated_smiles" + train, val, test = make_split_indices( + mols=mol_data_with_repeated_mols, sizes=(0.8, 0.0, 0.2), split=split_type + ) + + assert train[0] == [4, 1, 0, 5] + assert test[0] == [2, 3] + + +def test_kennard_stone(mol_data): + """ + Testing if Kennard-Stone split yield expected results. + Note: This test mainly serves as a red flag. Test failure strongly indicates unexpected change of data splitting backend that needs attention. + """ + split_type = "kennard_stone" + train, val, test = make_split_indices(mols=mol_data, sizes=(0.4, 0.4, 0.2), split=split_type) + + assert set(test[0]) == set([9, 5]) + + +def test_kmeans(mol_data): + """ + Testing if Kmeans split yield expected results. + Note: This test mainly serves as a red flag. Test failure strongly indicates unexpected change of data splitting backend that needs attention. + """ + split_type = "kmeans" + train, val, test = make_split_indices(mols=mol_data, sizes=(0.5, 0.0, 0.5), split=split_type) + + assert train[0] == [0, 1, 2, 3, 7, 8, 9] + + +def test_scaffold(molecule_dataset_with_rings): + """ + Testing if Bemis-Murcko Scaffolds split yield expected results. + Note: This test mainly serves as a red flag. Test failure strongly indicates unexpected change of data splitting backend that needs attention. 
+ """ + split_type = "scaffold_balanced" + train, val, test = make_split_indices( + mols=molecule_dataset_with_rings, sizes=(0.3, 0.3, 0.3), split=split_type + ) + + assert train[0] == [0, 1, 2] diff --git a/chemprop/tests/unit/data/test_dataloader.py b/chemprop/tests/unit/data/test_dataloader.py new file mode 100644 index 0000000000000000000000000000000000000000..b1a3a5ad65a92e34067db3938b7fe1755953bd8b --- /dev/null +++ b/chemprop/tests/unit/data/test_dataloader.py @@ -0,0 +1,84 @@ +import numpy as np +import pytest +import torch + +from chemprop.data.collate import BatchMolGraph, collate_batch +from chemprop.data.datasets import Datum +from chemprop.data.molgraph import MolGraph + + +@pytest.fixture +def datum_1(): + mol_graph1 = MolGraph( + V=np.array([[1.0], [2.0], [3.0]]), + E=np.array([[0.5], [1.5]]), + edge_index=np.array([[0, 1, 0, 2], [1, 0, 2, 0]]), + rev_edge_index=np.array([1, 0, 3, 2]), + ) + return Datum( + mol_graph1, + V_d=np.array([[1.0], [2.0], [4.0]]), + x_d=[3, 4], + y=[6, 7], + weight=[8.0], + lt_mask=[True], + gt_mask=[False], + ) + + +@pytest.fixture +def datum_2(): + mol_graph2 = MolGraph( + V=np.array([[4.0], [5.0]]), + E=np.array([[2.5]]), + edge_index=np.array([[0, 1], [1, 0]]), + rev_edge_index=np.array([1, 0]), + ) + return Datum( + mol_graph2, + V_d=np.array([[5.0], [7.0]]), + x_d=[8, 9], + y=[6, 4], + weight=[1.0], + lt_mask=[False], + gt_mask=[True], + ) + + +def test_collate_batch_single_graph(datum_1): + batch = [datum_1] + + result = collate_batch(batch) + mgs, V_ds, x_ds, ys, weights, lt_masks, gt_masks = result + + assert isinstance(result, tuple) + assert isinstance(mgs, BatchMolGraph) + assert ( + mgs.V.shape[0] == V_ds.shape[0] + ) # V is number of atoms x number of atom features, V_ds is number of atoms x number of atom descriptors + torch.testing.assert_close(V_ds, torch.tensor([[1.0], [2.0], [4.0]], dtype=torch.float32)) + torch.testing.assert_close(x_ds, torch.tensor([[3.0, 4.0]], dtype=torch.float32)) + torch.testing.assert_close(ys, torch.tensor([[6.0, 7.0]], dtype=torch.float32)) + torch.testing.assert_close(weights, torch.tensor([[[8.0]]], dtype=torch.float32)) + torch.testing.assert_close(lt_masks, torch.tensor([[1]], dtype=torch.bool)) + torch.testing.assert_close(gt_masks, torch.tensor([[0]], dtype=torch.bool)) + + +def test_collate_batch_multiple_graphs(datum_1, datum_2): + batch = [datum_1, datum_2] + + result = collate_batch(batch) + mgs, V_ds, x_ds, ys, weights, lt_masks, gt_masks = result + + assert isinstance(mgs, BatchMolGraph) + assert ( + mgs.V.shape[0] == V_ds.shape[0] + ) # V is number of atoms x number of atom features, V_ds is number of atoms x number of atom descriptors + torch.testing.assert_close( + V_ds, torch.tensor([[1.0], [2.0], [4.0], [5.0], [7.0]], dtype=torch.float32) + ) + torch.testing.assert_close(x_ds, torch.tensor([[3.0, 4.0], [8.0, 9.0]], dtype=torch.float32)) + torch.testing.assert_close(ys, torch.tensor([[6.0, 7.0], [6.0, 4.0]], dtype=torch.float32)) + torch.testing.assert_close(weights, torch.tensor([[[8.0]], [[1.0]]], dtype=torch.float32)) + torch.testing.assert_close(lt_masks, torch.tensor([[1], [0]], dtype=torch.bool)) + torch.testing.assert_close(gt_masks, torch.tensor([[0], [1]], dtype=torch.bool)) diff --git a/chemprop/tests/unit/data/test_datapoint.py b/chemprop/tests/unit/data/test_datapoint.py new file mode 100644 index 0000000000000000000000000000000000000000..fbb6fe7fdccb7d219a24f65857dc95ed05d378ea --- /dev/null +++ b/chemprop/tests/unit/data/test_datapoint.py @@ -0,0 +1,46 @@ +import numpy as 
np +import pytest + +from chemprop.data import MoleculeDatapoint + +SMI = "c1ccccc1" + + +@pytest.fixture(params=range(1, 3)) +def targets(request): + return np.random.rand(request.param) + + +@pytest.fixture(params=[0.5, 0.9]) +def features(request): + return np.where(np.random.rand(1024) > request.param, 1.0, 0.0) + + +@pytest.fixture +def features_with_nans(features): + idxs = np.random.choice(len(features), len(features) // 100, False) + features[idxs] = np.nan + + return features + + +def test_num_tasks(targets): + d = MoleculeDatapoint.from_smi(SMI, y=targets) + + assert d.t == targets.shape[0] + + +def test_addh(smi, targets): + d1 = MoleculeDatapoint.from_smi(smi, y=targets) + d2 = MoleculeDatapoint.from_smi(smi, y=targets, add_h=True) + + assert d1.mol.GetNumAtoms() != d2.mol.GetNumAtoms() + + +def test_replace_token(smi, targets, features_with_nans): + if not np.isnan(features_with_nans).any(): + pytest.skip("no `nan`s") + + d = MoleculeDatapoint.from_smi(smi, y=targets, x_d=features_with_nans) + + assert not np.isnan(d.x_d).any() diff --git a/chemprop/tests/unit/data/test_dataset.py b/chemprop/tests/unit/data/test_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..d34b166937e03a994b3a195bfb4d2cc63acf56de --- /dev/null +++ b/chemprop/tests/unit/data/test_dataset.py @@ -0,0 +1,180 @@ +from unittest.mock import MagicMock, call + +import numpy as np +import pytest +from rdkit import Chem +from sklearn.preprocessing import StandardScaler + +from chemprop.data.datasets import MoleculeDatapoint, MoleculeDataset +from chemprop.data.molgraph import MolGraph +from chemprop.featurizers.molgraph import SimpleMoleculeMolGraphFeaturizer + + +@pytest.fixture(params=[1, 5, 10]) +def smis(smis, request): + return smis.sample(request.param).to_list() + + +@pytest.fixture +def targets(smis): + return np.random.rand(len(smis), 1) + + +@pytest.fixture +def mols(smis): + return [Chem.MolFromSmiles(smi) for smi in smis] + + +@pytest.fixture +def X_d(mols): + return [np.random.rand(1) for _ in mols] + + +@pytest.fixture +def V_fs(mols): + return [np.random.rand(mol.GetNumAtoms(), 1) for mol in mols] + + +@pytest.fixture +def E_fs(mols): + return [np.random.rand(mol.GetNumBonds(), 2) for mol in mols] + + +@pytest.fixture +def V_ds(mols): + return [np.random.rand(mol.GetNumAtoms(), 3) for mol in mols] + + +@pytest.mark.parametrize( + "X_d, V_fs, E_fs, V_ds", + [(None, None, None, None), ("X_d", "V_fs", "E_fs", "V_ds")], + indirect=True, +) +@pytest.fixture +def data(mols, targets, X_d, V_fs, E_fs, V_ds): + return [ + MoleculeDatapoint(mol=mol, y=target, x_d=x_d, V_f=V_f, E_f=E_f, V_d=V_d) + for mol, target, x_d, V_f, E_f, V_d in zip(mols, targets, X_d, V_fs, E_fs, V_ds) + ] + + +@pytest.fixture(params=[False, True]) +def cache(request): + return request.param + + +@pytest.fixture +def dataset(data, cache): + extra_atom_fdim = data[0].V_f.shape[1] if data[0].V_f is not None else 0 + extra_bond_fdim = data[0].E_f.shape[1] if data[0].E_f is not None else 0 + + dset = MoleculeDataset( + data, + SimpleMoleculeMolGraphFeaturizer( + extra_atom_fdim=extra_atom_fdim, extra_bond_fdim=extra_bond_fdim + ), + ) + dset.cache = cache + + return dset + + +def test_none(): + with pytest.raises(ValueError): + MoleculeDataset(None, SimpleMoleculeMolGraphFeaturizer()) + + +def test_empty(): + """TODO""" + + +def test_len(data, dataset): + assert len(data) == len(dataset) + + +def test_smis(dataset, smis): + assert smis == dataset.smiles + + +def test_targets(dataset, targets): + 
np.testing.assert_array_equal(dataset.Y, targets) + + +def test_set_targets_too_short(dataset): + with pytest.raises(ValueError): + dataset.Y = np.random.rand(len(dataset) // 2, 1) + + +def test_num_tasks(dataset, targets): + assert dataset.t == targets.shape[1] + + +@pytest.mark.skipif( + not all([x is None for x in ["X_d", "V_fs", "E_fs", "V_ds"]]), reason="Not all inputs are None" +) +def test_aux_nones(dataset: MoleculeDataset): + np.testing.assert_array_equal(dataset.X_d, None) + np.testing.assert_array_equal(dataset.V_fs, None) + np.testing.assert_array_equal(dataset.E_fs, None) + np.testing.assert_array_equal(dataset.V_ds, None) + np.testing.assert_array_equal(dataset.gt_mask, None) + np.testing.assert_array_equal(dataset.lt_mask, None) + assert dataset.d_xd == 0 + assert dataset.d_vf == 0 + assert dataset.d_ef == 0 + assert dataset.d_vd == 0 + + +def test_normalize_targets(dataset): + dset_scaler = dataset.normalize_targets() + scaler = StandardScaler() + scaler.fit(dataset._Y) + Y = scaler.transform(dataset._Y) + + np.testing.assert_array_equal(dataset.Y, Y) + np.testing.assert_array_equal(dset_scaler.mean_, scaler.mean_) + np.testing.assert_array_equal(dset_scaler.scale_, scaler.scale_) + + +def test_normalize_inputs(dataset): + dset_scaler = dataset.normalize_inputs("X_d") + scaler = StandardScaler() + scaler.fit(dataset._X_d) + X = scaler.transform(dataset._X_d) + + np.testing.assert_array_equal(dataset.X_d, X) + np.testing.assert_array_equal(dset_scaler.mean_, scaler.mean_) + np.testing.assert_array_equal(dset_scaler.scale_, scaler.scale_) + + inputs = ["V_f", "E_f", "V_d"] + for input_ in inputs: + dset_scaler = dataset.normalize_inputs(input_) + scaler = StandardScaler() + Xs = getattr(dataset, f"_{input_}s") + X = np.concatenate(Xs, axis=0) + scaler.fit(X) + Xs = [scaler.transform(x) for x in Xs] + + for X, dset_X in zip(Xs, getattr(dataset, f"{input_}s")): + np.testing.assert_array_equal(X, dset_X) + np.testing.assert_array_equal(getattr(dset_scaler, "mean_"), scaler.mean_) + np.testing.assert_array_equal(getattr(dset_scaler, "scale_"), scaler.scale_) + + +@pytest.mark.parametrize("cache", [False, True]) +def test_cache(dataset: MoleculeDataset, cache): + """Test that cache attribute is being set appropriately and that the underlying cache is being + used correctly to load the molgraphs.""" + mg = MolGraph(None, None, None, None) + + dataset.cache = cache + assert dataset.cache == cache + dataset.mg_cache = MagicMock() + dataset.mg_cache.__getitem__.side_effect = lambda i: mg + + calls = [] + for i in range(len(dataset)): + assert dataset[i].mg is mg + calls.append(call(i)) + + dataset.mg_cache.__getitem__.assert_has_calls(calls) diff --git a/chemprop/tests/unit/data/test_samplers.py b/chemprop/tests/unit/data/test_samplers.py new file mode 100644 index 0000000000000000000000000000000000000000..354158bce205eb0a4d2c3216501ac569eb81421a --- /dev/null +++ b/chemprop/tests/unit/data/test_samplers.py @@ -0,0 +1,108 @@ +import numpy as np +import pytest + +from chemprop.data import ClassBalanceSampler, MoleculeDatapoint, MoleculeDataset, SeededSampler +from chemprop.featurizers.molgraph import SimpleMoleculeMolGraphFeaturizer + + +@pytest.fixture(params=[0.0, 0.1, 0.5, 1.0]) +def threshold(request): + return request.param + + +@pytest.fixture +def bin_targets(targets, threshold): + return targets <= threshold + + +@pytest.fixture +def featurizer(): + return SimpleMoleculeMolGraphFeaturizer() + + +@pytest.fixture +def dataset(mols, targets, featurizer): + data = 
[MoleculeDatapoint(mol, y) for mol, y in zip(mols, targets)] + + return MoleculeDataset(data, featurizer) + + +@pytest.fixture(params=[0, 24, 100]) +def seed(request): + return request.param + + +@pytest.fixture +def class_sampler(mols, bin_targets, featurizer): + data = [MoleculeDatapoint(mol, y) for mol, y in zip(mols, bin_targets)] + dset = MoleculeDataset(data, featurizer) + + return ClassBalanceSampler(dset.Y, shuffle=True) + + +def test_seeded_no_seed(dataset): + with pytest.raises(ValueError): + SeededSampler(len(dataset), None) + + +def test_seeded_shuffle(dataset, seed): + sampler = SeededSampler(len(dataset), seed) + + assert list(sampler) != list(sampler) + + +def test_seeded_fixed_shuffle(dataset, seed): + sampler1 = SeededSampler(len(dataset), seed) + sampler2 = SeededSampler(len(dataset), seed) + + idxs1 = list(sampler1) + idxs2 = list(sampler2) + + assert idxs1 == idxs2 + + +def test_class_balance_length(class_sampler, bin_targets: np.ndarray): + n_actives = bin_targets.any(1).sum(0) + n_inactives = len(bin_targets) - n_actives + expected_length = 2 * min(n_actives, n_inactives) + + assert len(class_sampler) == expected_length + + +def test_class_balance_sample(class_sampler, bin_targets: np.ndarray): + idxs = list(class_sampler) + + # sampled indices should be 50/50 actives/inacitves + assert sum(bin_targets[idxs]) == len(idxs) // 2 + + +def test_class_balance_shuffle(class_sampler): + idxs1 = list(class_sampler) + idxs2 = list(class_sampler) + + if len(class_sampler) == 0: + pytest.skip("no indices to sample!") + + assert idxs1 != idxs2 + + +def test_seed_class_balance_shuffle(smis, bin_targets, featurizer, seed): + data = [MoleculeDatapoint.from_smi(smi, target) for smi, target in zip(smis, bin_targets)] + dset = MoleculeDataset(data, featurizer) + + sampler = ClassBalanceSampler(dset.Y, seed, True) + + if len(sampler) == 0: + pytest.skip("no indices to sample!") + + assert list(sampler) != list(sampler) + + +def test_seed_class_balance_reproducibility(smis, bin_targets, featurizer, seed): + data = [MoleculeDatapoint.from_smi(smi, target) for smi, target in zip(smis, bin_targets)] + dset = MoleculeDataset(data, featurizer) + + sampler1 = ClassBalanceSampler(dset.Y, seed, True) + sampler2 = ClassBalanceSampler(dset.Y, seed, True) + + assert list(sampler1) == list(sampler2) diff --git a/chemprop/tests/unit/featurizers/test_atom.py b/chemprop/tests/unit/featurizers/test_atom.py new file mode 100644 index 0000000000000000000000000000000000000000..a26fc93cc7bec20fa82f7d828e9faf82fc760da4 --- /dev/null +++ b/chemprop/tests/unit/featurizers/test_atom.py @@ -0,0 +1,141 @@ +"""NOTE: these tests make a lot of assumptions about the internal mechanics of the AtomFeaturizer, +so they'll need to be reworked if something ever changes about that.""" + +import numpy as np +import pytest +from rdkit import Chem +from rdkit.Chem.rdchem import HybridizationType + +from chemprop.featurizers import MultiHotAtomFeaturizer + +SMI = "Cn1nc(CC(=O)Nc2ccc3oc4ccccc4c3c2)c2ccccc2c1=O" + + +@pytest.fixture(params=list(Chem.MolFromSmiles(SMI).GetAtoms())[:5]) +def atom(request): + return request.param + + +@pytest.fixture +def aromatic(atom): + return atom.GetIsAromatic() + + +@pytest.fixture +def mass_bit(atom): + return 0.01 * atom.GetMass() + + +@pytest.fixture +def atomic_num(): + return list(range(1, 37)) + [53] + + +@pytest.fixture +def degree(): + return list(range(6)) + + +@pytest.fixture +def formal_charge(): + return [-1, -2, 1, 2, 0] + + +@pytest.fixture +def chiral_tag(): + return 
list(range(4)) + + +@pytest.fixture +def num_Hs(): + return list(range(5)) + + +@pytest.fixture +def hybridization(): + return [ + HybridizationType.S, + HybridizationType.SP, + HybridizationType.SP2, + HybridizationType.SP2D, + HybridizationType.SP3, + HybridizationType.SP3D, + HybridizationType.SP3D2, + HybridizationType.OTHER, + ] + + +@pytest.fixture +def featurizer(atomic_num, degree, formal_charge, chiral_tag, num_Hs, hybridization): + return MultiHotAtomFeaturizer( + atomic_num, degree, formal_charge, chiral_tag, num_Hs, hybridization + ) + + +@pytest.fixture +def expected_len(atomic_num, degree, formal_charge, chiral_tag, num_Hs, hybridization): + return ( + +sum( + len(xs) + 1 + for xs in (atomic_num, degree, formal_charge, chiral_tag, num_Hs, hybridization) + ) + + 2 + ) + + +@pytest.fixture +def x(featurizer, atom): + return featurizer(atom) + + +def test_len(featurizer, expected_len): + assert len(featurizer) == expected_len + + +def test_none(featurizer): + np.testing.assert_array_equal(featurizer(None), np.zeros(len(featurizer))) + + +def test_atomic_num_bit(atom, x, atomic_num): + n = atom.GetAtomicNum() + + if n == 53: # special check for Iodine + assert x[len(atomic_num) - 1] == 1 + else: + if n in atomic_num: + assert x[n - 1] == 1 + else: + assert x[len(atomic_num)] == 1 + + +def test_aromatic_bit(x, aromatic): + i = -2 + if aromatic: + assert x[i] == 1 + else: + assert x[i] == 0 + + +def test_mass_bit(x, mass_bit): + assert x[-1] == pytest.approx(mass_bit) + + +@pytest.mark.parametrize( + "a,x_v_orig", + zip( + list(Chem.MolFromSmiles("Fc1cccc(C2(c3nnc(Cc4cccc5ccccc45)o3)CCOCC2)c1").GetAtoms()), + # fmt: off + [ + [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0.18998], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0.12011], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0.12011], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0.12011], + ] + # fmt: on + ), +) +def test_x_orig(a, x_v_orig): + f = MultiHotAtomFeaturizer.v2() + x_v_calc = f(a) + + np.testing.assert_array_almost_equal(x_v_calc, x_v_orig) diff --git a/chemprop/tests/unit/featurizers/test_bond.py b/chemprop/tests/unit/featurizers/test_bond.py new file mode 100644 index 0000000000000000000000000000000000000000..3277d1dc60a53f9f33a210dce0daa6d9af84af67 --- /dev/null +++ b/chemprop/tests/unit/featurizers/test_bond.py @@ -0,0 +1,93 @@ +import numpy as np +import pytest +from rdkit import Chem + +from chemprop.featurizers import MultiHotBondFeaturizer + +SMI = "Cn1nc(CC(=O)Nc2ccc3oc4ccccc4c3c2)c2ccccc2c1=O" + + +@pytest.fixture(params=list(Chem.MolFromSmiles(SMI).GetBonds())) +def bond(request): + return request.param + + +@pytest.fixture +def bond_types(): + return [1, 2, 3, 12] + + +@pytest.fixture +def stereo(): + return list(range(6)) + + +@pytest.fixture +def featurizer(bond_types, stereo): + return MultiHotBondFeaturizer(bond_types, stereo) + + 
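+# The expected length below reflects the bit layout exercised in this file: a null-bond flag, a one-hot over the 4 bond types, a conjugation bit, one further single bit (presumably ring membership), and a one-hot over the 6 stereo labels plus an unknown slot, i.e. 1 + 4 + 1 + 1 + 7 = 14. +# A rough usage sketch (not part of the test suite; the arguments mirror the fixtures above): +# f = MultiHotBondFeaturizer([1, 2, 3, 12], list(range(6))) +# assert len(f(Chem.MolFromSmiles("C=C").GetBondWithIdx(0))) == 14 +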
+@pytest.fixture +def exp_len(bond_types, stereo): + return sum([1, len(bond_types), 1, 1, (len(stereo) + 1)]) + + +@pytest.fixture +def bt_bit(bond, bond_types, featurizer): + bt = bond.GetBondType() + i_bt = int(bt) + + i = bond_types.index(i_bt) if i_bt in bond_types else -1 + + if i == -1: + return i + + return featurizer.one_hot_index(bt, featurizer.bond_types)[0] + 1 + + +@pytest.fixture +def x(featurizer, bond): + return featurizer(bond) + + +def test_len(featurizer, exp_len): + assert len(featurizer) == exp_len + + +def test_none(featurizer): + x_e = np.zeros(len(featurizer)) + x_e[0] = 1 + + np.testing.assert_array_equal(x_e, featurizer(None)) + + +def test_bt_bit(x, bt_bit): + assert x[bt_bit] == 1 + + +def test_conj_bit(featurizer, bond, x): + conj_bit = 1 + len(featurizer.bond_types) + assert x[conj_bit] == int(bond.GetIsConjugated()) + + +@pytest.mark.parametrize( + "mol,X_e_orig", + [ + ( + Chem.MolFromSmiles("O=C(NCc1ccc(Cn2ccccc2=O)cc1)c1ccccc1CCc1ccccc1"), + np.array( + [ + [0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], + ] + ), + ) + ], +) +def test_x_hand_calc(mol, X_e_orig): + f = MultiHotBondFeaturizer() + + bonds = list(mol.GetBonds()) + X_e_calc = np.array([f(b) for b in bonds[: len(X_e_orig)]]) + np.testing.assert_array_almost_equal(X_e_calc, X_e_orig) diff --git a/chemprop/tests/unit/featurizers/test_cgr.py b/chemprop/tests/unit/featurizers/test_cgr.py new file mode 100644 index 0000000000000000000000000000000000000000..dc84602d6667cb2f61f8d4a5c825a780128bcf7c --- /dev/null +++ b/chemprop/tests/unit/featurizers/test_cgr.py @@ -0,0 +1,406 @@ +import random +from typing import NamedTuple +import uuid + +import numpy as np +import pytest + +from chemprop.featurizers.molgraph import CGRFeaturizer, RxnMode +from chemprop.utils import make_mol + +AVAILABLE_RXN_MODE_NAMES = [ + "REAC_PROD", + "REAC_PROD_BALANCE", + "REAC_DIFF", + "REAC_DIFF_BALANCE", + "PROD_DIFF", + "PROD_DIFF_BALANCE", +] + + +@pytest.fixture +def expected_aliases(): + return AVAILABLE_RXN_MODE_NAMES + + +@pytest.fixture(params=AVAILABLE_RXN_MODE_NAMES) +def mode_name(request): + return request.param + + +@pytest.fixture(params=AVAILABLE_RXN_MODE_NAMES[::2]) +def mode_imbalanced(request): + return request.param + + +@pytest.fixture(params=AVAILABLE_RXN_MODE_NAMES[1::2]) +def mode_balanced(request): + return request.param + + +@pytest.fixture +def rxn_mode(mode_name): + return getattr(RxnMode, mode_name) + + +@pytest.fixture(params=[str(uuid.uuid4()) for _ in range(3)]) +def invalid_alias(request): + return request.param + + +rxn_smis = [ + # reactant and product with the same number of atoms + "[CH3:1][H:2]>>[CH3:1].[H:2]", # reactant and product are balanced and mapped + "[CH3:2][H:1]>>[H:1].[CH3:2]", # reactant and product are balanced, mapped but with different atom index order + "[CH3:1][H]>>[CH3:1].[H:2]", # reactant and product are balanced and but reactant has less atom-mapped atoms + "[CH3:1][H:2]>>[H].[CH3:1]", # reactant and product are balanced and but product has less atom-mapped atoms + # reactant and product has different numbers of atoms + "[CH4:1]>>[CH2:1].[H:2][H:3]", # product has more atoms and more atom-mapped atoms + "[H:1].[CH2:2][H:3]>>[CH3:2][H:3]", # reactant with more atoms and atom-mapped atoms + "[CH4:1]>>[CH3:1].[H:2]", # product with more atoms and atom-mapped atoms with 0 edge +] + +# Expected output for CGRFeaturizer.map_reac_to_prod +reac_prod_maps = { + 
"[CH3:1][H:2]>>[CH3:1].[H:2]": ({0: 0, 1: 1}, [], []), + "[CH3:2][H:1]>>[H:1].[CH3:2]": ({0: 1, 1: 0}, [], []), + "[CH3:1][H]>>[CH3:1].[H:2]": ({0: 0}, [1], [1]), + "[CH3:1][H:2]>>[H].[CH3:1]": ({0: 1}, [0], [1]), + "[CH4:1]>>[CH2:1].[H:2][H:3]": ({0: 0}, [1, 2], []), + "[H:1].[CH2:2][H:3]>>[CH3:2][H:3]": ({1: 0, 2: 1}, [], [0]), + "[CH4:1]>>[CH3:1].[H:2]": ({0: 0}, [1], []), +} + + +@pytest.fixture(params=rxn_smis) +def rxn_smi(request): + return request.param + + +class BondExpectation(NamedTuple): + """ + whether elements in the returns for _get_bonds are Nones under + imbalanced and balanced modes for provided bond + """ + + bond: tuple + bond_reac_none: bool + bond_prod_none: bool + + +bond_expect_imbalanced = { + "[CH3:1][H:2]>>[CH3:1].[H:2]": [ + BondExpectation((0, 1), bond_reac_none=False, bond_prod_none=True) + ], + "[CH3:2][H:1]>>[H:1].[CH3:2]": [ + BondExpectation((0, 1), bond_reac_none=False, bond_prod_none=True) + ], + "[CH3:1][H]>>[CH3:1].[H:2]": [ + BondExpectation((0, 1), bond_reac_none=False, bond_prod_none=True), + BondExpectation((0, 2), bond_reac_none=True, bond_prod_none=True), + BondExpectation((1, 2), bond_reac_none=True, bond_prod_none=True), + ], + "[CH3:1][H:2]>>[H].[CH3:1]": [ + BondExpectation((0, 1), bond_reac_none=False, bond_prod_none=True), + BondExpectation((0, 2), bond_reac_none=True, bond_prod_none=True), + BondExpectation((1, 2), bond_reac_none=True, bond_prod_none=True), + ], + "[CH4:1]>>[CH2:1].[H:2][H:3]": [ + BondExpectation((0, 1), bond_reac_none=True, bond_prod_none=True), + BondExpectation((0, 2), bond_reac_none=True, bond_prod_none=True), + BondExpectation((1, 2), bond_reac_none=True, bond_prod_none=False), + ], + "[H:1].[CH2:2][H:3]>>[CH3:2][H:3]": [ + BondExpectation((0, 1), bond_reac_none=True, bond_prod_none=True), + BondExpectation((0, 2), bond_reac_none=True, bond_prod_none=True), + BondExpectation((1, 2), bond_reac_none=False, bond_prod_none=False), + ], + "[CH4:1]>>[CH3:1].[H:2]": [ + BondExpectation((0, 0), bond_reac_none=True, bond_prod_none=True) + ], # this last entry doesn't test for anything meaningful, only to enable other tests for graph with zero edges +} +bond_expect_balanced = bond_expect_imbalanced.copy() +bond_expect_balanced.update( + { + "[CH4:1]>>[CH2:1].[H:2][H:3]": [ + BondExpectation((0, 1), bond_reac_none=True, bond_prod_none=True), + BondExpectation((0, 2), bond_reac_none=True, bond_prod_none=True), + BondExpectation((1, 2), bond_reac_none=False, bond_prod_none=False), + ] # this is the only difference compared to the imbalanced case + } +) + + +# A fake `bond` is used in test_calc_edge_features. This is a workaround, +# as RDKit cannot construct a bond directly in Python +bond = make_mol("[CH3:1][H:2]", keep_h=True, add_h=False).GetBondWithIdx(0) + + +def get_reac_prod(rxn_smi: str) -> list: + return [make_mol(smi, keep_h=True, add_h=False) for smi in rxn_smi.split(">>")] + + +def randomize_case(s: str) -> str: + choices = (str.upper, str.lower) + + return "".join(random.choice(choices)(x) for x in s) + + +@pytest.mark.parametrize("s", [str(uuid.uuid4()) for _ in range(3)]) +def test_randomize_case(s): + """test our helper function to ensure that it's not mangling our strings""" + assert randomize_case(s).upper() == s.upper() + + +def test_len(expected_aliases): + """ + Test that the RxnMode class has the correct length. + """ + assert len(RxnMode) == len(expected_aliases) + + +def test_keys(expected_aliases): + """ + Test that the keys function returns the correct set of modes. 
+ """ + assert set(RxnMode.keys()) == set(alias.upper() for alias in expected_aliases) + + +@pytest.mark.parametrize( + "alias,rxn_mode", + [ + ("REAC_PROD", RxnMode.REAC_PROD), + ("REAC_PROD_BALANCE", RxnMode.REAC_PROD_BALANCE), + ("REAC_DIFF", RxnMode.REAC_DIFF), + ("REAC_DIFF_BALANCE", RxnMode.REAC_DIFF_BALANCE), + ("PROD_DIFF", RxnMode.PROD_DIFF), + ("PROD_DIFF_BALANCE", RxnMode.PROD_DIFF_BALANCE), + ], +) +class TestRxnModeGet: + def test_name_and_value(self, alias, rxn_mode): + assert alias.upper() == rxn_mode.name + assert alias.lower() == rxn_mode.value + + def test_getitem(self, alias, rxn_mode): + """ + Test that the RxnMode class can be indexed with uppercase mode. + """ + assert RxnMode[alias.upper()] == rxn_mode + + def test_get(self, alias, rxn_mode): + """ + Test that the get function returns the correct RxnMode. + """ + assert RxnMode.get(alias.upper()) == rxn_mode + + def test_get_random_case(self, alias, rxn_mode): + """ + Test that the get function returns the correct RxnMode when given an alias with random case. + """ + assert RxnMode.get(randomize_case(alias)) == rxn_mode + + def test_get_enum_identity(self, alias, rxn_mode): + """ + Test that the get function returns the correct RxnMode when given a RxnMode. + """ + assert RxnMode.get(rxn_mode) == rxn_mode + + +def test_getitem_invalid_mode(invalid_alias): + """ + Test that the RxnMode class raises a ValueError when indexed with an invalid mode. + """ + with pytest.raises(KeyError): + RxnMode[invalid_alias] + + +def test_get_invalid_mode(invalid_alias): + """ + Test that the get function raises a ValueError when given an invalid mode. + """ + with pytest.raises(KeyError): + RxnMode.get(invalid_alias) + + +class TestCondensedGraphOfReactionFeaturizer: + def test_init_without_mode_(self): + """ + Test that the CondensedGraphOfReactionFeaturizer can be initialized without a mode. + """ + featurizer = CGRFeaturizer() + assert featurizer.mode == RxnMode.REAC_DIFF + + def test_init_with_mode_str(self, mode_name, rxn_mode): + """ + Test that the CondensedGraphOfReactionFeaturizer can be initialized with a string of the mode. + """ + featurizer = CGRFeaturizer(mode_=mode_name) + assert featurizer.mode == rxn_mode + + def test_init_with_mode_enum(self, rxn_mode): + """ + Test that the CondensedGraphOfReactionFeaturizer can be initialized with a RxnMode. + """ + featurizer = CGRFeaturizer(mode_=rxn_mode) + assert featurizer.mode == rxn_mode + + def test_map_reac_to_prod(self, rxn_smi): + """ + Test that the map_reac_to_prod method returns the correct mapping. + """ + reac, prod = get_reac_prod(rxn_smi) + assert CGRFeaturizer.map_reac_to_prod(reac, prod) == reac_prod_maps[rxn_smi] + + def test_calc_node_feature_matrix_shape(self, rxn_smi, mode_name): + """ + Test that the calc_node_feature_matrix method returns the correct node feature matrix. + """ + featurizer = CGRFeaturizer(mode_=mode_name) + + reac, prod = get_reac_prod(rxn_smi) + ri2pj, pids, rids = featurizer.map_reac_to_prod(reac, prod) + + num_nodes, atom_fdim = featurizer._calc_node_feature_matrix( + reac, prod, ri2pj, pids, rids + ).shape + assert num_nodes == len(ri2pj) + len(pids) + len(rids) + assert atom_fdim == featurizer.atom_fdim + + def test_calc_node_feature_matrix_atomic_number_features(self, rxn_smi, rxn_mode): + """ + Test that the calc_node_feature_matrix method returns the correct feature matrix for the atomic number features. 
+ """ + featurizer = CGRFeaturizer(mode_=rxn_mode) + reac, prod = get_reac_prod(rxn_smi) + ri2pj, pids, rids = featurizer.map_reac_to_prod(reac, prod) + atom_featurizer = featurizer.atom_featurizer + + atomic_num_features_expected = np.array( + [atom_featurizer.num_only(a) for a in reac.GetAtoms()] + + [atom_featurizer.num_only(prod.GetAtomWithIdx(pid)) for pid in pids] + )[ + :, : len(atom_featurizer.atomic_nums) + 1 + ] # only create and keep the atomic number features + + atomic_num_features = featurizer._calc_node_feature_matrix(reac, prod, ri2pj, pids, rids)[ + :, : len(atom_featurizer.atomic_nums) + 1 + ] + + np.testing.assert_equal(atomic_num_features, atomic_num_features_expected) + + def test_get_bonds_imbalanced(self, rxn_smi, mode_imbalanced): + """ + Test that the get_bonds method returns the correct bonds when modes are imbalanced. + """ + featurizer = CGRFeaturizer(mode_=mode_imbalanced) + reac, prod = get_reac_prod(rxn_smi) + ri2pj, pids, _ = featurizer.map_reac_to_prod(reac, prod) + + for bond_expect in bond_expect_imbalanced[rxn_smi]: + bond_reac, bond_prod = featurizer._get_bonds( + reac, prod, ri2pj, pids, reac.GetNumAtoms(), *bond_expect.bond + ) + assert (bond_reac is None) == bond_expect.bond_reac_none + assert (bond_prod is None) == bond_expect.bond_prod_none + + def test_get_bonds_balanced(self, rxn_smi, mode_balanced): + """ + Test that the get_bonds method returns the correct bonds when modes are balanced. + """ + featurizer = CGRFeaturizer(mode_=mode_balanced) + reac, prod = get_reac_prod(rxn_smi) + ri2pj, pids, _ = featurizer.map_reac_to_prod(reac, prod) + + for bond_expect in bond_expect_balanced[rxn_smi]: + bond_reac, bond_prod = featurizer._get_bonds( + reac, prod, ri2pj, pids, reac.GetNumAtoms(), *bond_expect.bond + ) + assert (bond_reac is None) == bond_expect.bond_reac_none + assert (bond_prod is None) == bond_expect.bond_prod_none + + @pytest.mark.parametrize( + "reac_prod_bonds", [(bond, bond), (bond, None), (None, bond), (None, None)] + ) + def test_calc_edge_feature_shape(self, reac_prod_bonds, rxn_mode): + """ + Test that the calc_edge_feature method returns the correct edge feature. + """ + featurizer = CGRFeaturizer(mode_=rxn_mode) + reac_bond, prod_bond = reac_prod_bonds + + assert featurizer._calc_edge_feature(reac_bond, prod_bond).shape == ( + len(featurizer.bond_featurizer) * 2, + ) + + def test_featurize_balanced(self, rxn_smi, mode_balanced): + """ + Test CGR featurizer returns the correct features with balanced modes. 
+ """ + featurizer = CGRFeaturizer(mode_=mode_balanced) + reac, prod = get_reac_prod(rxn_smi) + ri2pj, pids, rids = featurizer.map_reac_to_prod(reac, prod) + + molgraph = featurizer((reac, prod)) + + n_atoms = len(ri2pj) + len(pids) + len(rids) + atom_fdim = featurizer.atom_fdim + + assert molgraph.V.shape == (n_atoms, atom_fdim) + + bonds = [ + b.bond + for b in bond_expect_balanced[rxn_smi] + if not (b.bond_reac_none and b.bond_prod_none) + ] + bond_fdim = featurizer.bond_fdim + + assert molgraph.E.shape == (len(bonds) * 2, bond_fdim) + + expect_edge_index = [[], []] + expect_rev_edge_index = [] + + for i, bond in enumerate(bonds): + bond = list(bond) + expect_edge_index[0].extend(bond) + expect_edge_index[1].extend(bond[::-1]) + expect_rev_edge_index.extend([i * 2 + 1, i * 2]) + + assert np.array_equal(molgraph.edge_index, expect_edge_index) + assert np.array_equal(molgraph.rev_edge_index, expect_rev_edge_index) + + def test_featurize_imbalanced(self, rxn_smi, mode_imbalanced): + """ + Test CGR featurizer returns the correct features with balanced modes. + """ + featurizer = CGRFeaturizer(mode_=mode_imbalanced) + reac, prod = get_reac_prod(rxn_smi) + ri2pj, pids, rids = featurizer.map_reac_to_prod(reac, prod) + + molgraph = featurizer((reac, prod)) + + n_atoms = len(ri2pj) + len(pids) + len(rids) + atom_fdim = featurizer.atom_fdim + + assert molgraph.V.shape == (n_atoms, atom_fdim) + + bonds = [ + b.bond + for b in bond_expect_imbalanced[rxn_smi] + if not (b.bond_reac_none and b.bond_prod_none) + ] + bond_fdim = featurizer.bond_fdim + + assert molgraph.E.shape == (len(bonds) * 2, bond_fdim) + + expect_edge_index = [[], []] + expect_rev_edge_index = [] + + for i, bond in enumerate(bonds): + bond = list(bond) + expect_edge_index[0].extend(bond) + expect_edge_index[1].extend(bond[::-1]) + expect_rev_edge_index.extend([i * 2 + 1, i * 2]) + + assert np.array_equal(molgraph.edge_index, expect_edge_index) + assert np.array_equal(molgraph.rev_edge_index, expect_rev_edge_index) diff --git a/chemprop/tests/unit/featurizers/test_molecule.py b/chemprop/tests/unit/featurizers/test_molecule.py new file mode 100644 index 0000000000000000000000000000000000000000..2a14ad8866d81605f4907808a1d86223ee4d046d --- /dev/null +++ b/chemprop/tests/unit/featurizers/test_molecule.py @@ -0,0 +1,238 @@ +# flake8: noqa +import sys + +import numpy as np +import pytest +from rdkit import Chem + +from chemprop.featurizers import ( + MorganBinaryFeaturizer, + MorganCountFeaturizer, + RDKit2DFeaturizer, + V1RDKit2DFeaturizer, + V1RDKit2DNormalizedFeaturizer, +) + + +@pytest.fixture +def mol(): + return Chem.MolFromSmiles("Fc1cccc(C2(c3nnc(Cc4cccc5ccccc45)o3)CCOCC2)c1") + + +# fmt: off +@pytest.fixture +def morgan_binary_bits(): + return np.array([[ 80, 230, 332, 378, 429, 450, 502, 503, 523, 544, 556, + 645, 649, 656, 663, 699, 772, 875, 917, 926, 950, 1039, + 1060, 1087, 1088, 1104, 1136, 1162, 1164, 1199, 1349, 1357, 1380, + 1405, 1430, 1487, 1510, 1561, 1573, 1597, 1604, 1670, 1742, 1747, + 1750, 1824, 1855, 1873, 1928]]) + + +@pytest.fixture +def morgan_count_bits(): + return np.array([ 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, + 1, 1, 4, 2, 2, 1, 2, 4, 1, 1, 2, 2, 2, 1, 1, 7, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 2, 1, 11, 1]) + + +@pytest.fixture +def morgan_binary_custom(): + return np.array([[ 15, 36, 49, 63, 64, 80, 112, 138, 140, 175, 230, 275, 301, + 325, 332, 333, 339, 356, 378, 381, 406, 429, 450, 463, 465, 478, + 486, 502, 503, 517, 523, 524, 537, 544, 549, 554, 556, 573, 579, + 580, 645, 646, 647, 
649, 652, 656, 663, 699, 718, 721, 723, 726, + 731, 772, 773, 800, 818, 821, 828, 831, 836, 849, 865, 875, 887, + 894, 904, 917, 926, 950, 951, 989]]) + + +@pytest.fixture +def rdkit_2d_values(): + return np.array([ 13.9511, 13.9511, 0.2603, -0.5096, + 0.4909, 16.1724, 388.442 , 367.274 , + 388.1587, 146. , 0. , 0.2267, + -0.4239, 0.4239, 0.2267, 0.8966, + 1.6897, 2.5517, 19.1421, 9.7377, + 2.4117, -2.34 , 2.4051, -2.3511, + 5.8532, 0.054 , 3.2361, 1.5168, + 1143.0568, 19.6836, 15.9753, 15.9753, + 14.244 , 9.8787, 9.8787, 7.5208, + 7.5208, 5.8214, 5.8214, 4.26 , + 4.26 , -3.05 , 9626644.372 , 18.0088, + 7.4091, 3.3162, 167.8922, 9.154 , + 5.8172, 0. , 11.7814, 0. , + 0. , 0. , 4.3904, 0. , + 10.1974, 54.5973, 46.8737, 13.2138, + 11.8358, 13.5444, 10.7724, 0. , + 10.1974, 0. , 24.6775, 13.2138, + 95.4556, 0. , 0. , 0. , + 4.3904, 0. , 0. , 23.4111, + 16.5727, 5.8172, 35.75 , 71.1472, + 0. , 10.7724, 0. , 48.15 , + 5.415 , 4.3904, 0. , 5.8172, + 44.2577, 11.1269, 16.8388, 12.1327, + 24.2655, 34.4628, 9.154 , 25.6895, + 0. , 0. , 11.1016, 1.4962, + 0.851 , 21.1832, 1.9333, 1.1618, + 0. , 0.25 , 29. , 0. , + 4. , 0. , 1. , 1. , + 0. , 3. , 1. , 4. , + 0. , 0. , 4. , 0. , + 5. , 2. , 4. , 0. , + 1. , 1. , 0. , 0. , + 4.601 , 5. , 5.0492, 108.285 , + 0. , 0. , 0. , 0. , + 0. , 2. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 2. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 3. , 0. , + 1. , 0. , 0. , 0. , + 0. , 1. , 0. , 0. , + 1. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. ]) + +@pytest.fixture +def v1_rdkit_2d_values(): + return np.array([ 1.5168, 1143.0568, 19.6836, 15.9753, + 15.9753, 14.244 , 9.8787, 9.8787, + 7.5208, 7.5208, 5.8214, 5.8214, + 4.26 , 4.26 , 5.415 , 4.3904, + 0. , 5.8172, 44.2577, 11.1269, + 16.8388, 12.1327, 24.2655, 34.4628, + 9.154 , 388.1587, 0.8966, 1.6897, + 2.5517, 0.25 , -3.05 , 29. , + 367.274 , 9626644.372 , 18.0088, 7.4091, + 3.3162, 167.8922, 13.9511, 0.4239, + 13.9511, 0.2267, 0.2603, 0.2267, + -0.5096, -0.4239, 5.0492, 108.285 , + 388.442 , 0. , 4. , 0. , + 1. , 1. , 3. , 1. , + 4. , 4. , 0. , 5. , + 0. , 4. , 0. , 1. , + 1. , 146. , 9.154 , 5.8172, + 0. , 11.7814, 0. , 0. , + 0. , 4.3904, 0. , 10.1974, + 54.5973, 46.8737, 13.2138, 11.8358, + 5. , 13.5444, 10.7724, 0. , + 10.1974, 0. , 24.6775, 13.2138, + 95.4556, 0. , 0. , 0. , + 4.3904, 0. , 0. , 23.4111, + 16.5727, 5.8172, 35.75 , 71.1472, + 0. , 10.7724, 0. , 48.15 , + 25.6895, 0. , 0. , 11.1016, + 1.4962, 0.851 , 21.1832, 1.9333, + 1.1618, 0. , 0. , 0. , + 0. , 0. , 0. , 2. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 2. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 3. , 0. , 1. , 0. , + 0. , 0. , 0. , 1. , + 0. , 0. , 1. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0.4909]) + +@pytest.fixture +def v1_rdkit_2d_normalized_values(): + return np.array([0.2662, 0.6887, 0.5077, 0.5362, 0.4843, 0.6014, 0.6126, 0.534 , + 0.6197, 0.513 , 0.7176, 0.6135, 0.7476, 0.6436, 0.5736, 0.2421, + 0. , 0.2162, 0.9261, 0.2905, 0.8332, 0.5472, 0.6221, 0.8157, + 0.5639, 0.4934, 0.1407, 0.2732, 0.553 , 0.3169, 0.3848, 0.5742, + 0.4977, 1. 
, 0.4275, 0.3974, 0.4283, 0.5421, 0.8529, 0.349 , + 0.8529, 0.2728, 0.8296, 0.2614, 0.4263, 0.6376, 0.8529, 0.5321, + 0.4905, 0.0613, 0.1937, 0. , 0.9187, 0.5 , 0.964 , 0.865 , + 0.9176, 0.3071, 0.0553, 0.2075, 0. , 0.2143, 0. , 0.98 , + 0.8807, 0.5194, 0.3119, 0.4701, 0. , 0.9161, 0. , 0. , + 0.06 , 0.6132, 0. , 1. , 0.8269, 0.6454, 0.2879, 0.4656, + 0.8852, 0.5202, 0.218 , 0.1671, 0.4275, 0. , 0.5073, 0.4523, + 0.9257, 0.0001, 0. , 0.0373, 0.9759, 0. , 0. , 0.2569, + 0.6995, 0.9386, 0.6704, 0.8781, 0. , 0.9855, 0.0001, 0.1612, + 0.0001, 0.5 , 0.3847, 0.0001, 0.0001, 0.9999, 0.0001, 0.9987, + 0.646 , 0.0203, 0. , 0. , 0. , 0. , 0. , 0.9012, + 0.1651, 0.167 , 0.1665, 0.1665, 0.2029, 0.0694, 0. , 0.1683, + 0.168 , 0.5223, 0.0012, 0.1643, 0.0008, 0.1663, 0.163 , 0.1651, + 0. , 0. , 0.1682, 0.1658, 0.1673, 0. , 0. , 0.0999, + 0. , 0.3777, 0.0045, 0.1333, 0.964 , 0. , 0.914 , 0. , + 0. , 0.4993, 0.1649, 0.7608, 0. , 0. , 0.9095, 0. , + 0.1681, 0.1655, 0. , 0. , 0.1647, 0.1669, 0. , 0. , + 0. , 0.1547, 0. , 0. , 0.1676, 0. , 0.1682, 0.0091, + 0.1684, 0. , 0.1563, 0. , 0. , 0.0211, 0.0211, 0. , + 0. , 0. , 0.0001, 0.157 , 0. , 0. , 0. , 0. , + 0. , 0.1684, 0.1674, 0. , 0. , 0. , 0.1666, 0.3442]) +# fmt: on + + +def test_morgan_binary(mol, morgan_binary_bits): + featurizer = MorganBinaryFeaturizer() + features = featurizer(mol) + + np.testing.assert_array_almost_equal(np.nonzero(features), morgan_binary_bits) + + +def test_morgan_count(mol, morgan_count_bits, morgan_binary_bits): + featurizer = MorganCountFeaturizer() + features = featurizer(mol) + + np.testing.assert_array_almost_equal(features[np.nonzero(features)], morgan_count_bits) + + +def test_morgan_binary_custom(mol, morgan_binary_custom): + featurizer = MorganBinaryFeaturizer(radius=3, length=1024) + features = featurizer(mol) + + np.testing.assert_array_almost_equal(np.nonzero(features), morgan_binary_custom) + + +@pytest.mark.skipif( + sys.platform.startswith("win"), reason="rdkit's BertzCT gives different values on Windows" +) +def test_rdkit_2d(mol, rdkit_2d_values): + featurizer = RDKit2DFeaturizer() + features = featurizer(mol) + + np.testing.assert_array_almost_equal(features, rdkit_2d_values, decimal=2) + + +@pytest.mark.skipif( + sys.platform.startswith("win"), reason="rdkit's BertzCT gives different values on Windows" +) +def test_v1_rdkit_2d(mol, v1_rdkit_2d_values): + featurizer = V1RDKit2DFeaturizer() + features = featurizer(mol) + + np.testing.assert_array_almost_equal(features, v1_rdkit_2d_values, decimal=2) + + +@pytest.mark.skipif( + sys.platform.startswith("win"), reason="rdkit's BertzCT gives different values on Windows" +) +def test_v1_rdkit_2d_normalized(mol, v1_rdkit_2d_normalized_values): + featurizer = V1RDKit2DNormalizedFeaturizer() + features = featurizer(mol) + + np.testing.assert_array_almost_equal(features, v1_rdkit_2d_normalized_values, decimal=2) diff --git a/chemprop/tests/unit/featurizers/test_molgraph.py b/chemprop/tests/unit/featurizers/test_molgraph.py new file mode 100644 index 0000000000000000000000000000000000000000..a9cb948b828f6fa349ddf4c143f47bed9cb3045a --- /dev/null +++ b/chemprop/tests/unit/featurizers/test_molgraph.py @@ -0,0 +1,115 @@ +import numpy as np +import pytest +from rdkit import Chem + +from chemprop.data.molgraph import MolGraph +from chemprop.featurizers.atom import MultiHotAtomFeaturizer +from chemprop.featurizers.molgraph import SimpleMoleculeMolGraphFeaturizer + + +@pytest.fixture(params=[0, 10, 100]) +def extra(request): + return request.param + + +@pytest.fixture +def 
atom_features_extra(mol, extra): + n_a = mol.GetNumAtoms() + + return np.random.rand(n_a, extra) + + +@pytest.fixture +def bond_features_extra(mol, extra): + n_b = mol.GetNumBonds() + + return np.random.rand(n_b, extra) + + +@pytest.fixture +def mol_featurizer(): + return SimpleMoleculeMolGraphFeaturizer() + + +@pytest.fixture +def mol_featurizer_extra(extra): + return SimpleMoleculeMolGraphFeaturizer(None, None, extra, extra) + + +@pytest.fixture +def mg(mol, mol_featurizer): + return mol_featurizer(mol) + + +def test_atom_fdim(extra): + mf = SimpleMoleculeMolGraphFeaturizer(extra_atom_fdim=extra) + + assert mf.atom_fdim == len(mf.atom_featurizer) + extra + + +def test_V_shape(mol, mol_featurizer: SimpleMoleculeMolGraphFeaturizer, mg: MolGraph): + n_a = mol.GetNumAtoms() + d_a = mol_featurizer.atom_fdim + + assert mg.V.shape == (n_a, d_a) + + +def test_E_shape(mol, mol_featurizer: SimpleMoleculeMolGraphFeaturizer, mg: MolGraph): + n_b = mol.GetNumBonds() + d_b = mol_featurizer.bond_fdim + + assert mg.E.shape == (2 * n_b, d_b) + + +def test_x2y_len(mol: Chem.Mol, mg: MolGraph): + num_bonds = mol.GetNumBonds() + + assert mg.edge_index.shape == (2, 2 * num_bonds) + assert mg.rev_edge_index.shape == (2 * num_bonds,) + + +def test_composability(mol): + mf1 = SimpleMoleculeMolGraphFeaturizer(MultiHotAtomFeaturizer.v1(50)) + mf2 = SimpleMoleculeMolGraphFeaturizer(MultiHotAtomFeaturizer.v1(100)) + + assert mf1(mol).V.shape != mf2(mol).V.shape + + +def test_invalid_atom_extra_shape(mol_featurizer, mol): + n_a = mol.GetNumAtoms() + with pytest.raises(ValueError): + mol_featurizer(mol, atom_features_extra=np.random.rand(n_a + 1, 10)) + + +def test_invalid_bond_extra_shape(mol_featurizer, mol): + n_b = mol.GetNumBonds() + with pytest.raises(ValueError): + mol_featurizer(mol, bond_features_extra=np.random.rand(n_b + 1, 10)) + + +def test_atom_extra_shape(mol, extra, atom_features_extra): + mf = SimpleMoleculeMolGraphFeaturizer(extra_atom_fdim=extra) + mg = mf(mol, atom_features_extra=atom_features_extra) + + assert mg.V.shape == (mol.GetNumAtoms(), mf.atom_fdim) + + +def test_atom_extra_values(mol, extra, atom_features_extra): + mf = SimpleMoleculeMolGraphFeaturizer(extra_atom_fdim=extra) + mg = mf(mol, atom_features_extra=atom_features_extra) + + np.testing.assert_array_equal(mg.V[:, len(mf.atom_featurizer) :], atom_features_extra) + + +def test_bond_extra(mol, extra, bond_features_extra): + mf = SimpleMoleculeMolGraphFeaturizer(extra_bond_fdim=extra) + mg = mf(mol, bond_features_extra=bond_features_extra) + + assert mg.E.shape == (2 * mol.GetNumBonds(), mf.bond_fdim) + + +def test_atom_bond_extra(mol, extra, atom_features_extra, bond_features_extra): + mf = SimpleMoleculeMolGraphFeaturizer(extra_atom_fdim=extra, extra_bond_fdim=extra) + mg = mf(mol, atom_features_extra, bond_features_extra) + + assert mg.E.shape == (2 * mol.GetNumBonds(), len(mf.bond_featurizer) + extra) diff --git a/chemprop/tests/unit/nn/test_loss_functions.py b/chemprop/tests/unit/nn/test_loss_functions.py new file mode 100644 index 0000000000000000000000000000000000000000..3d748f5e2095569f6f69f9e49869561d4ad0f787 --- /dev/null +++ b/chemprop/tests/unit/nn/test_loss_functions.py @@ -0,0 +1,504 @@ +"""Chemprop unit tests for chemprop/models/loss.py""" + +import numpy as np +import pytest +import torch + +from chemprop.nn.metrics import ( + SID, + BCELoss, + BinaryMCCLoss, + BoundedMSE, + CrossEntropyLoss, + DirichletLoss, + EvidentialLoss, + MulticlassMCCLoss, + MVELoss, + Wasserstein, +) + + +@pytest.mark.parametrize( + 
"preds,targets,mask,weights,task_weights,lt_mask,gt_mask,mse", + [ + ( + torch.tensor([[-3, 2], [1, -1]], dtype=torch.float), + torch.zeros([2, 2], dtype=torch.float), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.zeros([2, 2], dtype=torch.bool), + torch.zeros([2, 2], dtype=torch.bool), + torch.tensor(3.75000, dtype=torch.float), + ), + ( + torch.tensor([[-3, 2], [1, -1]], dtype=torch.float), + torch.zeros([2, 2], dtype=torch.float), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.zeros([2, 2], dtype=torch.bool), + torch.ones([2, 2], dtype=torch.bool), + torch.tensor(2.5000, dtype=torch.float), + ), + ( + torch.tensor([[-3, 2], [1, -1]], dtype=torch.float), + torch.zeros([2, 2], dtype=torch.float), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.ones([2, 2], dtype=torch.bool), + torch.zeros([2, 2], dtype=torch.bool), + torch.tensor(1.25000, dtype=torch.float), + ), + ], +) +def test_BoundedMSE(preds, targets, mask, weights, task_weights, lt_mask, gt_mask, mse): + """ + Testing the bounded_mse loss function + """ + bmse_loss = BoundedMSE(task_weights) + loss = bmse_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, mse) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,likelihood", + [ + ( + torch.tensor([[0, 1]], dtype=torch.float), + torch.zeros([1, 1]), + torch.ones([1, 2], dtype=torch.bool), + torch.ones([1]), + torch.ones([2]), + torch.zeros([2], dtype=torch.bool), + torch.zeros([2], dtype=torch.bool), + torch.tensor(0.39894228, dtype=torch.float), + ) + ], +) +def test_MVE(preds, targets, mask, weights, task_weights, lt_mask, gt_mask, likelihood): + """ + Tests the normal_mve loss function + """ + mve_loss = MVELoss(task_weights) + nll_calc = mve_loss(preds, targets, mask, weights, lt_mask, gt_mask) + likelihood_calc = np.exp(-1 * nll_calc) + torch.testing.assert_close(likelihood_calc, likelihood) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,v_kl,expected_loss", + [ + ( + torch.tensor([[[2, 2]]]), + torch.ones([1, 1]), + torch.ones([1, 2], dtype=torch.bool), + torch.ones([1]), + torch.ones([1]), + torch.zeros([1], dtype=torch.bool), + torch.zeros([1], dtype=torch.bool), + 0, + torch.tensor(0.6, dtype=torch.float), + ), + ( + torch.tensor([[[2, 2]]]), + torch.ones([1, 1]), + torch.ones([1, 2], dtype=torch.bool), + torch.ones([1]), + torch.ones([1]), + torch.zeros([1], dtype=torch.bool), + torch.zeros([1], dtype=torch.bool), + 0.2, + torch.tensor(0.63862943, dtype=torch.float), + ), + ], +) +def test_BinaryDirichlet( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask, v_kl, expected_loss +): + """ + Test on the dirichlet loss function for classification. + Note these values were not hand derived, just testing for + dimensional consistency. 
+ """ + binary_dirichlet_loss = DirichletLoss(task_weights=task_weights, v_kl=v_kl) + loss = binary_dirichlet_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,", + [ + ( + torch.ones([1, 1]), + torch.ones([1, 1]), + torch.ones([1, 2], dtype=torch.bool), + torch.ones([1]), + torch.ones([1]), + torch.zeros([1], dtype=torch.bool), + torch.zeros([1], dtype=torch.bool), + ) + ], +) +def test_BinaryDirichlet_wrong_dimensions( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask +): + """ + Test on the dirichlet loss function for classification + for dimension errors. + """ + with pytest.raises(IndexError): + binary_dirichlet_loss = DirichletLoss(task_weights) + binary_dirichlet_loss(preds, targets, mask, weights, lt_mask, gt_mask) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,v_kl,expected_loss", + [ + ( + torch.tensor([[[0.2, 0.1, 0.3], [0.1, 0.3, 0.1]], [[1.2, 0.5, 1.7], [1.1, 1.4, 0.8]]]), + torch.tensor([[0, 0], [1, 1]]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.zeros([2], dtype=torch.bool), + torch.zeros([2], dtype=torch.bool), + 0.2, + torch.tensor(1.868991, dtype=torch.float), + ), + ( + torch.tensor([[[0.2, 0.1, 0.3], [0.1, 0.3, 0.1]], [[1.2, 0.5, 1.7], [1.1, 1.4, 0.8]]]), + torch.tensor([[0, 0], [1, 1]]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.zeros([2], dtype=torch.bool), + torch.zeros([2], dtype=torch.bool), + 0.0, + torch.tensor(1.102344, dtype=torch.float), + ), + ], +) +def test_MulticlassDirichlet( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask, v_kl, expected_loss +): + """ + Test on the dirichlet loss function for classification. + Note these values were not hand derived, just testing for + dimensional consistency. + """ + multiclass_dirichlet_loss = DirichletLoss(task_weights=task_weights, v_kl=v_kl) + loss = multiclass_dirichlet_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,v_kl,expected_loss", + [ + ( + torch.tensor([[2, 2, 2, 2]]), + torch.ones([1, 1]), + torch.ones([1, 1], dtype=torch.bool), + torch.ones([1]), + torch.ones([1]), + torch.zeros([1], dtype=torch.bool), + torch.zeros([1], dtype=torch.bool), + 0, + torch.tensor(1.56893861, dtype=torch.float), + ), + ( + torch.tensor([[2, 2, 2, 2]]), + torch.ones([1, 1]), + torch.ones([1, 1], dtype=torch.bool), + torch.ones([1]), + torch.ones([1]), + torch.zeros([1], dtype=torch.bool), + torch.zeros([1], dtype=torch.bool), + 0.2, + torch.tensor(2.768938541, dtype=torch.float), + ), + ], +) +def test_Evidential( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask, v_kl, expected_loss +): + """ + Test on the evidential loss function for classification. + Note these values were not hand derived, just testing for + dimensional consistency. 
+ """ + evidential_loss = EvidentialLoss(task_weights=task_weights, v_kl=v_kl) + loss = evidential_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask", + [ + ( + torch.ones([2, 2]), + torch.ones([2, 2]), + torch.ones([1, 1], dtype=torch.bool), + torch.ones([1]), + torch.ones([1]), + torch.zeros([1], dtype=torch.bool), + torch.zeros([1], dtype=torch.bool), + ) + ], +) +def test_Evidential_wrong_dimensions(preds, targets, mask, weights, task_weights, lt_mask, gt_mask): + """ + Test on the Evidential loss function for classification + for dimension errors. + """ + evidential_loss = EvidentialLoss(task_weights) + with pytest.raises(ValueError): + evidential_loss(preds, targets, mask, weights, lt_mask, gt_mask) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,expected_loss", + [ + ( + torch.tensor([2, 2], dtype=torch.float), + torch.ones([2], dtype=torch.float), + torch.ones([2], dtype=torch.bool), + torch.ones([1]), + torch.ones([2]), + torch.zeros([2], dtype=torch.bool), + torch.zeros([2], dtype=torch.bool), + torch.tensor(0.126928, dtype=torch.float), + ), + ( + torch.tensor([0.5, 0.5], dtype=torch.float), + torch.ones([2], dtype=torch.float), + torch.ones([2], dtype=torch.bool), + torch.ones([1]), + torch.ones([2]), + torch.zeros([2], dtype=torch.bool), + torch.zeros([2], dtype=torch.bool), + torch.tensor(0.474077, dtype=torch.float), + ), + ], +) +def test_BCE(preds, targets, mask, weights, task_weights, lt_mask, gt_mask, expected_loss): + """ + Test on the BCE loss function for classification. + """ + bce_loss = BCELoss(task_weights) + loss = bce_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,expected_loss", + [ + ( + torch.tensor([[[1.2, 0.5, 0.7], [-0.1, 0.3, 0.1]], [[1.2, 0.5, 0.7], [1.1, 1.3, 1.1]]]), + torch.tensor([[1, 0], [1, 2]]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2, 2], dtype=torch.bool), + torch.tensor(1.34214, dtype=torch.float), + ), + ( + torch.tensor([[[1.2, 1.5, 0.7], [-0.1, 2.3, 1.1]], [[1.2, 1.5, 1.7], [2.1, 1.3, 1.1]]]), + torch.tensor([[1, 1], [2, 2]], dtype=torch.float64), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2, 2], dtype=torch.bool), + torch.tensor(0.899472, dtype=torch.float), + ), + ], +) +def test_CrossEntropy(preds, targets, mask, weights, task_weights, lt_mask, gt_mask, expected_loss): + """ + Test on the CE loss function for classification. + Note these values were not hand derived, just testing for + dimensional consistency. 
+ """ + cross_entropy_loss = CrossEntropyLoss(task_weights) + loss = cross_entropy_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,expected_loss", + [ + ( + torch.tensor([0, 1, 1, 0]), + torch.tensor([0, 1, 1, 0]), + torch.ones([4], dtype=torch.bool), + torch.ones(1), + torch.ones(4), + torch.zeros([1, 4], dtype=torch.bool), + torch.zeros([1, 4], dtype=torch.bool), + torch.tensor(0, dtype=torch.float), + ), + ( + torch.tensor([0, 1, 0, 1, 1, 1, 0, 1, 1]), + torch.tensor([0, 1, 1, 0, 1, 1, 0, 0, 1]), + torch.ones([9], dtype=torch.bool), + torch.ones(1), + torch.ones(9), + torch.zeros([1, 9], dtype=torch.bool), + torch.zeros([1, 9], dtype=torch.bool), + torch.tensor(0.683772, dtype=torch.float), + ), + ], +) +def test_BinaryMCC(preds, targets, mask, weights, task_weights, lt_mask, gt_mask, expected_loss): + """ + Test on the BinaryMCC loss function for classification. Values have been checked using TorchMetrics. + """ + binary_mcc_loss = BinaryMCCLoss(task_weights) + loss = binary_mcc_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,expected_loss", + [ + ( + torch.tensor( + [[[0.16, 0.26, 0.58], [0.22, 0.61, 0.17]], [[0.71, 0.09, 0.20], [0.05, 0.82, 0.13]]] + ), + torch.tensor([[2, 1], [0, 0]]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.zeros([2, 2], dtype=torch.bool), + torch.zeros([2, 2], dtype=torch.bool), + torch.tensor(0.5, dtype=torch.float), + ), + ( + torch.tensor( + [[[0.16, 0.26, 0.58], [0.22, 0.61, 0.17]], [[0.71, 0.09, 0.20], [0.05, 0.82, 0.13]]] + ), + torch.tensor([[2, 1], [0, 0]]), + torch.tensor([[1, 1], [0, 1]], dtype=torch.bool), + torch.ones([2]), + torch.ones([2]), + torch.zeros([2, 2], dtype=bool), + torch.zeros([2, 2], dtype=bool), + torch.tensor(1.0, dtype=torch.float), + ), + ], +) +def test_MulticlassMCC( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask, expected_loss +): + """ + Test on the MulticlassMCC loss function for classification. 
+ """ + multiclass_mcc_loss = MulticlassMCCLoss(task_weights) + loss = multiclass_mcc_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,threshold,expected_loss", + [ + ( + torch.tensor([[0.8, 0.2], [0.3, 0.7]]), + torch.tensor([[0.9, 0.1], [0.4, 0.6]]), + torch.ones([2, 2], dtype=torch.bool), + torch.ones([1]), + torch.ones([2]), + torch.ones([2], dtype=torch.bool), + torch.ones([2], dtype=torch.bool), + None, + torch.tensor(0.031319, dtype=torch.float), + ), + ( + torch.tensor([[0.6, 0.4], [0.2, 0.8]]), + torch.tensor([[0.7, 0.3], [0.3, 0.7]]), + torch.tensor([[1, 1], [1, 0]], dtype=torch.bool), + torch.ones([1]), + torch.ones([2]), + torch.ones([2], dtype=torch.bool), + torch.ones([2], dtype=torch.bool), + None, + torch.tensor(0.295655, dtype=torch.float), + ), + ( + torch.tensor([[0.6, 0.4], [0.2, 0.8]]), + torch.tensor([[0.7, 0.3], [0.3, 0.7]]), + torch.tensor([[1, 1], [1, 1]], dtype=torch.bool), + torch.ones([1]), + torch.ones([2]), + torch.ones([2], dtype=torch.bool), + torch.ones([2], dtype=torch.bool), + 0.5, + torch.tensor(0.033673, dtype=torch.float), + ), + ], +) +def test_SID( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask, threshold, expected_loss +): + """ + Test on the SID loss function. These values were not handchecked, + just checking function returns values with/without mask and threshold. + """ + sid_loss = SID(task_weights=task_weights, threshold=threshold) + loss = sid_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +@pytest.mark.parametrize( + "preds,targets,mask,weights,task_weights,lt_mask,gt_mask,threshold,expected_loss", + [ + ( + torch.tensor([[0.1, 0.3, 0.5, 0.7], [0.2, 0.4, 0.6, 0.8]]), + torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]), + torch.tensor([[1, 1, 1, 1], [1, 0, 1, 0]], dtype=torch.bool), + torch.ones([2, 1]), + torch.ones([1, 4]), + torch.zeros([2, 4], dtype=torch.bool), + torch.zeros([2, 4], dtype=torch.bool), + None, + torch.tensor(0.1125, dtype=torch.float), + ), + ( + torch.tensor([[0.1, 0.3, 0.5, 0.7], [0.2, 0.4, 0.6, 0.8]]), + torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]), + torch.ones([2, 4], dtype=torch.bool), + torch.ones([2, 1]), + torch.ones([1, 4]), + torch.zeros([2, 4], dtype=torch.bool), + torch.zeros([2, 4], dtype=torch.bool), + None, + torch.tensor(0.515625, dtype=torch.float), + ), + ( + torch.tensor([[0.1, 0.3, 0.5, 0.7], [0.2, 0.4, 0.6, 0.8]]), + torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]), + torch.ones([2, 4], dtype=torch.bool), + torch.ones([2, 1]), + torch.ones([1, 4]), + torch.zeros([2, 4], dtype=torch.bool), + torch.zeros([2, 4], dtype=torch.bool), + 0.3, + torch.tensor(0.501984, dtype=torch.float), + ), + ], +) +def test_Wasserstein( + preds, targets, mask, weights, task_weights, lt_mask, gt_mask, threshold, expected_loss +): + """ + Test on the Wasserstein loss function. These values were not handchecked, + just checking function returns values with/without mask and threshold. 
+ """ + wasserstein_loss = Wasserstein(task_weights=task_weights, threshold=threshold) + loss = wasserstein_loss(preds, targets, mask, weights, lt_mask, gt_mask) + torch.testing.assert_close(loss, expected_loss) + + +# TODO: Add quantile loss tests diff --git a/chemprop/tests/unit/nn/test_metrics.py b/chemprop/tests/unit/nn/test_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..41ced0997258265b2312860ad7aa0dd2a1bae9b4 --- /dev/null +++ b/chemprop/tests/unit/nn/test_metrics.py @@ -0,0 +1,278 @@ +from io import StringIO +import re + +from lightning import pytorch as pl +from lightning.pytorch.callbacks.progress.tqdm_progress import Tqdm, TQDMProgressBar +import pytest +import torch +from torch.nn import functional as F +from torch.utils.data import DataLoader, Dataset + +from chemprop.nn.metrics import ( + MAE, + MSE, + RMSE, + SID, + BCELoss, + BinaryAccuracy, + BinaryAUPRC, + BinaryAUROC, + BinaryF1Score, + BinaryMCCLoss, + BinaryMCCMetric, + BoundedMAE, + BoundedMSE, + BoundedRMSE, + CrossEntropyLoss, + DirichletLoss, + EvidentialLoss, + MulticlassMCCLoss, + MulticlassMCCMetric, + MVELoss, + R2Score, + Wasserstein, +) + +reg_targets = torch.arange(-20, 20, dtype=torch.float32).view(-1, 2) +# fmt: off +b_class_targets = torch.tensor( + [0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, + 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0], dtype=torch.float32 +).view(-1, 2) +m_class_targets = torch.tensor( + [0, 2, 1, 0, 2, 0, 2, 2, 1, 0, 1, 1, 0, 1, 2, 1, 0, 0, 1, 0, + 0, 0, 0, 2, 1, 2, 2, 1, 2, 2, 2, 0, 1, 1, 0, 0, 1, 1, 2, 0], dtype=torch.float32 +).view(-1, 2) +raw_spectra = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 4, 3, 2, 1, + 4, 3, 2, 1, 0, 1, 2, 3, 4, 5, + 9, 1, 8, 0, 5, 4, 3, 6, 8, 3, + 2, 1, 6, 4, 7, 2, 6, 2, 5, 1, + 5, 3, 4, 4, 4, 4, 5, 1, 2, 8, + 9, 7, 6, 5, 4, 3, 2, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, + 9, 0, 1, 5, 2, 6, 2, 7, 4, 7, + 3, 8, 2, 9, 1, 0, 1, 0, 1, 0, + 4, 4, 2, 3, 1, 3, 2, 3, 1, 3, + 9, 1, 8, 0, 5, 4, 3, 6, 8, 3], dtype=torch.float32).view(-1, 10) +spectral_targets = raw_spectra / raw_spectra.sum(1, keepdim=True) + +mockffn = torch.tensor([ + -3.7, -14.2, 3.4, 7.5, 11.7, 13.8, 10.2, -0.7, 9.2, -8.0, + -5.3, -2.7, -5.3, -14.4, 1.3, 9.0, -0.4, -10.9, 14.8, 16.4, + 10.9, 5.8, -18.9, 3.6, 18.3, -2.7, -16.8, -8.4, 9.7, -7.2, + 17.1, -9.6, -3.3, -1.0, -11.9, -19.6, -12.3, -13.9, -1.1, -6.0, + 1.1, 12.0, -7.8, 0.2, -12.9, 13.8, 1.1, -9.4, 4.3, -14.9, + 10.0, 9.2, -1.3, -4.4, -7.0, 18.5, -17.5, -0.3, -13.2, -0.1, + 16.2, -14.6, -19.6, 5.5, 4.7, -4.5, -4.9, 13.8, 12.3, -6.9, +-12.1, -18.6, -9.5, 9.8, -9.6, -9.9, 8.7, 0.5, 11.2, 13.0, + -1.2, 4.2, -15.9, 11.4, 14.6, -19.9, 14.7, -3.0, -10.0, 9.5, + 9.0, -6.8, -13.0, -18.0, -12.6, 8.5, 16.9, -17.8, -11.2, 14.5, +-11.8, -5.1, 5.1, 8.5, -4.2, 11.6, 14.5, 19.7, -17.1, 19.0, + 19.2, 17.7, -4.9, 0.7, -16.5, 2.9, 11.3, -5.5, 17.8, 14.6, + -4.2, -1.4, -7.3, 8.4, -8.0, 2.5, 17.5, 13.3, -6.0, -7.9, + 3.5, -2.8, 2.8, 15.3, 15.2, -9.3, -1.0, -20.0, -19.6, -16.7, +-15.5, -10.3, -16.6, 17.9, 18.3, 4.2, -15.8, 5.8, 13.0, 7.9, + 19.7, 7.7, 16.5, 1.8, -16.6, -4.3, 2.9, 18.4, 4.2, 13.1, + ], dtype=torch.float32, +) +# fmt: on + +reg_train_step = mockffn.clone()[:40].view(-1, 2) +reg_forward = reg_train_step.clone() +mve_train_step = torch.stack( + (mockffn.clone()[:40].view(-1, 2), F.softplus(mockffn.clone()[40:80].view(-1, 2))), 2 +) +mve_forward = mve_train_step.clone() 
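+# The mock FFN outputs are reshaped into the layout each loss expects: a plain
+# (n_samples, n_tasks) matrix for the regression losses above, a (n_samples, n_tasks, 2)
+# stack for MVELoss whose second channel is made positive with softplus (a mean and a
+# positive spread term per task), and, below, a (n_samples, n_tasks, 4) stack for
+# EvidentialLoss whose last three channels are made positive with softplus (the third
+# shifted by +1), which appears to follow the usual (mean, lambda, alpha, beta)
+# evidential parameterization.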
+evi_train_step = torch.stack( + ( + mockffn.clone()[:40].view(-1, 2), + F.softplus(mockffn.clone()[40:80].view(-1, 2)), + F.softplus(mockffn.clone()[80:120].view(-1, 2)) + 1, + F.softplus(mockffn.clone()[120:160].view(-1, 2)), + ), + 2, +) +evi_forward = evi_train_step.clone() + +b_class_train_step = mockffn.clone()[:40].view(-1, 2) +b_class_forward = b_class_train_step.clone().sigmoid() +b_diri_train_step = F.softplus(mockffn.clone()[0:80].view(-1, 2, 2)) + 1 +b_diri_forward = b_diri_train_step[..., 1] / b_diri_train_step.sum(-1) + +m_class_train_step = mockffn.clone()[:120].view(20, 2, 3) +m_class_forward = m_class_train_step.clone().softmax(-1) +m_diri_train_step = F.softplus(mockffn.clone()[:120].view(20, 2, 3)) + 1 +m_diri_forward = m_diri_train_step / m_diri_train_step.sum(-1, keepdim=True) +spectral_train_step = mockffn.clone()[:150].view(-1, 10).exp() / mockffn.clone()[:150].view( + -1, 10 +).exp().sum(1, keepdim=True) +spectral_forward = spectral_train_step.clone() + +# fmt: off +mask = torch.tensor( + [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, + 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=torch.bool +).view(-1, 2) +spectral_mask = torch.tensor( + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, + 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, + 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, + 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 1, 1, 1, 1, 1], dtype=torch.bool +).view(-1, 10) +# fmt: on + + +class _MockDataset(Dataset): + def __init__(self, train_step, forward, targets, mask): + self.train_step = train_step + self.forward = forward + self.targets = targets + # fmt: off + self.mask = mask + self.w = torch.linspace(0.1, 1, len(self.targets), dtype=torch.float32).view(-1, 1) + self.lt_mask = torch.tensor( + [0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1], dtype=torch.bool + ).view(-1, 2) + self.gt_mask = torch.tensor( + [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, + 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=torch.bool + ).view(-1, 2) + # fmt: on + + def __len__(self): + return len(self.targets) + + def __getitem__(self, idx): + return ( + self.train_step[idx], + self.forward[idx], + self.targets[idx], + self.mask[idx], + self.w[idx], + self.lt_mask[idx], + self.gt_mask[idx], + ) + + +class _MockMPNN(pl.LightningModule): + def __init__(self, criterion, metric): + super().__init__() + self.automatic_optimization = False + self.ignore = torch.nn.Parameter(torch.tensor(0.0)) + self.criterion = criterion + self.metrics = torch.nn.ModuleList([metric, self.criterion.clone()]) + + def training_step(self, batch, batch_idx): + train_step, _, targets, mask, w, lt_mask, gt_mask = batch + loss = self.criterion(train_step, targets, mask, w, lt_mask, gt_mask) + self.log("train_loss", self.criterion, prog_bar=True, on_epoch=True) + return loss + + def validation_step(self, batch, batch_idx): + self._evalute_batch(batch, "val") + + train_step, _, targets, mask, w, lt_mask, gt_mask = batch + self.metrics[-1].update(train_step, targets, mask, w, lt_mask, gt_mask) + self.log("val_loss", self.metrics[-1], prog_bar=True) + + def test_step(self, batch, batch_idx): + 
self._evalute_batch(batch, "test") + + def _evalute_batch(self, batch, val_test): + _, forward, targets, mask, w, lt_mask, gt_mask = batch + if isinstance(self.metrics[-1], (MVELoss, EvidentialLoss)): + forward = forward[..., 0] + self.metrics[0].update(forward, targets, mask, w, lt_mask, gt_mask) + self.log(f"{val_test}_metric", self.metrics[0], prog_bar=True) + + def configure_optimizers(self): + return torch.optim.Adam(self.parameters(), lr=1e-3) + + +class _TestBar(TQDMProgressBar): + def __init__(self, bar_as_text, *args, **kwargs): + super().__init__(*args, **kwargs) + self.bar_as_text = bar_as_text + + def init_train_tqdm(self) -> Tqdm: + return Tqdm( + desc=self.train_description, + position=(2 * self.process_position), + disable=self.is_disabled, + leave=True, + dynamic_ncols=True, + file=self.bar_as_text, + smoothing=0, + bar_format=self.BAR_FORMAT, + ) + + +# fmt: off +groups = [ + (MSE(), R2Score(), reg_train_step, reg_forward, reg_targets, mask), + (MAE(), MSE(), reg_train_step, reg_forward, reg_targets, mask), + (RMSE(), MAE(), reg_train_step, reg_forward, reg_targets, mask), + (BoundedMSE(), RMSE(), reg_train_step, reg_forward, reg_targets, mask), + (BoundedMAE(), BoundedMSE(), reg_train_step, reg_forward, reg_targets, mask), + (BoundedRMSE(), BoundedMAE(), reg_train_step, reg_forward, reg_targets, mask), + (MSE(), BoundedRMSE(), reg_train_step, reg_forward, reg_targets, mask), + (MVELoss(), MSE(), mve_train_step, mve_forward, reg_targets, mask), + (EvidentialLoss(), MSE(), evi_train_step, evi_forward, reg_targets, mask), + (BCELoss(), BinaryMCCMetric(), b_class_train_step, b_class_forward, b_class_targets, mask), + (BinaryMCCLoss(), BinaryAUROC(), b_class_train_step, b_class_forward, b_class_targets, mask), + (BCELoss(), BinaryAUPRC(), b_class_train_step, b_class_forward, b_class_targets, mask), + (BCELoss(), BinaryAccuracy(), b_class_train_step, b_class_forward, b_class_targets, mask), + (DirichletLoss(), BinaryF1Score(), b_diri_train_step, b_diri_forward, b_class_targets, mask), + (CrossEntropyLoss(), MulticlassMCCMetric(), m_class_train_step, m_class_forward, m_class_targets, mask), + (MulticlassMCCLoss(), MulticlassMCCMetric(), m_class_train_step, m_class_forward, m_class_targets, mask), + (DirichletLoss(), MulticlassMCCMetric(), m_diri_train_step, m_diri_forward, m_class_targets, mask), + (SID(), Wasserstein(), spectral_train_step, spectral_forward, spectral_targets, spectral_mask), + (Wasserstein(), SID(), spectral_train_step, spectral_forward, spectral_targets, spectral_mask), +] +# fmt: on + + +@pytest.mark.parametrize("loss_fn, metric_fn, train_step, forward, targets, mask", groups) +def test_metric_integeration(loss_fn, metric_fn, train_step, forward, targets, mask): + model = _MockMPNN(loss_fn, metric_fn) + + dataset = _MockDataset(train_step, forward, targets, mask) + train_loader = DataLoader(dataset, batch_size=5, shuffle=True) + val_loader = DataLoader(dataset, batch_size=5, shuffle=False) + test_loader = DataLoader(dataset, batch_size=20, shuffle=False) + + bar_as_text = StringIO() + trainer = pl.Trainer(max_epochs=2, log_every_n_steps=1, callbacks=[_TestBar(bar_as_text)]) + trainer.fit(model, train_loader, val_loader) + + x = bar_as_text.getvalue() + train_losses = re.findall(r"train_loss_epoch=(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)", x) + val_losses = re.findall(r"val_loss=(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)", x) + val_metrics = re.findall(r"val_metric=(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)", x) + + test_results = trainer.test(model, test_loader) + test_metric = 
test_results[0]["test_metric"] + + for train_loss in train_losses: + for val_loss in val_losses: + train_loss, val_loss = float(train_loss), float(val_loss) + assert abs(train_loss - val_loss) <= 0.01 * max(abs(train_loss), abs(val_loss)) + + for value in val_metrics: + assert abs(float(value) - test_metric) <= 0.01 * max(abs(float(value)), abs(test_metric)) diff --git a/chemprop/tests/unit/nn/test_transforms.py b/chemprop/tests/unit/nn/test_transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..54bd4ed7a763f28fda9fe0e9c4b4e0795e36b340 --- /dev/null +++ b/chemprop/tests/unit/nn/test_transforms.py @@ -0,0 +1,156 @@ +import numpy as np +import pytest +from sklearn.preprocessing import StandardScaler +import torch + +from chemprop.nn.transforms import GraphTransform, ScaleTransform, UnscaleTransform + + +class MockBatchMolGraph: + def __init__(self, V, E): + self.V = V + self.E = E + + +@pytest.fixture +def mean(): + return np.array([0.0, 1.0], dtype=np.float32) + + +@pytest.fixture +def scale(): + return np.array([2.0, 3.0], dtype=np.float32) + + +@pytest.fixture +def pad(): + return 2 + + +@pytest.fixture +def tensor_mean(mean, pad): + return torch.cat([torch.zeros(pad), torch.tensor(mean, dtype=torch.float)]) + + +@pytest.fixture +def tensor_scale(scale, pad): + return torch.cat([torch.ones(pad), torch.tensor(scale, dtype=torch.float)]) + + +@pytest.fixture +def scaler(mean, scale): + scaler = StandardScaler() + scaler.mean_ = mean + scaler.scale_ = scale + return scaler + + +@pytest.fixture +def scale_transform(scaler, pad): + return ScaleTransform.from_standard_scaler(scaler, pad) + + +@pytest.fixture +def unscale_transform(scaler, pad): + return UnscaleTransform.from_standard_scaler(scaler, pad=0) + + +@pytest.fixture +def graph_transform(scale_transform): + return GraphTransform(V_transform=scale_transform, E_transform=scale_transform) + + +@pytest.fixture +def X(): + return torch.tensor([[99.0, 99.0, 1.0, 2.0], [99.0, 99.0, 3.0, 4.0]]) + + +@pytest.fixture +def prediction(): + return torch.tensor([[1.0, 2.0]]) + + +@pytest.fixture +def variance(): + return torch.tensor([[0.1, 0.2]]) + + +@pytest.fixture +def bmg(): + V = torch.tensor([[99.0, 99.0, 1.0, 2.0], [99.0, 99.0, 3.0, 4.0]]) + E = torch.tensor([[99.0, 99.0, 1.0, 2.0], [99.0, 99.0, 3.0, 4.0]]) + return MockBatchMolGraph(V=V, E=E) + + +def test_uneven_shapes(): + with pytest.raises(ValueError): + ScaleTransform(mean=[0.0], scale=[1.0, 2.0]) + + +def test_padding(mean, scale, pad): + scale_transform = ScaleTransform(mean, scale, pad) + assert torch.all(scale_transform.mean[0, :pad] == 0.0).item() + assert torch.all(scale_transform.scale[0, :pad] == 1.0).item() + + +def test_from_standard_scaler(mean, scale, scaler): + scale_transform = ScaleTransform.from_standard_scaler(scaler) + + assert torch.all(scale_transform.mean == torch.tensor([0.0, 1.0])).item() + assert torch.all(scale_transform.scale == torch.tensor([2.0, 3.0])).item() + + +def test_scale_transform_forward_train(scale_transform, X): + scale_transform.train() + output_X = scale_transform(X) + assert output_X is X + + +def test_scale_transform_forward_eval(tensor_mean, tensor_scale, scale_transform, X): + scale_transform.eval() + output_X = scale_transform(X) + expected_X = (X - tensor_mean) / tensor_scale + assert torch.equal(output_X, expected_X) + + +def test_unscale_transform_forward_train(unscale_transform, X): + unscale_transform.train() + output_X = unscale_transform(X) + assert output_X is X + + +def 
test_unscale_transform_forward_eval(mean, scale, unscale_transform, prediction): + unscale_transform.eval() + output = unscale_transform(prediction) + expected = prediction * scale + mean + assert torch.equal(output, expected) + + +def test_unscale_transform_variance_train(unscale_transform, variance): + unscale_transform.train() + output_variance = unscale_transform.transform_variance(variance) + assert output_variance is variance + + +def test_unscale_transform_variance_eval(scale, unscale_transform, variance): + unscale_transform.eval() + output_variance = unscale_transform.transform_variance(variance) + expected_variance = variance * scale**2 + assert torch.equal(output_variance, expected_variance) + + +def test_graph_transform_forward_train(graph_transform, bmg): + graph_transform.train() + output_bmg = graph_transform(bmg) + assert output_bmg is bmg + + +def test_graph_transform_forward_eval(graph_transform, bmg): + graph_transform.eval() + expected_V = graph_transform.V_transform(bmg.V) + expected_E = graph_transform.E_transform(bmg.E) + + transformed_bmg = graph_transform(bmg) + + assert torch.equal(transformed_bmg.V, expected_V) + assert torch.equal(transformed_bmg.E, expected_E) diff --git a/chemprop/tests/unit/uncertainty/test_calibrators.py b/chemprop/tests/unit/uncertainty/test_calibrators.py new file mode 100644 index 0000000000000000000000000000000000000000..4d126f198569350b00f2479fd13ce8d39ab16905 --- /dev/null +++ b/chemprop/tests/unit/uncertainty/test_calibrators.py @@ -0,0 +1,376 @@ +import pytest +import torch + +from chemprop.uncertainty.calibrator import ( + AdaptiveMulticlassConformalCalibrator, + IsotonicCalibrator, + IsotonicMulticlassCalibrator, + MulticlassConformalCalibrator, + MultilabelConformalCalibrator, + MVEWeightingCalibrator, + PlattCalibrator, + RegressionConformalCalibrator, + ZelikmanCalibrator, + ZScalingCalibrator, +) + + +@pytest.mark.parametrize( + "cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.tensor( + [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + ] + ), + torch.tensor([[0, 1, 0], [0, 0, 1], [0, 1, 1], [1, 1, 0], [1, 0, 0], [1, 1, 0]]), + torch.tensor( + [[1, 1, 1], [1, 0, 1], [1, 1, 1], [1, 1, 1], [0, 1, 1], [1, 1, 1]], dtype=torch.bool + ), + torch.tensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]), + torch.tensor([[1 / 3, 2 / 3, 0.0], [1 / 3, 2 / 3, 0.5]]), + ) + ], +) +def test_IsotonicCalibrator(cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs): + """ + Testing the IsotonicCalibrator + """ + calibrator = IsotonicCalibrator() + calibrator.fit(cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs,training_targets,cal_test_uncs_with_training_targets", + [ + ( + torch.tensor( + [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + ] + ), + torch.tensor([[0, 1, 0], [0, 0, 1], [0, 1, 1], [1, 1, 0], [1, 0, 0], [1, 1, 0]]), + torch.tensor( + [[1, 1, 1], [1, 0, 1], [1, 1, 1], [1, 1, 1], [0, 1, 1], [1, 1, 1]], dtype=torch.bool + ), + torch.tensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]), + torch.tensor( + [[0.4182101, 0.8000248, 0.1312900], [0.3973791, 0.7999378, 0.2770228]], + dtype=torch.float64, + ), + torch.tensor([[0, 0, 0], [1, 1, 1], [1, 1, 0], [1, 0, 1]]), + torch.tensor( + [[0.5285367, 0.6499191, 0.3089508], [0.5188822, 0.6499544, 
0.3998689]], + dtype=torch.float64, + ), + ) + ], +) +def test_PlattCalibrator( + cal_uncs, + cal_targets, + cal_mask, + test_uncs, + cal_test_uncs, + training_targets, + cal_test_uncs_with_training_targets, +): + """ + Testing the PlattCalibrator + """ + calibrator1 = PlattCalibrator() + calibrator1.fit(cal_uncs, cal_targets, cal_mask) + uncs1 = calibrator1.apply(test_uncs) + + calibrator2 = PlattCalibrator() + calibrator2.fit(cal_uncs, cal_targets, cal_mask, training_targets) + uncs2 = calibrator2.apply(test_uncs) + + torch.testing.assert_close(uncs1, cal_test_uncs, rtol=1e-4, atol=1e-4) + torch.testing.assert_close(uncs2, cal_test_uncs_with_training_targets, rtol=1e-4, atol=1e-4) + + +@pytest.mark.parametrize( + "cal_preds,cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.zeros(100, 1, dtype=float), + torch.arange(1, 101, dtype=float).unsqueeze(1).pow(2), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.arange(1, 101, dtype=float).unsqueeze(1), + ), + ( + torch.zeros(100, 1, dtype=float), + torch.arange(2, 201, step=2, dtype=float).unsqueeze(1).pow(2), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.arange(1, 101, dtype=float).unsqueeze(1) / 4, + ), + ], +) +def test_ZScalingCalibrator(cal_preds, cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs): + """ + Testing the ZScalingCalibrator + """ + calibrator = ZScalingCalibrator() + calibrator.fit(cal_preds, cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_preds,cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.zeros(100, 1, dtype=float), + torch.arange(1, 101, dtype=float).unsqueeze(1).pow(2), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.arange(1, 101, dtype=float).unsqueeze(1), + ), + ( + torch.zeros(100, 1, dtype=float), + torch.ones(100, 1, dtype=bool), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.arange(1, 101, dtype=float).unsqueeze(1) * 8100, + ), + ], +) +def test_ZelikmanCalibrator(cal_preds, cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs): + """ + Testing the ZelikmanCalibrator + """ + calibrator = ZelikmanCalibrator(p=0.9) + calibrator.fit(cal_preds, cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_preds,cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.zeros(100, 1, dtype=float), + torch.arange(1, 101, dtype=float).unsqueeze(1).repeat(5, 1, 1), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.arange(1, 101, dtype=float).unsqueeze(1).repeat(5, 1, 1), + torch.arange(1, 101, dtype=float).unsqueeze(1), + ) + ], +) +def test_MVEWeightingCalibrator( + cal_preds, cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs +): + """ + Testing the MVEWeightingCalibrator + """ + calibrator = MVEWeightingCalibrator() + calibrator.fit(cal_preds, cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + 
"cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.tensor( + [ + [[0.2, 0.3, 0.5], [0.1, 0.6, 0.3]], + [[0.1, 0.6, 0.3], [0.4, 0.4, 0.2]], + [[0.4, 0.4, 0.2], [0.2, 0.3, 0.5]], + ] + ), + torch.tensor([[2, 1], [1, 0], [0, 2]]).long(), + torch.ones([3, 2], dtype=torch.bool), + torch.tensor( + [ + [[0.3, 0.4, 0.3], [0.5, 0.2, 0.3]], + [[0.5, 0.2, 0.3], [0.6, 0.3, 0.1]], + [[0.6, 0.3, 0.1], [0.3, 0.4, 0.3]], + ] + ), + torch.tensor( + [[[0, 1, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 0]], [[1, 0, 0], [0, 1, 0]]] + ).int(), + ) + ], +) +def test_AdaptiveMulticlassConformalCalibrator( + cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs +): + """ + Testing the AdaptiveMulticlassConformalCalibrator + """ + calibrator = AdaptiveMulticlassConformalCalibrator(alpha=0.5) + calibrator.fit(cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.tensor( + [ + [[0.2, 0.3, 0.5], [0.1, 0.6, 0.3]], + [[0.1, 0.6, 0.3], [0.4, 0.4, 0.2]], + [[0.4, 0.4, 0.2], [0.2, 0.3, 0.5]], + ] + ), + torch.tensor([[2, 2], [1, 0], [0, 2]]).long(), + torch.ones([3, 2], dtype=torch.bool), + torch.tensor( + [ + [[0.3, 0.4, 0.3], [0.5, 0.2, 0.3]], + [[0.5, 0.2, 0.3], [0.6, 0.3, 0.1]], + [[0.6, 0.3, 0.1], [0.3, 0.4, 0.3]], + ] + ), + torch.tensor( + [[[0, 1, 0], [1, 0, 1]], [[1, 0, 0], [1, 1, 0]], [[1, 0, 0], [1, 1, 1]]] + ).int(), + ) + ], +) +def test_MulticlassConformalCalibrator(cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs): + """ + Testing the MulticlassConformalCalibrator + """ + calibrator = MulticlassConformalCalibrator(alpha=0.5) + calibrator.fit(cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.tensor([[0, 1, 0], [1, 0, 0], [0, 0, 1]]), + torch.tensor([[0, 1, 0], [1, 0, 0], [0, 0, 1]]), + torch.ones([3, 3], dtype=torch.bool), + torch.tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]), + torch.tensor( + [[[1, 1], [1, 0], [1, 0]], [[1, 0], [1, 1], [1, 0]], [[1, 0], [1, 0], [1, 1]]], + dtype=torch.int, + ), + ) + ], +) +def test_MultilabelConformalCalibrator(cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs): + """ + Testing the MultilabelConformalCalibrator + """ + calibrator = MultilabelConformalCalibrator(alpha=0.1) + calibrator.fit(cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_preds,cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.arange(100).unsqueeze(1), + torch.arange(100).unsqueeze(1) / 10, + torch.arange(10, 110).unsqueeze(1), + torch.ones([100, 1], dtype=torch.bool), + torch.arange(100, 200).unsqueeze(1) / 10, + torch.arange(29.2, 39.1, 0.1).unsqueeze(1), + ), + ( + torch.arange(100).unsqueeze(1), + torch.zeros(100, 1), + torch.arange(10, 110).unsqueeze(1), + torch.ones([100, 1], dtype=torch.bool), + torch.zeros(100, 1), + torch.ones(100, 1) * 20, + ), + ], +) +def test_RegressionConformalCalibrator( + cal_preds, cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs +): + """ + Testing the RegressionConformalCalibrator + """ + calibrator = RegressionConformalCalibrator(alpha=0.1) + calibrator.fit(cal_preds, cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + 
torch.testing.assert_close(uncs, cal_test_uncs) + + +@pytest.mark.parametrize( + "cal_uncs,cal_targets,cal_mask,test_uncs,cal_test_uncs", + [ + ( + torch.tensor( + [ + [[0.2, 0.3, 0.5], [0.1, 0.6, 0.3]], + [[0.1, 0.6, 0.3], [0.4, 0.4, 0.2]], + [[0.4, 0.4, 0.2], [0.2, 0.3, 0.5]], + [[0.0, 0.6, 0.4], [0.8, 0.1, 0.1]], + [[0.5, 0.2, 0.3], [0.4, 0.4, 0.2]], + [[0.4, 0.3, 0.3], [0.7, 0.3, 0.0]], + ] + ), + torch.tensor([[2, 1], [1, 2], [0, 2], [1, 1], [0, 0], [2, 0]]).long(), + torch.ones([6, 2], dtype=torch.bool), + torch.tensor( + [ + [[0.0, 0.1, 0.9], [0.5, 0.2, 0.3]], + [[0.3, 0.4, 0.3], [0.6, 0.3, 0.1]], + [[0.9, 0.1, 0.0], [0.3, 0.4, 0.3]], + ] + ), + torch.tensor( + [ + [[0.000000, 0.000000, 1.000000], [0.483871, 0.193548, 0.322581]], + [[0.500000, 0.000000, 0.500000], [0.714286, 0.285714, 0.000000]], + [[1.000000, 0.000000, 0.000000], [0.319149, 0.255319, 0.425532]], + ] + ), + ) + ], +) +def test_IsotonicMulticlassCalibratorCalibrator( + cal_uncs, cal_targets, cal_mask, test_uncs, cal_test_uncs +): + """ + Testing the IsotonicMulticlassCalibratorCalibrator + """ + calibrator = IsotonicMulticlassCalibrator() + calibrator.fit(cal_uncs, cal_targets, cal_mask) + uncs = calibrator.apply(test_uncs) + + torch.testing.assert_close(uncs, cal_test_uncs) diff --git a/chemprop/tests/unit/uncertainty/test_estimators.py b/chemprop/tests/unit/uncertainty/test_estimators.py new file mode 100644 index 0000000000000000000000000000000000000000..fc88e07c80632100070dcc02786e071ec5b70809 --- /dev/null +++ b/chemprop/tests/unit/uncertainty/test_estimators.py @@ -0,0 +1,150 @@ +from lightning import pytorch as pl +import pytest +import torch +from torch.utils.data import DataLoader + +from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch +from chemprop.models import MPNN +from chemprop.uncertainty.estimator import ( + ClassificationDirichletEstimator, + DropoutEstimator, + EnsembleEstimator, + EvidentialAleatoricEstimator, + EvidentialEpistemicEstimator, + EvidentialTotalEstimator, + MulticlassDirichletEstimator, + MVEEstimator, + NoUncertaintyEstimator, +) + + +@pytest.fixture +def dataloader(mol_regression_data): + smis, Y = mol_regression_data + data = [MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis[:2], Y[:2])] + dset = MoleculeDataset(data) + + return DataLoader(dset, 32, collate_fn=collate_batch) + + +@pytest.fixture +def trainer(): + return pl.Trainer( + logger=False, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + ) + + +def test_NoUncertaintyEstimator(data_dir, dataloader, trainer): + model = MPNN.load_from_file(data_dir / "example_model_v2_regression_mol.pt") + estimator = NoUncertaintyEstimator() + preds, uncs = estimator(dataloader, [model], trainer) + + torch.testing.assert_close(preds, torch.tensor([[[2.25354], [2.23501]]])) + assert uncs is None + + +def test_DropoutEstimator(data_dir, dataloader, trainer): + model = MPNN.load_from_file(data_dir / "example_model_v2_regression_mol.pt") + estimator = DropoutEstimator(ensemble_size=2, dropout=0.1) + preds, uncs = estimator(dataloader, [model], trainer) + + assert torch.all(uncs != 0) + assert getattr(model.message_passing.dropout, "p", None) == 0.0 + + +def test_EnsembleEstimator(data_dir, dataloader, trainer): + model1 = MPNN.load_from_file(data_dir / "example_model_v2_regression_mol.pt") + model2 = MPNN.load_from_file(data_dir / "example_model_v2_regression_mol.pt") + + # Make the second model predict different values than the first + 
model2.predictor.output_transform = torch.nn.Identity() + + estimator = EnsembleEstimator() + preds, uncs = estimator(dataloader, [model1, model2], trainer) + + torch.testing.assert_close( + preds, torch.tensor([[[2.25354], [2.23501]], [[0.09652], [0.08291]]]) + ) + torch.testing.assert_close(uncs, torch.tensor([[[1.16318], [1.15788]]])) + + +def test_EnsembleEstimator_wrong_n_models(): + estimator = EnsembleEstimator() + with pytest.raises(ValueError): + estimator("mock_dataloader", ["mock_model"], "mock_trainer") + + +def test_MVEEstimator(data_dir, dataloader, trainer): + model = MPNN.load_from_file(data_dir / "example_model_v2_regression_mve_mol.pt") + estimator = MVEEstimator() + preds, uncs = estimator(dataloader, [model], trainer) + + torch.testing.assert_close(preds, torch.tensor([[[2.10946], [2.10234]]])) + torch.testing.assert_close(uncs, torch.tensor([[[1.27602], [1.28058]]])) + + +@pytest.mark.parametrize( + "estimator_class, expected_preds, expected_uncs", + [ + ( + EvidentialTotalEstimator, + torch.tensor([[[2.09985], [2.09525]]]), + torch.tensor([[[4.63703], [4.67548]]]), + ), + ( + EvidentialEpistemicEstimator, + torch.tensor([[[2.09985], [2.09525]]]), + torch.tensor([[[2.77602], [2.80313]]]), + ), + ( + EvidentialAleatoricEstimator, + torch.tensor([[[2.09985], [2.09525]]]), + torch.tensor([[[1.86101], [1.87234]]]), + ), + ], +) +def test_EvidentialEstimators( + estimator_class, expected_preds, expected_uncs, data_dir, dataloader, trainer +): + model = MPNN.load_from_file(data_dir / "example_model_v2_regression_evidential_mol.pt") + + estimator = estimator_class() + preds, uncs = estimator(dataloader, [model], trainer) + + torch.testing.assert_close(preds, expected_preds) + torch.testing.assert_close(uncs, expected_uncs) + + +def test_ClassificationDirichletEstimator(data_dir, dataloader, trainer): + model = MPNN.load_from_file(data_dir / "example_model_v2_classification_dirichlet_mol.pt") + estimator = ClassificationDirichletEstimator() + preds, uncs = estimator(dataloader, [model], trainer) + + torch.testing.assert_close( + preds, + torch.tensor( + [[[0.085077, 0.085050, 0.086104, 0.138729], [0.069522, 0.069501, 0.070306, 0.116051]]] + ), + ) + torch.testing.assert_close( + uncs, + torch.tensor( + [[[0.170140, 0.170079, 0.172037, 0.277232], [0.139044, 0.138999, 0.140591, 0.232073]]] + ), + ) + + +def test_MulticlassDirichletEstimator(data_dir, dataloader, trainer): + model = MPNN.load_from_file(data_dir / "example_model_v2_multiclass_dirichlet_mol.pt") + estimator = MulticlassDirichletEstimator() + preds, uncs = estimator(dataloader, [model], trainer) + + torch.testing.assert_close( + preds, torch.tensor([[[[0.906426, 0.046787, 0.046787]], [[0.925395, 0.037303, 0.037303]]]]) + ) + torch.testing.assert_close(uncs, torch.tensor([[[0.140361], [0.111908]]])) diff --git a/chemprop/tests/unit/uncertainty/test_evaluators.py b/chemprop/tests/unit/uncertainty/test_evaluators.py new file mode 100644 index 0000000000000000000000000000000000000000..56d63574469c361fcd9cda0aa792b4c3fa9fe5b5 --- /dev/null +++ b/chemprop/tests/unit/uncertainty/test_evaluators.py @@ -0,0 +1,277 @@ +import pytest +import torch + +from chemprop.uncertainty.evaluator import ( + CalibrationAreaEvaluator, + ExpectedNormalizedErrorEvaluator, + MulticlassConformalEvaluator, + MultilabelConformalEvaluator, + NLLClassEvaluator, + NLLMulticlassEvaluator, + NLLRegressionEvaluator, + RegressionConformalEvaluator, + SpearmanEvaluator, +) + + +@pytest.mark.parametrize( + "uncs,targets,mask,likelihood", + [ + ( + 
torch.tensor([[0.8]]), + torch.ones([1, 1]), + torch.ones([1, 1], dtype=bool), + torch.tensor([0.8]), + ), + ( + torch.tensor([[0.8]]), + torch.zeros([1, 1]), + torch.ones([1, 1], dtype=bool), + torch.tensor([0.2]), + ), + ], +) +def test_NLLClassEvaluator(uncs, targets, mask, likelihood): + """ + Testing the NLLClassEvaluator + """ + evaluator = NLLClassEvaluator() + nll_calc = evaluator.evaluate(uncs, targets, mask) + likelihood_calc = torch.exp(-1 * nll_calc) + torch.testing.assert_close(likelihood_calc, likelihood) + + +@pytest.mark.parametrize( + "uncs,targets,mask,likelihood", + [ + ( + torch.tensor( + [ + [[0.29, 0.22, 0.49]], + [[0.35, 0.19, 0.46]], + [[0.55, 0.38, 0.07]], + [[0.15, 0.29, 0.56]], + [[0.08, 0.68, 0.24]], + ] + ), + torch.tensor([[0], [2], [2], [0], [1]]), + torch.ones([5, 1], dtype=bool), + torch.tensor([0.24875443]), + ), + ( + torch.tensor( + [ + [[8.7385e-01, 8.3770e-04, 3.3212e-02, 9.2103e-02]], + [[7.2274e-03, 1.0541e-01, 8.8703e-01, 3.2886e-04]], + [[1.7376e-03, 9.9478e-01, 1.4227e-03, 2.0596e-03]], + [[2.6487e-04, 1.3251e-03, 2.4325e-02, 9.7409e-01]], + ] + ), + torch.tensor([[0], [2], [1], [3]]), + torch.ones([4, 1], dtype=bool), + torch.tensor([0.93094635]), + ), + ], +) +def test_NLLMulticlassEvaluator(uncs, targets, mask, likelihood): + """ + Testing the NLLMulticlassEvaluator + """ + evaluator = NLLMulticlassEvaluator() + nll_calc = evaluator.evaluate(uncs, targets, mask) + likelihood_calc = torch.exp(-1 * nll_calc) + torch.testing.assert_close(likelihood_calc, likelihood) + + +@pytest.mark.parametrize( + "preds,uncs,targets,mask,likelihood", + [ + ( + torch.zeros([1, 1]), + torch.ones([1, 1]), + torch.zeros([1, 1]), + torch.ones([1, 1], dtype=bool), + torch.tensor([0.39894228]), + ), + ( + torch.zeros([2, 2]), + torch.ones([2, 2]), + torch.zeros([2, 2]), + torch.ones([2, 2], dtype=bool), + torch.tensor([0.39894228, 0.39894228]), + ), + ], +) +def test_NLLRegressionEvaluator(preds, uncs, targets, mask, likelihood): + """ + Testing the NLLRegressionEvaluator + """ + evaluator = NLLRegressionEvaluator() + nll_calc = evaluator.evaluate(preds, uncs, targets, mask) + likelihood_calc = torch.exp(-1 * nll_calc) + torch.testing.assert_close(likelihood_calc, likelihood) + + +@pytest.mark.parametrize( + "preds,uncs,targets,mask,spearman_exp", + [ + ( + torch.zeros(100, 1, dtype=float), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.tensor([1.0]), + ), + ( + torch.zeros(100, 1, dtype=float), + -torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.arange(1, 101, dtype=float).unsqueeze(1), + torch.ones(100, 1, dtype=bool), + torch.tensor([-1.0]), + ), + ], +) +def test_SpearmanEvaluator(preds, uncs, targets, mask, spearman_exp): + """ + Testing the SpearmanEvaluator + """ + evaluator = SpearmanEvaluator() + area = evaluator.evaluate(preds, uncs, targets, mask) + torch.testing.assert_close(area, spearman_exp) + + +@pytest.mark.parametrize( + "uncs,targets,mask,coverage", + [ + ( + torch.tensor([[[1, 0], [0, 1]], [[0, 1], [1, 0]], [[1, 0], [1, 0]]]), + torch.tensor([[0, 0], [1, 0], [1, 1]]), + torch.ones([3, 2], dtype=torch.bool), + torch.tensor([0.66666, 0.33333]), + ) + ], +) +def test_MulticlassConformalEvaluator(uncs, targets, mask, coverage): + """ + Testing the MulticlassConformalEvaluator + """ + evaluator = MulticlassConformalEvaluator() + coverage_cal = evaluator.evaluate(uncs, targets, mask) + + torch.testing.assert_close(coverage_cal, coverage) + + 
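+# Sanity-check sketch (kept as a comment so nothing runs at import time): in the
+# multiclass conformal case above, `uncs` holds 0/1 per-class set-membership
+# indicators, and the expected values are consistent with coverage being the
+# per-task fraction of samples whose true class lies inside the predicted set:
+#
+#     in_set = uncs.gather(2, targets.unsqueeze(2)).squeeze(2)
+#     coverage = in_set.float().mean(0)  # tensor([0.6667, 0.3333])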
+@pytest.mark.parametrize( + "uncs,targets,mask,coverage", + [ + ( + torch.tensor([[0, 0, 0, 0], [0, 1, 1, 1], [0, 0, 0, 0]]), + torch.tensor([[0, 0], [1, 0], [1, 1]]), + torch.ones([3, 2], dtype=torch.bool), + torch.tensor([0.66666, 0.33333]), + ) + ], +) +def test_MultilabelConformalEvaluator(uncs, targets, mask, coverage): + """ + Testing the MultilabelConformalEvaluator + """ + evaluator = MultilabelConformalEvaluator() + coverage_cal = evaluator.evaluate(uncs, targets, mask) + + torch.testing.assert_close(coverage_cal, coverage) + + +@pytest.mark.parametrize( + "preds,uncs,targets,mask,coverage", + [ + ( + torch.arange(100).unsqueeze(1), + torch.arange(100).unsqueeze(1), + torch.arange(10, 110).unsqueeze(1), + torch.ones([100, 1], dtype=torch.bool), + torch.tensor([0.8]), + ), + ( + torch.tensor([[0, 0.3, 1]]), + torch.tensor([[0.4, 0.6, 0.8]]), + torch.tensor([[0.5, 0.5, 0.5]]), + torch.ones([1, 3], dtype=torch.bool), + torch.tensor([0.0, 1.0, 0.0]), + ), + ( + torch.arange(100, 0, -1).unsqueeze(1), + torch.full((100, 1), 140), + torch.arange(1, 101, 1).unsqueeze(1), + torch.ones([100, 1], dtype=torch.bool), + torch.tensor([0.7]), + ), + ], +) +def test_RegressionConformalEvaluator(preds, uncs, targets, mask, coverage): + """ + Testing the RegressionConformalEvaluator + """ + evaluator = RegressionConformalEvaluator() + coverage_cal = evaluator.evaluate(preds, uncs, targets, mask) + + torch.testing.assert_close(coverage_cal, coverage) + + +@pytest.mark.parametrize( + "preds,uncs,targets,mask,miscal_area", + [ + ( + torch.zeros(100).unsqueeze(1), + torch.ones(100).unsqueeze(1), + torch.zeros(100).unsqueeze(1), + torch.ones([100, 1], dtype=torch.bool), + torch.tensor([0.495]), + ), + ( + torch.ones(100).unsqueeze(1), + torch.ones(100).unsqueeze(1), + torch.ones(100, 1) * 100, + torch.ones([100, 1], dtype=torch.bool), + torch.tensor([0.495]), + ), + ], +) +def test_CalibrationAreaEvaluator(preds, uncs, targets, mask, miscal_area): + """ + Testing the CalibrationAreaEvaluator + """ + evaluator = CalibrationAreaEvaluator() + miscal_area_cal = evaluator.evaluate(preds, uncs, targets, mask) + + torch.testing.assert_close(miscal_area_cal, miscal_area) + + +@pytest.mark.parametrize( + "preds,uncs,targets,mask,ence", + [ + ( + torch.zeros(100, 1), + torch.ones(100, 1), + torch.zeros(100, 1), + torch.ones([100, 1], dtype=torch.bool), + torch.tensor([1.0]), + ), + ( + torch.linspace(1, 100, steps=100).unsqueeze(1), + torch.linspace(1, 10, steps=100).unsqueeze(1), + torch.linspace(1, 100, steps=100).unsqueeze(1) + + torch.tensor([-2, -1, 1, 2]).repeat(25).unsqueeze(1), + torch.ones([100, 1], dtype=torch.bool), + torch.tensor([0.392]), + ), + ], +) +def test_ExpectedNormalizedErrorEvaluator(preds, uncs, targets, mask, ence): + """ + Testing the ExpectedNormalizedErrorEvaluator + """ + evaluator = ExpectedNormalizedErrorEvaluator() + ence_cal = evaluator.evaluate(preds, uncs, targets, mask) + + torch.testing.assert_close(ence_cal, ence) diff --git a/chemprop/tests/unit/utils/test_converter.py b/chemprop/tests/unit/utils/test_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..e81efc89c7c0deac34d3edb5b185528a6bbf996f --- /dev/null +++ b/chemprop/tests/unit/utils/test_converter.py @@ -0,0 +1,69 @@ +import csv + +from lightning import pytorch as pl +import numpy as np +import pytest + +from chemprop.data.dataloader import build_dataloader +from chemprop.data.datapoints import MoleculeDatapoint +from chemprop.data.datasets import MoleculeDataset +from 
chemprop.featurizers.atom import MultiHotAtomFeaturizer +from chemprop.featurizers.molgraph.molecule import SimpleMoleculeMolGraphFeaturizer +from chemprop.models.model import MPNN +from chemprop.utils.v1_to_v2 import convert_model_file_v1_to_v2 + + +@pytest.fixture +def example_model_v1_path(data_dir): + return data_dir / "example_model_v1_regression_mol.pt" + + +@pytest.fixture +def example_model_v1_4_path(data_dir): + return data_dir / "example_model_v1_4.pt" + + +@pytest.fixture +def example_model_v1_prediction(data_dir): + path = data_dir / "example_model_v1_regression_mol_prediction.csv" + + with open(path) as fid: + reader = csv.reader(fid) + next(reader) + smis, ys = zip(*[(smi, float(score)) for smi, score in reader]) + + featurizer = SimpleMoleculeMolGraphFeaturizer(atom_featurizer=MultiHotAtomFeaturizer.v1()) + + ys = np.array(ys).reshape(-1, 1) + test_data = [MoleculeDatapoint.from_smi(smi, None) for smi in smis] + test_dset = MoleculeDataset(test_data, featurizer) + + test_loader = build_dataloader(test_dset, shuffle=False) + return ys, test_loader + + +def test_converter(tmp_path, example_model_v1_path, example_model_v1_prediction): + directory = tmp_path / "test_converter" + directory.mkdir() + model_v2_save_path = directory / "example_model_v2_regression_mol.pt" + + convert_model_file_v1_to_v2(example_model_v1_path, model_v2_save_path) + assert model_v2_save_path.exists() + + mpnn = MPNN.load_from_checkpoint(model_v2_save_path) + + ys_v1, test_loader = example_model_v1_prediction + + trainer = pl.Trainer(accelerator="cpu", logger=None, enable_progress_bar=False) + predss = trainer.predict(mpnn, test_loader) + ys_v2 = np.vstack(predss) + assert np.allclose(ys_v2, ys_v1, atol=1e-6) + + +def test_converter_v1_4(tmp_path, example_model_v1_4_path): + directory = tmp_path / "test_converter" + directory.mkdir() + model_v2_save_path = directory / "converted_v1_4.pt" + + convert_model_file_v1_to_v2(example_model_v1_4_path, model_v2_save_path) + assert model_v2_save_path.exists() diff --git a/chemprop/tests/unit/utils/test_save_load_mol+mol.py b/chemprop/tests/unit/utils/test_save_load_mol+mol.py new file mode 100644 index 0000000000000000000000000000000000000000..bd50d02e716515b2c1fa53ba25d9730d5f809989 --- /dev/null +++ b/chemprop/tests/unit/utils/test_save_load_mol+mol.py @@ -0,0 +1,187 @@ +from pathlib import Path + +from lightning import pytorch as pl +import numpy as np +import pytest +import torch +from torch.nn import Identity +from torch.utils.data import DataLoader + +from chemprop.data import ( + MoleculeDatapoint, + MoleculeDataset, + MulticomponentDataset, + collate_multicomponent, +) +from chemprop.models import MulticomponentMPNN +from chemprop.models.utils import load_model, save_model +from chemprop.nn import ( + MSE, + BondMessagePassing, + GraphTransform, + MulticomponentMessagePassing, + NormAggregation, + RegressionFFN, + ScaleTransform, + UnscaleTransform, +) + + +@pytest.fixture +def checkpoint_path(data_dir): + return data_dir / "example_model_v2_regression_mol+mol.ckpt" + + +@pytest.fixture +def file_path(data_dir): + return data_dir / "example_model_v2_regression_mol+mol.pt" + + +@pytest.fixture +def model(checkpoint_path): + model = MulticomponentMPNN.load_from_checkpoint(checkpoint_path) + return model + + +@pytest.fixture +def test_loader(mol_mol_regression_data): + smis1, smis2, _ = mol_mol_regression_data + data = [ + [MoleculeDatapoint.from_smi(smi) for smi in smis1], + [MoleculeDatapoint.from_smi(smi) for smi in smis2], + ] + dsets = 
[MoleculeDataset(d) for d in data] + dset = MulticomponentDataset(dsets) + + return DataLoader(dset, 32, collate_fn=collate_multicomponent) + + +@pytest.fixture +def trainer(): + return pl.Trainer( + logger=None, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + ) + + +@pytest.fixture +def ys(model, test_loader, trainer): + predss = trainer.predict(model, test_loader) + return np.vstack(predss) + + +def test_roundtrip(tmp_path, model, test_loader, trainer, ys): + save_path = Path(tmp_path) / "test.pt" + save_model(save_path, model) + + model_from_file = MulticomponentMPNN.load_from_file(save_path) + + predss_from_file = trainer.predict(model_from_file, test_loader) + ys_from_file = np.vstack(predss_from_file) + + assert np.allclose(ys_from_file, ys, atol=1e-6) + + +def test_checkpoint_is_valid(checkpoint_path, test_loader, trainer, ys): + model_from_checkpoint = MulticomponentMPNN.load_from_file(checkpoint_path) + + predss_from_checkpoint = trainer.predict(model_from_checkpoint, test_loader) + ys_from_checkpoint = np.vstack(predss_from_checkpoint) + + assert np.allclose(ys_from_checkpoint, ys, atol=1e-6) + + +def test_checkpoint_roundtrip(checkpoint_path, file_path, trainer, test_loader): + model_from_checkpoint = MulticomponentMPNN.load_from_checkpoint( + checkpoint_path, map_location="cpu" + ) + model_from_file = MulticomponentMPNN.load_from_file(file_path, map_location="cpu") + + predss_from_checkpoint = trainer.predict(model_from_checkpoint, test_loader) + ys_from_checkpoint = np.vstack(predss_from_checkpoint) + + predss_from_file = trainer.predict(model_from_file, test_loader) + ys_from_file = np.vstack(predss_from_file) + + assert np.allclose(ys_from_file, ys_from_checkpoint, atol=1e-6) + + +def test_scalers_roundtrip_one_block(tmp_path): + E_f_transform = ScaleTransform(mean=[0.0, 1.0], scale=[2.0, 3.0]) + graph_transform = GraphTransform(V_transform=Identity(), E_transform=E_f_transform) + V_d_transform = ScaleTransform(mean=[4.0, 5.0], scale=[6.0, 7.0]) + mcmp = MulticomponentMessagePassing( + blocks=[BondMessagePassing(graph_transform=graph_transform, V_d_transform=V_d_transform)], + n_components=2, + shared=True, + ) + + output_transform = UnscaleTransform(mean=[8.0, 9.0], scale=[10.0, 11.0]) + criterion = MSE(task_weights=[12.0]) + ffn = RegressionFFN(output_transform=output_transform, criterion=criterion) + + X_d_transform = ScaleTransform(mean=[13.0, 14.0], scale=[15.0, 16.0]) + original = MulticomponentMPNN(mcmp, NormAggregation(), ffn, X_d_transform=X_d_transform) + + save_model(tmp_path / "model.pt", original) + loaded = load_model(tmp_path / "model.pt", multicomponent=True) + + assert torch.equal( + original.message_passing.blocks[0].V_d_transform.mean, + loaded.message_passing.blocks[0].V_d_transform.mean, + ) + assert torch.equal( + original.message_passing.blocks[1].graph_transform.E_transform.mean, + loaded.message_passing.blocks[1].graph_transform.E_transform.mean, + ) + assert torch.equal( + original.predictor.criterion.task_weights, loaded.predictor.criterion.task_weights + ) + assert torch.equal( + original.predictor.output_transform.mean, loaded.predictor.output_transform.mean + ) + assert torch.equal(original.X_d_transform.mean, loaded.X_d_transform.mean) + + +def test_scalers_roundtrip_two_blocks(tmp_path): + E_f_transform = ScaleTransform(mean=[0.0, 1.0], scale=[2.0, 3.0]) + graph_transform = GraphTransform(V_transform=Identity(), E_transform=E_f_transform) + V_d_transform = 
ScaleTransform(mean=[4.0, 5.0], scale=[6.0, 7.0]) + mcmp = MulticomponentMessagePassing( + blocks=[ + BondMessagePassing(graph_transform=graph_transform, V_d_transform=V_d_transform), + BondMessagePassing(graph_transform=graph_transform), + ], + n_components=2, + shared=True, + ) + + output_transform = UnscaleTransform(mean=[8.0, 9.0], scale=[10.0, 11.0]) + criterion = MSE(task_weights=[12.0]) + ffn = RegressionFFN(output_transform=output_transform, criterion=criterion) + + X_d_transform = ScaleTransform(mean=[13.0, 14.0], scale=[15.0, 16.0]) + original = MulticomponentMPNN(mcmp, NormAggregation(), ffn, X_d_transform=X_d_transform) + + save_model(tmp_path / "model.pt", original) + loaded = load_model(tmp_path / "model.pt", multicomponent=True) + + assert torch.equal( + original.message_passing.blocks[0].V_d_transform.mean, + loaded.message_passing.blocks[0].V_d_transform.mean, + ) + assert torch.equal( + original.message_passing.blocks[1].graph_transform.E_transform.mean, + loaded.message_passing.blocks[1].graph_transform.E_transform.mean, + ) + assert torch.equal( + original.predictor.criterion.task_weights, loaded.predictor.criterion.task_weights + ) + assert torch.equal( + original.predictor.output_transform.mean, loaded.predictor.output_transform.mean + ) + assert torch.equal(original.X_d_transform.mean, loaded.X_d_transform.mean) diff --git a/chemprop/tests/unit/utils/test_save_load_mol.py b/chemprop/tests/unit/utils/test_save_load_mol.py new file mode 100644 index 0000000000000000000000000000000000000000..46ed8cfc159f486e0486636bcc6050338353ae72 --- /dev/null +++ b/chemprop/tests/unit/utils/test_save_load_mol.py @@ -0,0 +1,138 @@ +from pathlib import Path + +from lightning import pytorch as pl +import numpy as np +import pytest +import torch +from torch.nn import Identity +from torch.utils.data import DataLoader + +from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch +from chemprop.models import MPNN +from chemprop.models.utils import load_model, save_model +from chemprop.nn import ( + MSE, + BondMessagePassing, + GraphTransform, + NormAggregation, + RegressionFFN, + ScaleTransform, + UnscaleTransform, +) + + +@pytest.fixture +def checkpoint_path(data_dir): + return data_dir / "example_model_v2_regression_mol.ckpt" + + +@pytest.fixture +def model_path(data_dir): + return data_dir / "example_model_v2_regression_mol.pt" + + +@pytest.fixture +def model(checkpoint_path): + model = MPNN.load_from_checkpoint(checkpoint_path) + return model + + +@pytest.fixture +def test_loader(smis): + data = [MoleculeDatapoint.from_smi(smi) for smi in smis] + dset = MoleculeDataset(data) + + return DataLoader(dset, 32, collate_fn=collate_batch) + + +@pytest.fixture +def trainer(): + return pl.Trainer( + logger=None, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + accelerator="cpu", + devices=1, + ) + + +@pytest.fixture +def ys(model, test_loader, trainer): + predss = trainer.predict(model, test_loader) + return np.vstack(predss) + + +def test_roundtrip(tmp_path, model, test_loader, trainer, ys): + save_path = Path(tmp_path) / "test.pt" + save_model(save_path, model) + + model_from_file = MPNN.load_from_file(save_path) + + predss_from_file = trainer.predict(model_from_file, test_loader) + ys_from_file = np.vstack(predss_from_file) + + assert np.allclose(ys_from_file, ys, atol=1e-6) + + +def test_checkpoint_is_valid(checkpoint_path, test_loader, trainer, ys): + model_from_checkpoint = MPNN.load_from_file(checkpoint_path) + + 
predss_from_checkpoint = trainer.predict(model_from_checkpoint, test_loader) + ys_from_checkpoint = np.vstack(predss_from_checkpoint) + + assert np.allclose(ys_from_checkpoint, ys, atol=1e-6) + + +def test_checkpoint_roundtrip(checkpoint_path, model_path, trainer, test_loader): + model_from_checkpoint = MPNN.load_from_checkpoint(checkpoint_path, map_location="cpu") + model_from_file = MPNN.load_from_file(model_path, map_location="cpu") + + predss_from_checkpoint = trainer.predict(model_from_checkpoint, test_loader) + ys_from_checkpoint = np.vstack(predss_from_checkpoint) + + predss_from_file = trainer.predict(model_from_file, test_loader) + ys_from_file = np.vstack(predss_from_file) + + assert np.allclose(ys_from_file, ys_from_checkpoint, atol=1e-6) + + +def test_scalers_roundtrip(tmp_path): + E_f_transform = ScaleTransform(mean=[0.0, 1.0], scale=[2.0, 3.0]) + graph_transform = GraphTransform(V_transform=Identity(), E_transform=E_f_transform) + V_d_transform = ScaleTransform(mean=[4.0, 5.0], scale=[6.0, 7.0]) + mp = BondMessagePassing(graph_transform=graph_transform, V_d_transform=V_d_transform) + + output_transform = UnscaleTransform(mean=[8.0, 9.0], scale=[10.0, 11.0]) + criterion = MSE(task_weights=[12.0]) + ffn = RegressionFFN(output_transform=output_transform, criterion=criterion) + + X_d_transform = ScaleTransform(mean=[13.0, 14.0], scale=[15.0, 16.0]) + original = MPNN(mp, NormAggregation(), ffn, X_d_transform=X_d_transform) + + save_model(tmp_path / "model.pt", original) + loaded = load_model(tmp_path / "model.pt", multicomponent=False) + + assert torch.equal( + original.message_passing.V_d_transform.mean, loaded.message_passing.V_d_transform.mean + ) + assert torch.equal( + original.message_passing.graph_transform.E_transform.mean, + loaded.message_passing.graph_transform.E_transform.mean, + ) + assert torch.equal( + original.predictor.criterion.task_weights, loaded.predictor.criterion.task_weights + ) + assert torch.equal( + original.predictor.output_transform.mean, loaded.predictor.output_transform.mean + ) + assert torch.equal(original.X_d_transform.mean, loaded.X_d_transform.mean) + + +def test_load_checkpoint_with_metrics(data_dir): + MPNN.load_from_checkpoint(data_dir / "example_model_v2_regression_mol_with_metrics.ckpt") + MPNN.load_from_checkpoint(data_dir / "example_model_v2_classification_mol_with_metrics.ckpt") + + +def test_load_trained_on_cuda(data_dir): + MPNN.load_from_file(data_dir / "example_model_v2_trained_on_cuda.pt", map_location="cpu")
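
For reference, the save/load roundtrip that the tests above exercise follows the pattern below. This is a minimal sketch distilled from the test code, not a definitive recipe: the checkpoint path and SMILES strings are placeholders, and only calls that already appear in the tests (MPNN.load_from_checkpoint, save_model, MPNN.load_from_file, build of a MoleculeDataset, Trainer.predict) are used.

from pathlib import Path

from lightning import pytorch as pl
import numpy as np
from torch.utils.data import DataLoader

from chemprop.data import MoleculeDatapoint, MoleculeDataset, collate_batch
from chemprop.models import MPNN
from chemprop.models.utils import save_model

# Placeholder inputs: any trained v2 regression checkpoint and a few SMILES.
checkpoint_path = Path("example_model_v2_regression_mol.ckpt")
smis = ["CCO", "c1ccccc1"]

# Load a trained model from a Lightning checkpoint, re-save it as a .pt file,
# and reload it -- the roundtrip the tests above verify.
model = MPNN.load_from_checkpoint(checkpoint_path)
save_model(Path("model.pt"), model)
model_from_file = MPNN.load_from_file(Path("model.pt"), map_location="cpu")

# Build a small prediction set and confirm both copies give the same output.
dset = MoleculeDataset([MoleculeDatapoint.from_smi(smi) for smi in smis])
loader = DataLoader(dset, 32, collate_fn=collate_batch)
trainer = pl.Trainer(accelerator="cpu", logger=None, enable_progress_bar=False)

ys = np.vstack(trainer.predict(model, loader))
ys_from_file = np.vstack(trainer.predict(model_from_file, loader))
assert np.allclose(ys_from_file, ys, atol=1e-6)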