\n"
+ ],
+ "text/plain": [
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+ "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n",
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+ "│\u001b[36m \u001b[0m\u001b[36m test/multiclass_auroc \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.6266666650772095 \u001b[0m\u001b[35m \u001b[0m│\n",
+ "└───────────────────────────┴───────────────────────────┘\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'test/multiclass_auroc': 0.6266666650772095}]"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "trainer = pl.Trainer(max_epochs=2)\n",
+ "trainer.fit(model, train_loader, val_loader)\n",
+ "trainer.test(model, test_loader)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/chemprop-updated/docs/source/tutorial/python/models/aggregation.ipynb b/chemprop-updated/docs/source/tutorial/python/models/aggregation.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..f99275ec1ed3db62386473ecf7aa0936730dc772
--- /dev/null
+++ b/chemprop-updated/docs/source/tutorial/python/models/aggregation.ipynb
@@ -0,0 +1,256 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Aggregation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from chemprop.nn.agg import MeanAggregation, SumAggregation, NormAggregation, AttentiveAggregation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is example output from [message passing](./message_passing.ipynb) for input to aggregation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "n_atoms_in_batch = 7\n",
+ "hidden_dim = 3\n",
+ "example_message_passing_output = torch.randn(n_atoms_in_batch, hidden_dim)\n",
+ "which_atoms_in_which_molecule = torch.tensor([0, 0, 1, 1, 1, 1, 2]).long()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Combine nodes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The aggregation layer combines the node level represenations into a graph level representaiton (usually atoms -> molecule)."
+ ]
+ },
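+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick illustration (this check is an addition, not part of the original tutorial), aggregating the example output defined above reduces the 7 atom-level rows to one row per molecule, i.e. 3 graph-level rows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 7 atom rows in, one row per molecule out (3 molecules in this batch)\n",
+ "MeanAggregation()(H=example_message_passing_output, batch=which_atoms_in_which_molecule).shape"
+ ]
+ },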
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Mean and sum aggregation "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Mean aggregation is recommended when the property to predict does not depend on the number of atoms in the molecules (intensive). Sum aggregation is recommended when the property is extensive, though usually norm aggregation is better."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mean_agg = MeanAggregation()\n",
+ "sum_agg = SumAggregation()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[-0.4593, -0.1808, -0.3459],\n",
+ " [ 0.9343, -0.1746, 0.7430],\n",
+ " [-0.4747, -0.9394, -0.3877]])"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mean_agg(H=example_message_passing_output, batch=which_atoms_in_which_molecule)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[-0.9187, -0.3616, -0.6917],\n",
+ " [ 3.7373, -0.6986, 2.9720],\n",
+ " [-0.4747, -0.9394, -0.3877]])"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sum_agg(H=example_message_passing_output, batch=which_atoms_in_which_molecule)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Norm aggregation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Norm aggregation can be better than sum aggregation when the molecules are large as it is best to keep the hidden representation values on the order of 1 (though this is less important when batch normalization is used). The normalization constant can be customized (defaults to 100.0)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "norm_agg = NormAggregation()\n",
+ "big_norm = NormAggregation(norm=1000.0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[-0.0092, -0.0036, -0.0069],\n",
+ " [ 0.0374, -0.0070, 0.0297],\n",
+ " [-0.0047, -0.0094, -0.0039]])"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "norm_agg(H=example_message_passing_output, batch=which_atoms_in_which_molecule)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[-0.0009, -0.0004, -0.0007],\n",
+ " [ 0.0037, -0.0007, 0.0030],\n",
+ " [-0.0005, -0.0009, -0.0004]])"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "big_norm(H=example_message_passing_output, batch=which_atoms_in_which_molecule)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Attentive aggregation "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This uses a learned weighted average to combine atom representations within a molecule graph. It needs to be told the size of the hidden dimension as it uses the hidden representation of each atom to calculate the weight of that atom. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "att_agg = AttentiveAggregation(output_size=hidden_dim)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[-0.4551, -0.1791, -0.3438],\n",
+ " [ 0.9370, 0.1375, 0.3714],\n",
+ " [-0.4747, -0.9394, -0.3877]], grad_fn=)"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "att_agg(H=example_message_passing_output, batch=which_atoms_in_which_molecule)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "chemprop",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/chemprop-updated/docs/source/tutorial/python/models/basic_mpnn_model.ipynb b/chemprop-updated/docs/source/tutorial/python/models/basic_mpnn_model.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..4a52195f73b7fb145dc9f9888bb1cd6f2d4c6606
--- /dev/null
+++ b/chemprop-updated/docs/source/tutorial/python/models/basic_mpnn_model.ipynb
@@ -0,0 +1,351 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Chemprop MPNN models"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from chemprop.models.model import MPNN"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Composition"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A Chemprop `MPNN` model is made up of several submodules including a [message passing](./message_passing.ipynb) layer, an [aggregation](./aggregation.ipynb) layer, an optional batch normalization layer, and a [predictor](./predictor.ipynb) feed forward network layer. `MPNN` defines the training and predicting logic used by `lightning` when using a Chemprop model in their framework. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MPNN(\n",
+ " (message_passing): BondMessagePassing(\n",
+ " (W_i): Linear(in_features=86, out_features=300, bias=False)\n",
+ " (W_h): Linear(in_features=300, out_features=300, bias=False)\n",
+ " (W_o): Linear(in_features=372, out_features=300, bias=True)\n",
+ " (dropout): Dropout(p=0.0, inplace=False)\n",
+ " (tau): ReLU()\n",
+ " (V_d_transform): Identity()\n",
+ " (graph_transform): Identity()\n",
+ " )\n",
+ " (agg): NormAggregation()\n",
+ " (bn): Identity()\n",
+ " (predictor): RegressionFFN(\n",
+ " (ffn): MLP(\n",
+ " (0): Sequential(\n",
+ " (0): Linear(in_features=300, out_features=300, bias=True)\n",
+ " )\n",
+ " (1): Sequential(\n",
+ " (0): ReLU()\n",
+ " (1): Dropout(p=0.0, inplace=False)\n",
+ " (2): Linear(in_features=300, out_features=1, bias=True)\n",
+ " )\n",
+ " )\n",
+ " (criterion): MSE(task_weights=[[1.0]])\n",
+ " (output_transform): Identity()\n",
+ " )\n",
+ " (X_d_transform): Identity()\n",
+ " (metrics): ModuleList(\n",
+ " (0-1): 2 x MSE(task_weights=[[1.0]])\n",
+ " )\n",
+ ")"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from chemprop.nn import BondMessagePassing, NormAggregation, RegressionFFN\n",
+ "\n",
+ "mp = BondMessagePassing()\n",
+ "agg = NormAggregation()\n",
+ "ffn = RegressionFFN()\n",
+ "\n",
+ "basic_model = MPNN(mp, agg, ffn)\n",
+ "basic_model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Batch normalization"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Batch normalization can improve training by keeping the inputs to the FFN small and centered around zero. It is off by default, but can be turned on."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MPNN(\n",
+ " (message_passing): BondMessagePassing(\n",
+ " (W_i): Linear(in_features=86, out_features=300, bias=False)\n",
+ " (W_h): Linear(in_features=300, out_features=300, bias=False)\n",
+ " (W_o): Linear(in_features=372, out_features=300, bias=True)\n",
+ " (dropout): Dropout(p=0.0, inplace=False)\n",
+ " (tau): ReLU()\n",
+ " (V_d_transform): Identity()\n",
+ " (graph_transform): Identity()\n",
+ " )\n",
+ " (agg): NormAggregation()\n",
+ " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (predictor): RegressionFFN(\n",
+ " (ffn): MLP(\n",
+ " (0): Sequential(\n",
+ " (0): Linear(in_features=300, out_features=300, bias=True)\n",
+ " )\n",
+ " (1): Sequential(\n",
+ " (0): ReLU()\n",
+ " (1): Dropout(p=0.0, inplace=False)\n",
+ " (2): Linear(in_features=300, out_features=1, bias=True)\n",
+ " )\n",
+ " )\n",
+ " (criterion): MSE(task_weights=[[1.0]])\n",
+ " (output_transform): Identity()\n",
+ " )\n",
+ " (X_d_transform): Identity()\n",
+ " (metrics): ModuleList(\n",
+ " (0-1): 2 x MSE(task_weights=[[1.0]])\n",
+ " )\n",
+ ")"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "MPNN(mp, agg, ffn, batch_norm=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Optimizer"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`MPNN` also configures the optimizer used by lightning during training. The `torch.optim.Adam` optimizer is used with a Noam learning rate scheduler (defined in `chemprop.scheduler.NoamLR`). The following parameters are customizable:\n",
+ "\n",
+ " - number of warmup epochs, defaults to 2\n",
+ " - the initial learning rate, defaults to $10^{-4}$\n",
+ " - the max learning rate, defaults to $10^{-3}$\n",
+ " - the final learning rate, defaults to $10^{-4}$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model = MPNN(mp, agg, ffn, warmup_epochs=5, init_lr=1e-3, max_lr=1e-2, final_lr=1e-5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Metrics"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "During the validation and testing loops, lightning will use the metrics stored in `MPNN` to evaluate the current model's performance. The `MPNN` has a default metric defined by the type of predictor used. Other [metrics](../metrics.ipynb) can be given to `MPNN` to use instead."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from chemprop.nn import metrics\n",
+ "\n",
+ "metrics_list = [metrics.RMSE(), metrics.MAE()]\n",
+ "model = MPNN(mp, agg, ffn, metrics=metrics_list)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Fingerprinting and encoding"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`MPNN` has two helper functions to get the hidden representations at different parts of the model. The fingerprint is the learned representation of the message passing layer after aggregation and batch normalization. The encoding is the hidden representation after a number of layers of the predictor. See the predictor notebook for more details. Note that the 0th encoding is equivalent to the fingerprint."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Example batch for the model. See the [data notebooks](../data/dataloaders.ipynb) for more details."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "from chemprop.data import MoleculeDatapoint, MoleculeDataset\n",
+ "from chemprop.data import build_dataloader\n",
+ "\n",
+ "smis = [\"C\" * i for i in range(1, 4)]\n",
+ "ys = np.random.rand(len(smis), 1)\n",
+ "dataset = MoleculeDataset([MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])\n",
+ "dataloader = build_dataloader(dataset)\n",
+ "batch = next(iter(dataloader))\n",
+ "bmg, V_d, X_d, *_ = batch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[0.0333],\n",
+ " [0.0331],\n",
+ " [0.0332]], grad_fn=)"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "basic_model(bmg, V_d, X_d)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "torch.Size([3, 300])"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "basic_model.fingerprint(bmg, V_d, X_d).shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "torch.Size([3, 300])"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "basic_model.encoding(bmg, V_d, X_d, i=1).shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor(True)"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "(basic_model.fingerprint(bmg, V_d, X_d) == basic_model.encoding(bmg, V_d, X_d, i=0)).all()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "chemprop",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/chemprop-updated/docs/source/tutorial/python/models/message_passing.ipynb b/chemprop-updated/docs/source/tutorial/python/models/message_passing.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..d834341e24577056aa5030495ca4f61da80b2d95
--- /dev/null
+++ b/chemprop-updated/docs/source/tutorial/python/models/message_passing.ipynb
@@ -0,0 +1,232 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Message passing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from chemprop.nn.message_passing.base import BondMessagePassing, AtomMessagePassing"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is an example [dataloader](../data/dataloaders.ipynb) to make inputs for the message passing layer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "from chemprop.data import MoleculeDatapoint, MoleculeDataset, build_dataloader\n",
+ "\n",
+ "smis = [\"C\" * i for i in range(1, 4)]\n",
+ "ys = np.random.rand(len(smis), 1)\n",
+ "dataset = MoleculeDataset([MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)])\n",
+ "dataloader = build_dataloader(dataset)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Message passing schemes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "There are two message passing schemes. Chemprop prefers a D-MPNN scheme (`BondMessagePassing`) where messages are passed between directed edges (bonds) rather than between nodes (atoms) as would be done in a traditional MPNN (`AtomMessagePassing`)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mp = AtomMessagePassing()\n",
+ "mp = BondMessagePassing()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Input dimensions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "By default, the bond message passing layer's input dimension is the sum of atom and bond features from the default [atom](../featurizers/atom_featurizers.ipynb) and [bond](../featurizers/bond_featurizers.ipynb) featurizers. If you use a custom featurizer, the message passing layer needs to be told when it is created.\n",
+ "\n",
+ "Also note that an atom message passing's default input dimension is the length of the atom features from the default atom featurizer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from chemprop.featurizers import SimpleMoleculeMolGraphFeaturizer\n",
+ "\n",
+ "n_atom_features, n_bond_features = SimpleMoleculeMolGraphFeaturizer().shape\n",
+ "(n_atom_features + n_bond_features) == mp.W_i.in_features"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from chemprop.featurizers import MultiHotAtomFeaturizer\n",
+ "\n",
+ "n_extra_bond_features = 12\n",
+ "featurizer = SimpleMoleculeMolGraphFeaturizer(\n",
+ " atom_featurizer=MultiHotAtomFeaturizer.organic(), extra_bond_fdim=n_extra_bond_features\n",
+ ")\n",
+ "\n",
+ "mp = BondMessagePassing(d_v=featurizer.atom_fdim, d_e=featurizer.bond_fdim)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If extra atom descriptors are used, the message passing layer also needs to be told. A separate weight matrix is created and applied to the concatenated hidden representation and extra descriptors after message passing is complete. The output dimension of the message passing layer is the sum of the hidden size and number of extra atom descriptors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "328"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "n_extra_atom_descriptors = 28\n",
+ "mp = BondMessagePassing(d_vd=n_extra_atom_descriptors)\n",
+ "mp.output_dim"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Customization"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The following hyperparameters of the message passing layer are customizable:\n",
+ "\n",
+ " - the hidden dimension during message passing, default: 300\n",
+ " - whether a bias term used, default: False\n",
+ " - the number of message passing iterations, default: 3\n",
+ " - whether to pass messages on undirected edges, default: False\n",
+ " - the dropout probability, default: 0.0 (i.e. no dropout)\n",
+ " - which activation function, default: ReLU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mp = BondMessagePassing(\n",
+ " d_h=600, bias=True, depth=5, undirected=True, dropout=0.5, activation=\"tanh\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The output of message passing is a torch tensor of shape # of atoms in batch x length of hidden representation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "torch.Size([6, 600])"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "batch_molgraph, extra_atom_descriptors, *_ = next(iter(dataloader))\n",
+ "hidden_atom_representations = mp(batch_molgraph, extra_atom_descriptors)\n",
+ "hidden_atom_representations.shape"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "chemprop",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/chemprop-updated/docs/source/tutorial/python/models/multicomponent_mpnn_model.ipynb b/chemprop-updated/docs/source/tutorial/python/models/multicomponent_mpnn_model.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..2f142a975b184951bece7db7939ed45f41513fd4
--- /dev/null
+++ b/chemprop-updated/docs/source/tutorial/python/models/multicomponent_mpnn_model.ipynb
@@ -0,0 +1,208 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Multicomponent models"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from chemprop.nn.message_passing import MulticomponentMessagePassing\n",
+ "from chemprop.models import MulticomponentMPNN"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Overview"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The basic Chemprop model is designed for a single molecule or reaction as input. A multicomponent Chemprop model organizes these basic building blocks to take multiple molecules/reactions as input. This is useful for properties that depend on multiple components like properties in solvents."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Message passing"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`MulticomponentMessagePassing` organizes the single component [message passing](./message_passing.ipynb) modules for each component in the multicomponent dataset. The individual message passing modules can be unique for each component, shared between some components, or shared between all components. If all components share the same message passing module, the shared flag can be set to True. Note that it doesn't make sense for components that use different featurizers (e.g. molecules and reactions) to use the same message passing module."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from chemprop.nn import BondMessagePassing\n",
+ "\n",
+ "mp1 = BondMessagePassing(d_h=100)\n",
+ "mp2 = BondMessagePassing(d_h=600)\n",
+ "blocks = [mp1, mp2]\n",
+ "mcmp = MulticomponentMessagePassing(blocks=blocks, n_components=len(blocks))\n",
+ "\n",
+ "mp = BondMessagePassing()\n",
+ "mcmp = MulticomponentMessagePassing(blocks=[mp], n_components=2, shared=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "During the forward pass of the model, the output of each message passing block is concatentated after aggregation as input to the predictor."
+ ]
+ },
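+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick sanity check (added for illustration), the concatenated output dimension is the sum of the output dimensions of the individual blocks."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# With shared=True and n_components=2, this is 2 x 300 = 600\n",
+ "mcmp.output_dim == sum(block.output_dim for block in mcmp.blocks)"
+ ]
+ },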
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Aggregation\n",
+ "\n",
+ "A single [aggregation](./aggregation.ipynb) module is used on all message passing outputs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from chemprop.nn import MeanAggregation\n",
+ "\n",
+ "agg = MeanAggregation()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Predictor"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The [predictor](./predictor.ipynb) needs to be told the output dimension of the message passing layer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from chemprop.nn import RegressionFFN\n",
+ "\n",
+ "ffn = RegressionFFN(input_dim=mcmp.output_dim)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Multicomponent MPNN"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The submodules are composed together in a `MulticomponentMPNN` model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MulticomponentMPNN(\n",
+ " (message_passing): MulticomponentMessagePassing(\n",
+ " (blocks): ModuleList(\n",
+ " (0-1): 2 x BondMessagePassing(\n",
+ " (W_i): Linear(in_features=86, out_features=300, bias=False)\n",
+ " (W_h): Linear(in_features=300, out_features=300, bias=False)\n",
+ " (W_o): Linear(in_features=372, out_features=300, bias=True)\n",
+ " (dropout): Dropout(p=0.0, inplace=False)\n",
+ " (tau): ReLU()\n",
+ " (V_d_transform): Identity()\n",
+ " (graph_transform): Identity()\n",
+ " )\n",
+ " )\n",
+ " )\n",
+ " (agg): MeanAggregation()\n",
+ " (bn): Identity()\n",
+ " (predictor): RegressionFFN(\n",
+ " (ffn): MLP(\n",
+ " (0): Sequential(\n",
+ " (0): Linear(in_features=600, out_features=300, bias=True)\n",
+ " )\n",
+ " (1): Sequential(\n",
+ " (0): ReLU()\n",
+ " (1): Dropout(p=0.0, inplace=False)\n",
+ " (2): Linear(in_features=300, out_features=1, bias=True)\n",
+ " )\n",
+ " )\n",
+ " (criterion): MSE(task_weights=[[1.0]])\n",
+ " (output_transform): Identity()\n",
+ " )\n",
+ " (X_d_transform): Identity()\n",
+ " (metrics): ModuleList(\n",
+ " (0-1): 2 x MSE(task_weights=[[1.0]])\n",
+ " )\n",
+ ")"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mc_model = MulticomponentMPNN(mcmp, agg, ffn)\n",
+ "mc_model"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "chemprop",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/chemprop-updated/docs/source/tutorial/python/models/predictor.ipynb b/chemprop-updated/docs/source/tutorial/python/models/predictor.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..0f09019bd64271a59b5c6a0cb900cff3b6aef756
--- /dev/null
+++ b/chemprop-updated/docs/source/tutorial/python/models/predictor.ipynb
@@ -0,0 +1,444 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Predictors"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from chemprop.nn.predictors import (\n",
+ " RegressionFFN,\n",
+ " BinaryClassificationFFN,\n",
+ " MulticlassClassificationFFN,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is example output of [aggregation](./aggregation.ipynb) for input to the predictor."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "n_datapoints_in_batch = 2\n",
+ "hidden_dim = 300\n",
+ "example_aggregation_output = torch.randn(n_datapoints_in_batch, hidden_dim)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Feed forward network"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The learned representation from message passing and aggregation is a vector like that of fixed representations. While other predictors like random forest could be used to make final predictions from this representation, Chemprop prefers and implements using a feed forward network as that allows for end-to-end training. Three basic Chemprop FFNs differ in the prediction task they are used for. Note that multiclass classification needs to know the number of classes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "regression_ffn = RegressionFFN()\n",
+ "binary_class_ffn = BinaryClassificationFFN()\n",
+ "multi_class_ffn = MulticlassClassificationFFN(n_classes=3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Input dimension"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The default input dimension of the predictor is the same as the default dimension of the message passing hidden representation. If your message passing hidden dimension is different, or if you have addition atom or datapoint descriptors, you need to change the predictor's input dimension."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[0.2080],\n",
+ " [0.2787]], grad_fn=)"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ffn = RegressionFFN()\n",
+ "ffn(example_aggregation_output)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[-0.0877],\n",
+ " [-0.2629]], grad_fn=)"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mp_hidden_dim = 2\n",
+ "n_atom_descriptors = 1\n",
+ "mp_output = torch.randn(n_datapoints_in_batch, mp_hidden_dim + n_atom_descriptors)\n",
+ "example_datapoint_descriptors = torch.randn(n_datapoints_in_batch, 12)\n",
+ "\n",
+ "input_dim = mp_output.shape[1] + example_datapoint_descriptors.shape[1]\n",
+ "\n",
+ "ffn = RegressionFFN(input_dim=input_dim)\n",
+ "ffn(torch.cat([mp_output, example_datapoint_descriptors], dim=1))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Output dimension"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The number of tasks defaults to 1 but can be adjusted. Predictors that need to predict multiple values per task, like multiclass classification, will automatically adjust the output dimension."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "torch.Size([2, 4])"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ffn = RegressionFFN(n_tasks=4)\n",
+ "ffn(example_aggregation_output).shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "torch.Size([2, 4, 3])"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ffn = MulticlassClassificationFFN(n_tasks=4, n_classes=3)\n",
+ "ffn(example_aggregation_output).shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Customization"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The following hyperparameters of the predictor are customizable:\n",
+ "\n",
+ " - the hidden dimension between layer, default: 300\n",
+ " - the number of layer, default 1\n",
+ " - the dropout probability, default: 0.0 (i.e. no dropout)\n",
+ " - which activation function, default: ReLU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[ 0.0121],\n",
+ " [-0.0760]], grad_fn=)"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "custom_ffn = RegressionFFN(hidden_dim=600, n_layers=3, dropout=0.1, activation=\"tanh\")\n",
+ "custom_ffn(example_aggregation_output)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Intermediate hidden representations can also be extracted. Note that each predictor layer consists of an activation layer, followed by dropout, followed by a linear layer. The first predictor layer only has the linear layer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "torch.Size([2, 600])"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "layer = 2\n",
+ "custom_ffn.encode(example_aggregation_output, i=layer).shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "RegressionFFN(\n",
+ " (ffn): MLP(\n",
+ " (0): Sequential(\n",
+ " (0): Linear(in_features=300, out_features=600, bias=True)\n",
+ " )\n",
+ " (1): Sequential(\n",
+ " (0): Tanh()\n",
+ " (1): Dropout(p=0.1, inplace=False)\n",
+ " (2): Linear(in_features=600, out_features=600, bias=True)\n",
+ " )\n",
+ " (2): Sequential(\n",
+ " (0): Tanh()\n",
+ " (1): Dropout(p=0.1, inplace=False)\n",
+ " (2): Linear(in_features=600, out_features=600, bias=True)\n",
+ " )\n",
+ " (3): Sequential(\n",
+ " (0): Tanh()\n",
+ " (1): Dropout(p=0.1, inplace=False)\n",
+ " (2): Linear(in_features=600, out_features=1, bias=True)\n",
+ " )\n",
+ " )\n",
+ " (criterion): MSE(task_weights=[[1.0]])\n",
+ " (output_transform): Identity()\n",
+ ")"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "custom_ffn"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Criterion"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Each predictor has a criterion that is used as the [loss function](../loss_functions.ipynb) during training. The default criterion for a predictor is defined in the predictor class."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(RegressionFFN._T_default_criterion)\n",
+ "print(BinaryClassificationFFN._T_default_criterion)\n",
+ "print(MulticlassClassificationFFN._T_default_criterion)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A custom criterion can be given to the predictor."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from chemprop.nn import MSE\n",
+ "\n",
+ "criterion = MSE(task_weights=torch.tensor([0.5, 1.0]))\n",
+ "ffn = RegressionFFN(n_tasks=2, criterion=criterion)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Regression vs. classification"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In addition to using different loss functions, regression and classification predictors also differ in their tranforms of the model outputs during inference. \n",
+ "\n",
+ "Regression should use a [scaler transform](../scaling.ipynb) if target normalization is used during training.\n",
+ "\n",
+ "Classification uses a sigmoid (for binary classification) or a softmax (for multiclass) transform to keep class probability predictions between 0 and 1. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor(True)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "probs = binary_class_ffn(example_aggregation_output)\n",
+ "(0 < probs).all() and (probs < 1).all()"
+ ]
+ },
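+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Similarly (a quick check added for illustration, assuming the softmax is applied in `forward` as described above), the multiclass class probabilities for each task sum to 1."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Probabilities over the 3 classes should sum to 1 for every datapoint and task\n",
+ "class_probs = multi_class_ffn(example_aggregation_output)\n",
+ "torch.allclose(class_probs.sum(dim=-1), torch.ones(class_probs.shape[:-1]))"
+ ]
+ },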
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Other predictors coming soon"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Beta versions of predictors for uncertainty and spectral tasks will be finalized in v2.1."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from chemprop.nn.predictors import (\n",
+ " MveFFN,\n",
+ " EvidentialFFN,\n",
+ " BinaryDirichletFFN,\n",
+ " MulticlassDirichletFFN,\n",
+ " SpectralFFN,\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "chemprop",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/chemprop-updated/docs/source/tutorial/python/saving_and_loading.ipynb b/chemprop-updated/docs/source/tutorial/python/saving_and_loading.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..9d1d8aa518bc5616a3964db75a737930542e00ff
--- /dev/null
+++ b/chemprop-updated/docs/source/tutorial/python/saving_and_loading.ipynb
@@ -0,0 +1,161 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Saving and loading models"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from chemprop.models.utils import save_model, load_model\n",
+ "from chemprop.models.model import MPNN\n",
+ "from chemprop.models.multi import MulticomponentMPNN\n",
+ "from chemprop import nn"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is an example buffer to save to and load from, to avoid creating new files when running this notebook. A real use case would probably save to and read from a file like `model.pt`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import io\n",
+ "\n",
+ "saved_model = io.BytesIO()\n",
+ "\n",
+ "# from pathlib import Path\n",
+ "# saved_model = Path(\"model.pt\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Saving models"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A valid model save file is a dictionary containing the hyper parameters and state dict of the model. `torch` is used to pickle the dictionary."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model = MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), nn.RegressionFFN())\n",
+ "\n",
+ "save_model(saved_model, model)\n",
+ "\n",
+ "# model_dict = {\"hyper_parameters\": model.hparams, \"state_dict\": model.state_dict()}\n",
+ "# torch.save(model_dict, saved_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`lightning` will also automatically create checkpoint files during training. These `.ckpt` files are like `.pt` model files, but also contain information about training and can be used to restart training. See the `lightning` documentation for more details."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "GPU available: True (mps), used: True\n",
+ "TPU available: False, using: 0 TPU cores\n",
+ "HPU available: False, using: 0 HPUs\n"
+ ]
+ }
+ ],
+ "source": [
+ "from lightning.pytorch.callbacks import ModelCheckpoint\n",
+ "from lightning.pytorch import Trainer\n",
+ "\n",
+ "checkpointing = ModelCheckpoint(\n",
+ " dirpath=\"mycheckpoints\",\n",
+ " filename=\"best-{epoch}-{val_loss:.2f}\",\n",
+ " monitor=\"val_loss\",\n",
+ " mode=\"min\",\n",
+ " save_last=True,\n",
+ ")\n",
+ "trainer = Trainer(callbacks=[checkpointing])"
+ ]
+ },
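+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a sketch of restarting training from a checkpoint (commented out here because this notebook does not define dataloaders; `train_loader` and `val_loader` are hypothetical, and the path assumes the callback above has already saved a checkpoint), a checkpoint can be passed to `Trainer.fit` via `ckpt_path`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# trainer.fit(model, train_loader, val_loader, ckpt_path=\"mycheckpoints/last.ckpt\")"
+ ]
+ },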
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Loading models"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`MPNN` and `MulticomponentMPNN` each have a class method to load a model from either a model file `.pt` or a checkpoint file `.ckpt`. The method to load from a file works for either model files or checkpoint files, but won't load the saved training information from a checkpoint file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Need to set the buffer stream position to the beginning, not necessary if using a file\n",
+ "saved_model.seek(0)\n",
+ "\n",
+ "model = MPNN.load_from_file(saved_model)\n",
+ "\n",
+ "# Other options\n",
+ "# model = MPNN.load_from_checkpoint(saved_model)\n",
+ "# model = MulticomponentMPNN.load_from_file(saved_model)\n",
+ "# model = MulticomponentMPNN.load_from_checkpoint(saved_model)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/chemprop-updated/docs/source/tutorial/python/scaling.ipynb b/chemprop-updated/docs/source/tutorial/python/scaling.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..2ad64091c1602e5b4eb7a5dc589fead17a4175d5
--- /dev/null
+++ b/chemprop-updated/docs/source/tutorial/python/scaling.ipynb
@@ -0,0 +1,687 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Scaling inputs and outputs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from chemprop.models import MPNN\n",
+ "from chemprop.nn import BondMessagePassing, NormAggregation, RegressionFFN\n",
+ "from chemprop.nn.transforms import ScaleTransform, UnscaleTransform, GraphTransform"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is an example [dataset](./data/datasets.ipynb) with extra atom and bond features, extra atom descriptors, and extra [datapoint](./data/datapoints.ipynb) descriptors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "from chemprop.data import MoleculeDatapoint, MoleculeDataset\n",
+ "\n",
+ "smis = [\"CC\", \"CN\", \"CO\", \"CF\", \"CP\", \"CS\", \"CI\"]\n",
+ "ys = np.random.rand(len(smis), 1) * 100\n",
+ "\n",
+ "n_datapoints = len(smis)\n",
+ "n_atoms = 2\n",
+ "n_bonds = 1\n",
+ "n_extra_atom_features = 3\n",
+ "n_extra_bond_features = 4\n",
+ "n_extra_atom_descriptors = 5\n",
+ "n_extra_datapoint_descriptors = 6\n",
+ "\n",
+ "extra_atom_features = np.random.rand(n_datapoints, n_atoms, n_extra_atom_features)\n",
+ "extra_bond_features = np.random.rand(n_datapoints, n_bonds, n_extra_bond_features)\n",
+ "extra_atom_descriptors = np.random.rand(n_datapoints, n_atoms, n_extra_atom_descriptors)\n",
+ "extra_datapoint_descriptors = np.random.rand(n_datapoints, n_extra_datapoint_descriptors)\n",
+ "\n",
+ "datapoints = [\n",
+ " MoleculeDatapoint.from_smi(smi, y, x_d=x_d, V_f=V_f, E_f=E_f, V_d=V_d)\n",
+ " for smi, y, x_d, V_f, E_f, V_d in zip(\n",
+ " smis,\n",
+ " ys,\n",
+ " extra_datapoint_descriptors,\n",
+ " extra_atom_features,\n",
+ " extra_bond_features,\n",
+ " extra_atom_descriptors,\n",
+ " )\n",
+ "]\n",
+ "train_dset = MoleculeDataset(datapoints[:3])\n",
+ "val_dset = MoleculeDataset(datapoints[3:5])\n",
+ "test_dset = MoleculeDataset(datapoints[5:])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Scaling targets - FFN"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Scaling the target values before training can improve model performance and make training faster. The scaler for the targets should be fit to the training dataset and then applied to the validation dataset. This scaler is *not* applied to the test dataset. Instead the scaler is used to make an `UnscaleTransform` which is given to the predictor (FFN) layer and used automatically during inference. \n",
+ "\n",
+ "Note that currently the output_transform is saved both in the model's state_dict and and in the model's hyperparameters. This may be changed in the future to align with `lightning`'s recommendations. You can ignore any messages about this."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "output_scaler = train_dset.normalize_targets()\n",
+ "val_dset.normalize_targets(output_scaler)\n",
+ "# test_dset targets not scaled\n",
+ "\n",
+ "output_transform = UnscaleTransform.from_standard_scaler(output_scaler)\n",
+ "\n",
+ "ffn = RegressionFFN(output_transform=output_transform)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Scaling extra atom and bond features - Message Passing"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The atom and bond features generated by Chemprop [featurizers](./featurizers/molgraph_molecule_featurizer.ipynb) are either multi-hot or on the order of 1. We recommend scaling extra atom and bond features to also be on the order of 1. Like the target scaler, these scalers are fit to the training data, applied to the validation data, and then saved to the model (in this case the message passing layer) so that they are applied automatically to the test dataset during inference."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
StandardScaler()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
StandardScaler()
"
+ ],
+ "text/plain": [
+ "StandardScaler()"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "V_f_scaler = train_dset.normalize_inputs(\"V_f\")\n",
+ "E_f_scaler = train_dset.normalize_inputs(\"E_f\")\n",
+ "\n",
+ "val_dset.normalize_inputs(\"V_f\", V_f_scaler)\n",
+ "val_dset.normalize_inputs(\"E_f\", E_f_scaler)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The scalers are used to make `ScaleTransform`s. These are combined into a `GraphTransform` which is given to the message passing module. Note that `ScaleTransform` acts on the whole feature vector, not just the extra features. The `ScaleTransform`'s mean and scale arrays are padded with enough zeros and ones so that only the extra features are actually scaled. The amount of padding required is the length of the default features of the featurizer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from chemprop.featurizers import SimpleMoleculeMolGraphFeaturizer\n",
+ "\n",
+ "featurizer = SimpleMoleculeMolGraphFeaturizer(\n",
+ " extra_atom_fdim=n_extra_atom_features, extra_bond_fdim=n_extra_bond_features\n",
+ ")\n",
+ "n_V_features = featurizer.atom_fdim - featurizer.extra_atom_fdim\n",
+ "n_E_features = featurizer.bond_fdim - featurizer.extra_bond_fdim\n",
+ "\n",
+ "V_f_transform = ScaleTransform.from_standard_scaler(V_f_scaler, pad=n_V_features)\n",
+ "E_f_transform = ScaleTransform.from_standard_scaler(E_f_scaler, pad=n_E_features)\n",
+ "\n",
+ "graph_transform = GraphTransform(V_f_transform, E_f_transform)\n",
+ "\n",
+ "mp = BondMessagePassing(graph_transform=graph_transform)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you only have one of extra atom features or extra bond features, you can set the transform for the unused option to `torch.nn.Identity`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "graph_transform = GraphTransform(V_transform=torch.nn.Identity(), E_transform=E_f_transform)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Scaling extra atom descriptors - Message Passing"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The atom descriptors from message passing (before aggregation) are also likely to be on the order of 1 so extra atom descriptors should also be scaled. No padding is needed (unlike above) as this scaling is only applied to the extra atom descriptors. The `ScaleTransform` is given to the message passing module for use during inference."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "V_d_scaler = train_dset.normalize_inputs(\"V_d\")\n",
+ "val_dset.normalize_inputs(\"V_d\", V_d_scaler)\n",
+ "\n",
+ "V_d_transform = ScaleTransform.from_standard_scaler(V_d_scaler)\n",
+ "\n",
+ "mp = BondMessagePassing(V_d_transform=V_d_transform)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A `GraphTransform` and `ScaleTransform` can both be given to the message passing."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mp = BondMessagePassing(graph_transform=graph_transform, V_d_transform=V_d_transform)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Scaling extra datapoint descriptors - MPNN"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The molecule/reaction descriptors from message passing (after aggregation) are batch normalized by default to be on the order of 1 (can be turned off, see the [model notebook](./models/basic_mpnn_model.ipynb)). Therefore we also recommended scaling the extra datapoint level descriptors. The `ScaleTransform` for this is given to the `MPNN` or `MulticomponentMPNN` module."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_d_scaler = train_dset.normalize_inputs(\"X_d\")\n",
+ "val_dset.normalize_inputs(\"X_d\", X_d_scaler)\n",
+ "\n",
+ "X_d_transform = ScaleTransform.from_standard_scaler(X_d_scaler)\n",
+ "\n",
+ "chemprop_model = MPNN(\n",
+ " BondMessagePassing(), NormAggregation(), RegressionFFN(), X_d_transform=X_d_transform\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "chemprop",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/chemprop-updated/docs/source/uncertainty.nblink b/chemprop-updated/docs/source/uncertainty.nblink
new file mode 100644
index 0000000000000000000000000000000000000000..dc48565d43c209b26725082a41097cb8cfd075e6
--- /dev/null
+++ b/chemprop-updated/docs/source/uncertainty.nblink
@@ -0,0 +1,3 @@
+{
+"path": "../../examples/uncertainty.ipynb"
+}
diff --git a/chemprop-updated/docs/source/use_featurizer_with_other_libraries.nblink b/chemprop-updated/docs/source/use_featurizer_with_other_libraries.nblink
new file mode 100644
index 0000000000000000000000000000000000000000..5112dd211f8a2693b821a4df548d2e8d3d750165
--- /dev/null
+++ b/chemprop-updated/docs/source/use_featurizer_with_other_libraries.nblink
@@ -0,0 +1,3 @@
+{
+"path": "../../examples/use_featurizer_with_other_libraries.ipynb"
+}
diff --git a/chemprop-updated/environment.yml b/chemprop-updated/environment.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fd1fb6ca6355730c2c7ec83889ade7f67e5956d4
--- /dev/null
+++ b/chemprop-updated/environment.yml
@@ -0,0 +1,17 @@
+name: chemprop
+channels:
+ - conda-forge
+dependencies:
+ - python>=3.11
+ - pytorch>=2.1
+ - astartes
+ - aimsim
+ - configargparse
+ - lightning>=2.0
+ - numpy
+ - pandas
+ - rdkit
+ - scikit-learn
+ - scipy
+ - rich
+ - descriptastorus
diff --git a/chemprop-updated/examples/active_learning.ipynb b/chemprop-updated/examples/active_learning.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..4200f0fa0fa1fca7cc85bfc575af2a6d864a0d6d
--- /dev/null
+++ b/chemprop-updated/examples/active_learning.ipynb
@@ -0,0 +1,843 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Active Learning\n",
+ "Active learning is an iterative process where a model actively selects the most informative data points to be labeled by an oracle (e.g. a human expert), optimizing the model's performance with fewer labeled samples. Active learning can be implemented with Chemprop through Python as demonstrated by this notebook."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/active_learning.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Install chemprop from GitHub if running in Google Colab\n",
+ "import os\n",
+ "\n",
+ "if os.getenv(\"COLAB_RELEASE_TAG\"):\n",
+ " try:\n",
+ " import chemprop\n",
+ " except ImportError:\n",
+ " !git clone https://github.com/chemprop/chemprop.git\n",
+ " %cd chemprop\n",
+ " !pip install .\n",
+ " %cd examples"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Import packages"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "import random\n",
+ "from typing import Tuple\n",
+ "\n",
+ "from lightning import pytorch as pl\n",
+ "import matplotlib.pyplot as plt\n",
+ "import pandas as pd\n",
+ "import torch\n",
+ "from torch.utils.data import DataLoader\n",
+ "\n",
+ "from chemprop import data, featurizers, models, nn"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Load some data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "chemprop_dir = Path.cwd().parent\n",
+ "input_path = (\n",
+ " chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n",
+ ") # path to your data .csv file\n",
+ "df_input = pd.read_csv(input_path)\n",
+ "smis = df_input.loc[:, \"smiles\"].values\n",
+ "ys = df_input.loc[:, [\"lipo\"]].values\n",
+ "all_data = [data.MoleculeDatapoint.from_smi(smi, y) for smi, y in zip(smis, ys)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this notebook we use three sets of data: An starting set of training data, a set of data to select additional training data from, and a set of data to test the model on. The set of data to select additional training data from could be unlabeled, but for this example all the data already has labels."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mols = [d.mol for d in all_data] # RDkit Mol objects are use for structure based splits\n",
+ "splitting_indices = data.make_split_indices(mols, \"random\", (0.1, 0.8, 0.1))\n",
+ "starting_data, additional_data, test_data = data.split_data_by_indices(all_data, *splitting_indices)\n",
+ "starting_data, additional_data, test_data = starting_data[0], additional_data[0], test_data[0]\n",
+ "test_loader = data.build_dataloader(data.MoleculeDataset(test_data), shuffle=False)"
+ ]
+ },
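+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that `make_split_indices` returns one list of indices per split replicate, which is why we index into `[0]` above to unwrap the single replicate."
+   ]
+  },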
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "During each iteration of active learning, the training data will be split into training and validation sets and packaged into data loaders, so we make a helper function to do this."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_dataloaders(trainval_data) -> Tuple[DataLoader]:\n",
+ " trainval_mols = [d.mol for d in trainval_data]\n",
+ " train_indices, _, val_indices = data.make_split_indices(\n",
+ " trainval_mols, \"random\", (0.9, 0.0, 0.1)\n",
+ " )\n",
+ " train_data, val_data, _ = data.split_data_by_indices(\n",
+ " trainval_data, train_indices, val_indices, None\n",
+ " )\n",
+ "\n",
+ " train_dset = data.MoleculeDataset(train_data[0])\n",
+ " scaler = train_dset.normalize_targets()\n",
+ "\n",
+ " val_dset = data.MoleculeDataset(val_data[0])\n",
+ " val_dset.normalize_targets(scaler)\n",
+ "\n",
+ " train_loader = data.build_dataloader(train_dset)\n",
+ " val_loader = data.build_dataloader(val_dset, shuffle=False)\n",
+ " return train_loader, val_loader, scaler"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We also define a helper function to construct a chemprop model. Because this is a regression task, the targets of the training data are normalized and the model needs the scaler that was used to unnormalize the predictions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_mpnn(scaler):\n",
+ " output_transform = nn.UnscaleTransform.from_standard_scaler(scaler)\n",
+ " ffn = nn.MveFFN(output_transform=output_transform)\n",
+ " mpnn = models.MPNN(nn.BondMessagePassing(), nn.MeanAggregation(), ffn, batch_norm=False)\n",
+ " return mpnn"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We also need a lightning trainer to run the model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "GPU available: True (mps), used: False\n",
+ "TPU available: False, using: 0 TPU cores\n",
+ "HPU available: False, using: 0 HPUs\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.\n"
+ ]
+ }
+ ],
+ "source": [
+ "trainer = pl.Trainer(\n",
+ " logger=False, enable_progress_bar=False, accelerator=\"cpu\", devices=1, max_epochs=20\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Change active learning parameters here"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A priority function (or acquistition function) guides the active learning process by selecting the most informative data points to label next. A good choice for such a function is the uncertainty of a model's output on each data point."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# rank datapoints based on priority, priority determined by variance\n",
+ "def priority_function(mpnn, datapoint):\n",
+ " dataset = data.MoleculeDataset([datapoint])\n",
+ " loader = data.build_dataloader(dataset, batch_size=1)\n",
+ " output = trainer.predict(mpnn, loader)\n",
+ " output = torch.concat(output, dim=0)\n",
+ " return output[..., 1]"
+ ]
+ },
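+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For a larger candidate pool, it is usually faster to score every datapoint in a single `trainer.predict` call rather than building a one-datapoint loader per molecule. A minimal sketch, assuming the same `mpnn`, `trainer`, and `additional_data` as above (`pool_dset`, `pool_loader`, and `pool_variances` are illustrative names):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: score the whole candidate pool in one predict call.\n",
+    "pool_dset = data.MoleculeDataset(additional_data)\n",
+    "pool_loader = data.build_dataloader(pool_dset, shuffle=False)\n",
+    "preds = torch.concat(trainer.predict(mpnn, pool_loader), dim=0)\n",
+    "pool_variances = preds[..., 1]  # the MVE head outputs (mean, variance) pairs"
+   ]
+  },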
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If our additional data was unlabeled, we would need a way to get the labels for the selected data points. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# the oracle takes a list of potentially unlabeled datapoints to be labeled for the next active learning iteration.\n",
+ "def request_labels(new_data):\n",
+ " # adding new data labels:\n",
+ " # for datapoint in new_data:\n",
+ " # datapoint.y = {label}\n",
+ " return"
+ ]
+ },
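+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a sketch of what a working oracle might look like, suppose the labels lived in a `{smiles: value}` dictionary. Here `request_labels_from_lookup` and `label_lookup` are hypothetical names for illustration, not part of chemprop:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from rdkit import Chem\n",
+    "\n",
+    "\n",
+    "# Hypothetical oracle: look each selected molecule's label up in a\n",
+    "# {smiles: value} dict and attach it to the datapoint.\n",
+    "def request_labels_from_lookup(new_data, label_lookup):\n",
+    "    for datapoint in new_data:\n",
+    "        smi = Chem.MolToSmiles(datapoint.mol)\n",
+    "        datapoint.y = np.array([label_lookup[smi]])"
+   ]
+  },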
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Lastly, we also need to decide how many data points to add to our training set in each iteration."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# number of new datapoints added to trainval pool each iteration.\n",
+ "query_size = len(additional_data) // 8"
+ ]
+ },
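+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "With the (0.1, 0.8, 0.1) split above, `additional_data` holds roughly 80 of the ~100 molecules in `mol.csv`, so `query_size` works out to 10 and the loop below runs 8 iterations, matching the 8 test results reported at the end."
+   ]
+  },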
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Start training"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We start by training a model on the initial training data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Dropping last batch of size 1 to avoid issues with batch normalization (dataset size = 1, batch_size = 64)\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n",
+ "Loading `train_dataloader` to estimate number of stepping batches.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "\n",
+ " | Name | Type | Params | Mode \n",
+ "---------------------------------------------------------------\n",
+ "0 | message_passing | BondMessagePassing | 227 K | train\n",
+ "1 | agg | MeanAggregation | 0 | train\n",
+ "2 | bn | Identity | 0 | train\n",
+ "3 | predictor | MveFFN | 90.9 K | train\n",
+ "4 | X_d_transform | Identity | 0 | train\n",
+ "5 | metrics | ModuleList | 0 | train\n",
+ "---------------------------------------------------------------\n",
+ "318 K Trainable params\n",
+ "0 Non-trainable params\n",
+ "318 K Total params\n",
+ "1.274 Total estimated model params size (MB)\n",
+ "24 Modules in train mode\n",
+ "0 Modules in eval mode\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/utilities/data.py:105: Total length of `DataLoader` across ranks is zero. Please make sure this was your intention.\n",
+ "`Trainer.fit` stopped: `max_epochs=20` reached.\n"
+ ]
+ }
+ ],
+ "source": [
+ "train_loader, val_loader, scaler = get_dataloaders(starting_data)\n",
+ "mpnn = get_mpnn(scaler)\n",
+ "trainer.fit(mpnn, train_loader, val_loader)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we can start the active learning loop. In each iteration, we train a model on the current training data, use the model to select the most informative data points (the ones where the model is least certain), add them to the training data, and repeat."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n",
+ "Loading `train_dataloader` to estimate number of stepping batches.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "\n",
+ " | Name | Type | Params | Mode \n",
+ "---------------------------------------------------------------\n",
+ "0 | message_passing | BondMessagePassing | 227 K | train\n",
+ "1 | agg | MeanAggregation | 0 | train\n",
+ "2 | bn | Identity | 0 | train\n",
+ "3 | predictor | MveFFN | 90.9 K | train\n",
+ "4 | X_d_transform | Identity | 0 | train\n",
+ "5 | metrics | ModuleList | 0 | train\n",
+ "---------------------------------------------------------------\n",
+ "318 K Trainable params\n",
+ "0 Non-trainable params\n",
+ "318 K Total params\n",
+ "1.274 Total estimated model params size (MB)\n",
+ "24 Modules in train mode\n",
+ "0 Modules in eval mode\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "`Trainer.fit` stopped: `max_epochs=20` reached.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n"
+ ],
+ "text/plain": [
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+ "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n",
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+ "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.2045652866363525 \u001b[0m\u001b[35m \u001b[0m│\n",
+ "└───────────────────────────┴───────────────────────────┘\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n",
+ "Loading `train_dataloader` to estimate number of stepping batches.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "\n",
+ " | Name | Type | Params | Mode \n",
+ "---------------------------------------------------------------\n",
+ "0 | message_passing | BondMessagePassing | 227 K | train\n",
+ "1 | agg | MeanAggregation | 0 | train\n",
+ "2 | bn | Identity | 0 | train\n",
+ "3 | predictor | MveFFN | 90.9 K | train\n",
+ "4 | X_d_transform | Identity | 0 | train\n",
+ "5 | metrics | ModuleList | 0 | train\n",
+ "---------------------------------------------------------------\n",
+ "318 K Trainable params\n",
+ "0 Non-trainable params\n",
+ "318 K Total params\n",
+ "1.274 Total estimated model params size (MB)\n",
+ "24 Modules in train mode\n",
+ "0 Modules in eval mode\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "`Trainer.fit` stopped: `max_epochs=20` reached.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n"
+ ],
+ "text/plain": [
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+ "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n",
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+ "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9172996282577515 \u001b[0m\u001b[35m \u001b[0m│\n",
+ "└───────────────────────────┴───────────────────────────┘\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n",
+ "Loading `train_dataloader` to estimate number of stepping batches.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "\n",
+ " | Name | Type | Params | Mode \n",
+ "---------------------------------------------------------------\n",
+ "0 | message_passing | BondMessagePassing | 227 K | train\n",
+ "1 | agg | MeanAggregation | 0 | train\n",
+ "2 | bn | Identity | 0 | train\n",
+ "3 | predictor | MveFFN | 90.9 K | train\n",
+ "4 | X_d_transform | Identity | 0 | train\n",
+ "5 | metrics | ModuleList | 0 | train\n",
+ "---------------------------------------------------------------\n",
+ "318 K Trainable params\n",
+ "0 Non-trainable params\n",
+ "318 K Total params\n",
+ "1.274 Total estimated model params size (MB)\n",
+ "24 Modules in train mode\n",
+ "0 Modules in eval mode\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "`Trainer.fit` stopped: `max_epochs=20` reached.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n"
+ ],
+ "text/plain": [
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+ "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n",
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+ "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.0593369007110596 \u001b[0m\u001b[35m \u001b[0m│\n",
+ "└───────────────────────────┴───────────────────────────┘\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n",
+ "Loading `train_dataloader` to estimate number of stepping batches.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "\n",
+ " | Name | Type | Params | Mode \n",
+ "---------------------------------------------------------------\n",
+ "0 | message_passing | BondMessagePassing | 227 K | train\n",
+ "1 | agg | MeanAggregation | 0 | train\n",
+ "2 | bn | Identity | 0 | train\n",
+ "3 | predictor | MveFFN | 90.9 K | train\n",
+ "4 | X_d_transform | Identity | 0 | train\n",
+ "5 | metrics | ModuleList | 0 | train\n",
+ "---------------------------------------------------------------\n",
+ "318 K Trainable params\n",
+ "0 Non-trainable params\n",
+ "318 K Total params\n",
+ "1.274 Total estimated model params size (MB)\n",
+ "24 Modules in train mode\n",
+ "0 Modules in eval mode\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "`Trainer.fit` stopped: `max_epochs=20` reached.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n"
+ ],
+ "text/plain": [
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+ "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n",
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+ "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.151768445968628 \u001b[0m\u001b[35m \u001b[0m│\n",
+ "└───────────────────────────┴───────────────────────────┘\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n",
+ "Loading `train_dataloader` to estimate number of stepping batches.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "\n",
+ " | Name | Type | Params | Mode \n",
+ "---------------------------------------------------------------\n",
+ "0 | message_passing | BondMessagePassing | 227 K | train\n",
+ "1 | agg | MeanAggregation | 0 | train\n",
+ "2 | bn | Identity | 0 | train\n",
+ "3 | predictor | MveFFN | 90.9 K | train\n",
+ "4 | X_d_transform | Identity | 0 | train\n",
+ "5 | metrics | ModuleList | 0 | train\n",
+ "---------------------------------------------------------------\n",
+ "318 K Trainable params\n",
+ "0 Non-trainable params\n",
+ "318 K Total params\n",
+ "1.274 Total estimated model params size (MB)\n",
+ "24 Modules in train mode\n",
+ "0 Modules in eval mode\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "`Trainer.fit` stopped: `max_epochs=20` reached.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n"
+ ],
+ "text/plain": [
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+ "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n",
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+ "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.2037131786346436 \u001b[0m\u001b[35m \u001b[0m│\n",
+ "└───────────────────────────┴───────────────────────────┘\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n",
+ "Loading `train_dataloader` to estimate number of stepping batches.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "\n",
+ " | Name | Type | Params | Mode \n",
+ "---------------------------------------------------------------\n",
+ "0 | message_passing | BondMessagePassing | 227 K | train\n",
+ "1 | agg | MeanAggregation | 0 | train\n",
+ "2 | bn | Identity | 0 | train\n",
+ "3 | predictor | MveFFN | 90.9 K | train\n",
+ "4 | X_d_transform | Identity | 0 | train\n",
+ "5 | metrics | ModuleList | 0 | train\n",
+ "---------------------------------------------------------------\n",
+ "318 K Trainable params\n",
+ "0 Non-trainable params\n",
+ "318 K Total params\n",
+ "1.274 Total estimated model params size (MB)\n",
+ "24 Modules in train mode\n",
+ "0 Modules in eval mode\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "`Trainer.fit` stopped: `max_epochs=20` reached.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n"
+ ],
+ "text/plain": [
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+ "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n",
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+ "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.1304174661636353 \u001b[0m\u001b[35m \u001b[0m│\n",
+ "└───────────────────────────┴───────────────────────────┘\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n",
+ "Loading `train_dataloader` to estimate number of stepping batches.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "\n",
+ " | Name | Type | Params | Mode \n",
+ "---------------------------------------------------------------\n",
+ "0 | message_passing | BondMessagePassing | 227 K | train\n",
+ "1 | agg | MeanAggregation | 0 | train\n",
+ "2 | bn | Identity | 0 | train\n",
+ "3 | predictor | MveFFN | 90.9 K | train\n",
+ "4 | X_d_transform | Identity | 0 | train\n",
+ "5 | metrics | ModuleList | 0 | train\n",
+ "---------------------------------------------------------------\n",
+ "318 K Trainable params\n",
+ "0 Non-trainable params\n",
+ "318 K Total params\n",
+ "1.274 Total estimated model params size (MB)\n",
+ "24 Modules in train mode\n",
+ "0 Modules in eval mode\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "`Trainer.fit` stopped: `max_epochs=20` reached.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n"
+ ],
+ "text/plain": [
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+ "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n",
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+ "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.0078696012496948 \u001b[0m\u001b[35m \u001b[0m│\n",
+ "└───────────────────────────┴───────────────────────────┘\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/brianli/Documents/chemprop/examples/checkpoints exists and is not empty.\n",
+ "Loading `train_dataloader` to estimate number of stepping batches.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "\n",
+ " | Name | Type | Params | Mode \n",
+ "---------------------------------------------------------------\n",
+ "0 | message_passing | BondMessagePassing | 227 K | train\n",
+ "1 | agg | MeanAggregation | 0 | train\n",
+ "2 | bn | Identity | 0 | train\n",
+ "3 | predictor | MveFFN | 90.9 K | train\n",
+ "4 | X_d_transform | Identity | 0 | train\n",
+ "5 | metrics | ModuleList | 0 | train\n",
+ "---------------------------------------------------------------\n",
+ "318 K Trainable params\n",
+ "0 Non-trainable params\n",
+ "318 K Total params\n",
+ "1.274 Total estimated model params size (MB)\n",
+ "24 Modules in train mode\n",
+ "0 Modules in eval mode\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "`Trainer.fit` stopped: `max_epochs=20` reached.\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n"
+ ],
+ "text/plain": [
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+ "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n",
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+ "│\u001b[36m \u001b[0m\u001b[36m test/mse \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9942679405212402 \u001b[0m\u001b[35m \u001b[0m│\n",
+ "└───────────────────────────┴───────────────────────────┘\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "trainval_data = starting_data\n",
+ "results = []\n",
+ "\n",
+ "for _ in range(len(additional_data) // query_size):\n",
+ " # sort new datapoints by priority using priority function\n",
+ " priority_remaining_data = [\n",
+ " (priority_function(mpnn, datapoint), datapoint) for datapoint in additional_data\n",
+ " ]\n",
+ " sorted_remaining_data = [\n",
+ " datapoint\n",
+ " for unc, datapoint in sorted(priority_remaining_data, key=lambda d: d[0], reverse=True)\n",
+ " ]\n",
+ "\n",
+ " new_data = sorted_remaining_data[:query_size]\n",
+ " additional_data = additional_data[query_size:]\n",
+ "\n",
+ " request_labels(new_data)\n",
+ " trainval_data.extend(new_data)\n",
+ "\n",
+ " train_loader, val_loader, scaler = get_dataloaders(trainval_data)\n",
+ "\n",
+ " mpnn = get_mpnn(scaler)\n",
+ " trainer.fit(mpnn, train_loader, val_loader)\n",
+ "\n",
+ " result = trainer.test(mpnn, test_loader)\n",
+ " results.append((len(trainval_data), result[0][\"test/mse\"]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally we can view the results. The model's performance will hopefully improve with each iteration of active learning. Though this notebook is just an example. We didn't train the model for many epochs, and we used a very small dataset, so we don't expect to see the model improve. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[(20, 1.2045652866363525),\n",
+ " (30, 0.9172996282577515),\n",
+ " (40, 1.0593369007110596),\n",
+ " (50, 1.151768445968628),\n",
+ " (60, 1.2037131786346436),\n",
+ " (70, 1.1304174661636353),\n",
+ " (80, 1.0078696012496948),\n",
+ " (90, 0.9942679405212402)]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAArwAAAK7CAYAAAAQv1z7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACTKklEQVR4nOzdd3iV9f3/8dd9TvYke0BIwt6IbBAFB8qqfq3VasGB1lrbb7W0vyraVm1t1bZWa5114SqOr1tAQGUpiDLC3oSZhJC957l/f5ycA5FhAknuM56P68p1lXPuk/POXUxefPL+vD+GaZqmAAAAAB9ls7oAAAAAoD0ReAEAAODTCLwAAADwaQReAAAA+DQCLwAAAHwagRcAAAA+jcALAAAAn0bgBQAAgE8j8AIAAMCnEXgBeIQnnnhChmFowIABZ/w5cnJydP/99ysrK+uE5+6//34ZhnEWFZ6ZOXPmyDAM7du3r8Pfu7UMw9D9999vdRke4bv3YuvWrbr//vu94v9HACci8ALwCC+99JIkacuWLVq9evUZfY6cnBw98MADJw28t9xyi1atWnU2Jfq8VatW6ZZbbrG6DI+0detWPfDAAwRewEsReAFYbs2aNdqwYYOmTJkiSXrxxRfb/D26dOmiUaNGtfnn9VT19fVqaGho1WtGjRqlLl26tFNFrdPY2Kja2lqrywDgIwi8ACznCrgPP/ywxowZozfffFNVVVUnXHf48GHdeuutSktLU1BQkFJTU3XVVVfpyJEjWrp0qYYPHy5Juummm2QYRrNfS3+3peGKK65Qenq6HA7HCe8zcuRInXvuue4/m6app59+Wuecc45CQ0MVExOjq666Snv37j3jr/mzzz7TRRddpKioKIWFhWns2LH6/PPPm12ze/du3XTTTerZs6fCwsLUuXNnTZs2TZs2bWp23dKlS2UYhl577TX95je/UefOnRUcHKzdu3frxhtvVEREhHbv3q3JkycrIiJCaWlp+s1vfnNCoPzur/Fd7RhLlizRz3/+c8XHxysuLk5XXnmlcnJymr22trZWv/nNb5ScnKywsDCdf/75Wrt2rTIyMnTjjTee9l7s27dPhmHob3/7mx588EFlZmYqODhYS5YskeT8B9EPfvADxcbGKiQkREOGDNHbb7/d7HNUVVXpt7/9rTIzMxUSEqLY2FgNGzZMc+fOdV8zfvx4jR8//oT3v/HGG5WRkXHK+ubMmaMf/ehHkqQJEya4/27NmTNHkrR+/XpNnTpViYmJCg4OVmpqqqZMmaJDhw6d9usG0HEIvAAsVV1drblz52r48OEaMGCAZs6cqfLycr3zzjvNrjt8+LCGDx+u999/X7NmzdKCBQv0+OOPKzo6WsXFxTr33HP18ssvS5J+//vfa9WqVaf9Ff3MmTN14MABffHFF80e3759u7755hvddNNN7sd+9rOf6c4779TFF1+sDz74QE8//bS2bNmiMWPG6MiRI63+ml9//XVNnDhRUVFReuWVV/T2228rNjZWl156abPQm5OTo7i4OD388MP69NNP9dRTTykgIEAjR47Ujh07Tvi8s2fP1oEDB/Tss8/q448/VmJioiTnau8PfvADXXTRRfrwww81c+ZMPfbYY3rkkUdaVO8tt9yiwMBA/fe//9Xf/vY3LV26VNOnT292zU033aTHH39cN910kz788EP98Ic/1P/8z/+opKSkxffliSee0BdffKF//OMfWrBggfr06aMlS5Zo7NixKikp0bPPPqsPP/xQ55xzjq655hp34JSkWbNm6ZlnntGvfvUrffrpp3rttdf0ox/9SIWFhS1+/1OZMmWK/vrXv0qSnnrqKfffrSlTpqiyslKXXHKJjhw5oqeeekqLFy/W448/rq5du6q8vPys3xtAGzEBwEKvvvqqKcl89tlnTdM0zfLycjMiIsIcN25cs+tmzpxpBgYGmlu3bj3l5/r2229NSebLL798wnP33Xefefy3vPr6ejMpKcm87rrrml33u9/9zgwKCjILCgpM0zTNVatWmZLMRx99tNl1Bw8eNENDQ83f/e53p/36Xn75ZVOSmZ2dbZqmaVZWVpqxsbHmtGnTml3X2NhoDh482BwxYsQpP1dDQ4NZV1dn9uzZ0/z1r3/tfnzJkiWmJPP8888/4TU33HCDKcl8++23mz0+efJks3fv3s0ek2Ted999J9R+++23N7vub3/7mynJzM3NNU3TNLds2WJKMu+6665m182dO9eUZN5www2n/JpM0zSzs7NNSWb37t3Nurq6Zs/16dPHHDJkiFlfX9/s8alTp5opKSlmY2OjaZqmOWDAAPOKK6447ftccMEF5gUXXHDC4zfccIOZnp7e7LHv3ot33nnHlGQuWbKk2XVr1qwxJZkffPDBad8bgLVY4QVgqRdffFGhoaH68Y9/LEmKiIjQj370I61YsUK7du1yX7dgwQJNmDBBffv2bZP3DQgI0PTp0/Xee++ptLRUkrNv9LXXXtPll1+uuLg4SdInn3wiwzA0ffp0NTQ0uD+Sk5M1ePBgLV26tFXvu3LlShUVFemGG25o9vkcDocuu+wyffvtt6qsrJQkNTQ06K9//av69eunoKAgBQQEKCgoSLt27dK2bdtO+Nw//OEPT/qehmFo2rRpzR4bNGiQ9u/f36Kaf/CDH5zwWknu1y9btkySdPXVVze77qqrrlJAQECL3sP1PoGBge4/7969W9u3b9dPfvITSWp2vyZPnqzc3Fz3SveIESO0YMEC3X333Vq6dKmqq6tb/L5no0ePHoqJidFdd92lZ599Vlu3bu2Q9wXQOgReAJbZvXu3li9frilTpsg0TZWUlKikpERXXXWVpGOTGyTp6NGjbb6haubMmaqpqdGbb74pSVq4cKFyc3ObtTMcOXJEpmkqKSlJgYGBzT6+/vprFRQUtOo9XS0QV1111Qmf75FHHpFpmioqKpLk/DX9H/7wB11xxRX6+OOPtXr1an377bcaPHjwSQNdSkrKSd8zLCxMISEhzR4LDg5WTU1Ni2p2hf/jXyvJXYOrbSApKanZdQEBASe89nS+W7/rXv32t7894V7dfvvtkuS+/0888YTuuusuffDBB5owYYJiY2N1xRVXNPtHU3uIjo7WsmXLdM455+iee+5R//79lZqaqvvuu0/19fXt+t4AWq7l//QGgDb20ksvyTRN/d///Z/+7//+74TnX3nlFT344IOy2+1KSEho801A/fr104gRI/Tyyy/rZz/7mV5++WWlpqZq4sSJ7mvi4+NlGIZWrFjhDnrHO9ljpxMfHy9J+ve//33KqRGu4Pj666/r+uuvd/ePuhQUFKhTp04nvM6KOcPSsUB85MgRde7c2f14Q0NDq3pov1u/617Nnj1bV1555Ulf07t3b0lSeHi4HnjgAT3wwAM6cuSIe7V32rRp2r59uyQpJCTEvZp/vNb+o+W7Bg4cqDfffFOmaWrjxo2aM2eO/vSnPyk0NFR33333WX1uAG2DwAvAEo2NjXrllVfUvXt3vfDCCyc8/8knn+jRRx/VggU
LNHXqVE2aNEmvvfaaduzY4Q453/XdlceWuOmmm/Tzn/9cX375pT7++GPNmjVLdrvd/fzUqVP18MMP6/Dhwyf8yv5MjB07Vp06ddLWrVv1y1/+8rTXGoZxQqCeN2+eDh8+rB49epx1LW3l/PPPlyS99dZbzaZb/N///V+rR6Mdr3fv3urZs6c2bNhwQug/naSkJN14443asGGDHn/8cVVVVSksLEwZGRl65513VFtb676vhYWFWrlypaKiok77OVvyd8swDA0ePFiPPfaY5syZo3Xr1rW4ZgDti8ALwBILFixQTk6OHnnkkZOOihowYICefPJJvfjii5o6dar+9Kc/acGCBTr//PN1zz33aODAgSopKdGnn36qWbNmqU+fPurevbtCQ0P1xhtvqG/fvoqIiFBqaqpSU1NPWce1116rWbNm6dprr1Vtbe0JI7TGjh2rW2+9VTfddJPWrFmj888/X+Hh4crNzdWXX36pgQMH6uc//3mLv+6IiAj9+9//1g033KCioiJdddVVSkxM1NGjR7VhwwYdPXpUzzzzjCRn2J4zZ4769OmjQYMGae3atfr73//uMbNyXfr3769rr71Wjz76qOx2uy688EJt2bJFjz76qKKjo2WznXn33HPPPadJkybp0ksv1Y033qjOnTurqKhI27Zt07p169zTPEaOHKmpU6dq0KBBiomJ0bZt2/Taa69p9OjRCgsLkyTNmDFDzz33nKZPn66f/vSnKiws1N/+9rfvDbuS3CcA/uc//1FkZKRCQkKUmZmpVatW6emnn9YVV1yhbt26yTRNvffeeyopKdEll1xyxl83gDZm5Y45AP7riiuuMIOCgsz8/PxTXvPjH//YDAgIMPPy8kzTdE5GmDlzppmcnGwGBgaaqamp5tVXX20eOXLE/Zq5c+eaffr0MQMDA5vttP/ulIbjXXfddaYkc+zYsaes5aWXXjJHjhxphoeHm6GhoWb37t3N66+/3lyzZs1pv87vTmlwWbZsmTllyhQzNjbWDAwMNDt37mxOmTLFfOedd9zXFBcXmzfffLOZmJhohoWFmeedd565YsWKE6YNuKY0HP9alxtuuMEMDw8/4fGT3Q+dYkrDt99+2+w61/sdP7GgpqbGnDVrlpmYmGiGhISYo0aNMletWmVGR0c3myhxMq4pDX//+99P+vyGDRvMq6++2kxMTDQDAwPN5ORk88ILL3RP9jBN07z77rvNYcOGmTExMWZwcLDZrVs389e//rV72obLK6+8Yvbt29cMCQkx+/XrZ7711lstmtJgmqb5+OOPm5mZmabdbndPA9m+fbt57bXXmt27dzdDQ0PN6Ohoc8SIEeacOXNO+zUD6FiGaZqmNVEbAODLVq5cqbFjx+qNN97QddddZ3U5APwYgRcAcNYWL16sVatWaejQoQoNDdWGDRv08MMPKzo6Whs3bjxhSgQAdCR6eAEAZy0qKkqLFi3S448/rvLycsXHx2vSpEl66KGHCLsALMcKLwAAAHwaB08AAADApxF4AQAA4NMIvAAAAPBpbFo7CYfDoZycHEVGRlp2VCcAAABOzTRNlZeXKzU19XsPuCHwnkROTo7S0tKsLgMAAADf4+DBg997AiWB9yQiIyMlOW9gS46cBAAAQMcqKytTWlqaO7edDoH3JFxtDFFRUQReAAAAD9aS9lM2rQEAAMCnEXgBAADg0wi8AAAA8GkEXgAAAPg0Ai8AAAB8GoEXAAAAPo3ACwAAAJ9G4AUAAIBPI/ACAADApxF4AQAA4NMIvAAAAPBpBF4AAAD4NAIvAAAAfBqBFwAAAD6NwAsAAACfRuAFAACATyPwAgAAwKcReAEAAODTCLwAAADwaQReAAAA+DQCLwAAAHwagRcAAAA+jcALAAAAn0bg9QB7jlbo4w052nWk3OpSAAAAfA6B1wM8tWS3/nfuei3ckmd1KQAAAD6HwOsBMuPCJUnZBVUWVwIA1jhYVKU/fLBZe45WWF0KAB8UYHUBkDLinYF3X2GlxZUAQMcrr6nXjS9/oz1HK7XhUIk+uH2sbDbD6rIA+BBWeD1ApivwFhB4AfgXh8PUb97eoD1Hnd//Nh4q1QdZhy2uCoCvIfB6gPS4MElSYWWdymrqLa4GADrO00t3a9HWIwqy23TFOamSpL99ukPVdY0WVwbAlxB4PUBkSKDiI4IkSfvp4wXgJ5bsyNeji3dKkv50eX89/MNB6twpVHllNXp+xV6LqwPgSywNvMuXL9e0adOUmpoqwzD0wQcfnPb69957T5dccokSEhIUFRWl0aNHa+HChSdc9+6776pfv34KDg5Wv3799P7777fTV9B2Mlwb1+jjBeAH9hVU6o6562Wa0nUju+rHI7oqJNCuuyf1kSQ9s3SPjpTVWFwlAF9haeCtrKzU4MGD9eSTT7bo+uXLl+uSSy7R/PnztXbtWk2YMEHTpk3T+vXr3desWrVK11xzjWbMmKENGzZoxowZuvrqq7V69er2+jLaRAZ9vAD8RGVtg3722lqV1TRoSNdOum9aP/dzUwel6NyunVRd36h/LNxhYZUAfIlhmqZpdRGSZBiG3n//fV1xxRWtel3//v11zTXX6I9//KMk6ZprrlFZWZkWLFjgvuayyy5TTEyM5s6d26LPWVZWpujoaJWWlioqKqpV9Zypp5bs1t8X7tCVQzrrn9ec0yHvCQAdzTRN/XLues3bmKuEyGB98r/nKSkqpNk16w4U68qnV8owpI9/eZ4GdI62qFoAnqw1ec2re3gdDofKy8sVGxvrfmzVqlWaOHFis+suvfRSrVy58pSfp7a2VmVlZc0+OpqrpYHRZAB82X+W79W8jbkKsBl65ifnnhB2JencrjH6weBUmab0l3nb5CHrMgC8mFcH3kcffVSVlZW6+uqr3Y/l5eUpKSmp2XVJSUnKyzv1KWYPPfSQoqOj3R9paWntVvOpuCY17Ctk0xoA3/TlrgI98ul2SdJ90/ppWEbsKa/93WW9FRRg06q9hfpsW35HlQjAR3lt4J07d67uv/9+vfXWW0pMTGz2nGE0H1humuYJjx1v9uzZKi0tdX8cPHiwXWo+HVcPb1FlnUqrGU0GwLccLKrSL+euk8OUfjS0i6aPSj/t9V1iwnTLeZmSpL/O36a6BkdHlAnAR3ll4H3rrbd088036+2339bFF1/c7Lnk5OQTVnPz8/NPWPU9XnBwsKKiopp9dLSI4AAlRAZLYuMaAN9SXdeon722ViVV9RrUJVp/vmLAaRchXG6f0EPxEUHKLqjU61/v74BKAfgqrwu8c+fO1Y033qj//ve/mjJlygnPjx49WosXL2722KJFizRmzJiOKvGMZdLHC8DHmKape97fpK25ZYoLD9Kz04cqJNDeotdGBAfoNxN7S5L+9fkulVTVtWepAHyYpYG3oqJCWVlZysrKkiRlZ2crKytLBw4ckORsNbj++uvd18+dO1fXX3+9Hn30UY0aNUp5eXnKy8tTaWmp+5o77rhDixYt0iOPPKLt27frkUce0WeffaY777yzI7+0M5IR39THy+ETAHzEnJX79P76w7
LbDD153blK7RTaqtdfPSxNfZIjVVpdr399vqudqgTg6ywNvGvWrNGQIUM0ZMgQSdKsWbM0ZMgQ94ix3Nxcd/iVpOeee04NDQ36xS9+oZSUFPfHHXfc4b5mzJgxevPNN/Xyyy9r0KBBmjNnjt566y2NHDmyY7+4M+CexcsKLwAf8PXeQj04b5sk6Z7JfTW6e1yrP4fdZujeKX0lSa+t2q+9RyvatEYA/sFj5vB6Eivm8ErS/E25uv2NdTonrZM++MXYDntfAGhrOSXVmvbvL1VYWacrzknVY9ec06K+3VOZOedbfbE9X5f0S9Lz1w9rw0oBeCu/mcPra5jFC8AX1NQ36uevr1VhZZ36pUTpoSsHnVXYlaR7JveR3WZo8dYjWrmnoI0qBeAvCLwexNXDW1JVz+YMAF7JNE398cPN2nCoVJ3CAvXcjKEKDWrZJrXT6ZEYqZ+M7CpJevCTbWp08MtJAC1H4PUgYUEBSopqGk3GARQAvNAbqw/o7TWHZDOkf187RGmxYW32ue+8uJciQwK0NbdM76471GafF4DvI/B6GHdbA7N4AXiZtfuL9MDHWyRJv7usj8b1TGjTzx8bHqRfXdhTkvSPhTtUWdvQpp8fgO8i8HoYV+DNJvAC8CJHymp02+vrVN9oasrAFP3s/G7t8j7Xj0lX19gw5ZfX6rnle9vlPQD4HgKvh2E0GQBvU9fg0O1vrNPR8lr1SorQ3646+01qpxIcYNfsSX0kSf9Zvke5pdXt8j4AfAuB18Nkug+fIPAC8A5/+mSL1u4vVmRIgP4zY5jCgwPa9f0uG5CsERmxqql36O+f7mjX9wLgGwi8Hsa1wptdUClGJAPwdG9/e1Cvf31AhiH968fnuL+HtSfDMPT7qc7DKN5bf1gbD5W0+3sC8G4EXg+THuv8YVFW06CSqnqLqwGAU8s6WKLff7BZkjTr4l66sE9Sh733oC6ddOWQzpKcY8pYIABwOgReDxMaZFdyVIgkKZs+XgAe6mh5rW57ba3qGh26pF+SfjGhR4fX8P8u662QQJu+2VekTzfndfj7A/AeBF4PlEEfLwAPVt/o0C/+u055ZTXqlhCuf149WDZb+2xSO52U6FDden53SdJDC7artqGxw2sA4B0IvB4oM55ZvAA811/nb9M32UWKCHZuUosMCbSslp+d302JkcE6UFSlV1fut6wOAJ6NwOuB3LN4OW0NgId5f/0hvfzVPknSo1cPVo/ECEvrCQ8O0G8v7S1JeuKLXSqsqLW0HgCeicDrgVy7nPfTwwvAg2w+XKq7390kSfrfC3vo0v7JFlfk9MNzu6hfSpTKaxr0r893WV0OAA9E4PVAx5+2xs5jAJ6gqLJOP3ttrWobHJrQO0F3XtzL6pLc7LZjY8reWH1Au/PLLa4IgKch8Hqg9DjnprXymgYVVdZZXA0Af9fQ6ND/zl2nwyXVyogL0+M/HiK7BZvUTmdM93hd0i9JjQ5Tf52/3epyAHgYAq8HCgm0KzXaOZqMI4YBWO3vC3foq92FCguy67kZwxQdat0mtdOZPamPAmyGvtierxW7jlpdDgAPQuD1UMdOXGPjGgDrfLwhR88t3ytJ+vtVg9U7OdLiik6tW0KEZoxOl+Q8jKLRQUsYACcCr4di4xoAq23PK9Pv/m+jJOlnF3TTlEEpFlf0/e64qKeiQwO140i53l5z0OpyAHgIAq+Hyjxu4xoAdLTSqnrd+upaVdc3alzPeP3u0j5Wl9QincKCdMdFPSVJjy7aoYraBosrAuAJCLweyrVxjR5eAB2t0WHqjrfW60BRlbrEhOoJD9ykdjrTR6UrMz5cBRV1enrJbqvLAeABCLwe6thpa1WMJgPQoR5bvFNLdxxVSKBNz80YqpjwIKtLapWgAJvumewcU/bCl9k6VMxeCMDfEXg9VFpsmAxDqqhtUEEFo8kAdIxPN+fpyaZV0YevHKT+qdEWV3RmLu6bqNHd4lTX4NDfPt1hdTkALEbg9VDO0WShkti4BqBj7M4v12/ezpIkzRybqSuGdLa2oLNgGIbundJXhiF9tCFH6w4UW10SAAsReD1YZjwb1wB0jLKaet362lpV1jVqVLdYzZ7sHZvUTmdA52hddW4XSdKDn2ylPQzwYwReD8bGNQAdweEwNeutDdp7tFIp0SF68rpzFWj3jR8Pv720t8KC7Fp3oESfbMy1uhwAFvGN72g+6viNawDQXv79xW59tu2IggJsenb6UMVHBFtdUptJigrRbRd0lyQ9vGC7auobLa4IgBUIvB4sg1m8ANrZ59uO6PHPd0qSHrxigAandbK2oHbw03HdlBwVosMl1Xr5q31WlwPAAgReD+Y6bW1fYSW9ZwDaXHZBpe58K0umKc0Yla6rh6VZXVK7CA2y63eX9ZYkPbVkt46W11pcEYCORuD1YGmxobIZUlVdo45W8A0aQNupqG3Qra+uUXlNg4alx+gPU/tZXVK7uuKczhrUJVoVtQ167LOdVpcDoIMReD1YcIBdqZ2co8no4wXQVkzT1O/+b4N25VcoMTJYT//kXAUF+PaPA5vN0O+nOEP9m98c0I68cosrAtCRfPs7nA84tnGNPl4AbePZZXs1f1OeAu2Gnpk+VIlRIVaX1CFGZMZq0oBkOUzpwXmMKQP8CYHXw7k3rjGaDEAbWLbzqP62cLsk6f4f9NfQ9BiLK+pYd0/qoyC7TSt2FWjpzqNWlwOggxB4PVwGK7wA2siBwir9au56mab04+Fpum5EV6tL6nDpceG6cWyGJOkv87apodFhbUEAOgSB18NlxrsOn6CHF8CZq6pr0K2vrVFpdb0Gp3XSA5f3l2EYVpdliV9M6KGYsEDtzq/Q3G8PWl0OgA5A4PVw6U0tDfsZTQbgDJmmqbvf3aTteeWKjwjSs9PPVXCA3eqyLBMdGqhfX9JLkvTY4p0qra63uCIA7Y3A6+HSYsLco8nymR0J4Ay8+GW2PtqQowCboaeuO1cp0aFWl2S5a0d0VfeEcBVV1unpJbutLgdAOyPwerigAJu6xDjbGjhxDUBrrdxToIcWODep/X5KX43sFmdxRZ4h0G5zjyl7+at9OkDbGODTCLxegI1rAM7E4ZJq/fK/69XoMHXlkM66YUyG1SV5lPG9EzSuZ7zqGh165NPtVpcDoB0ReL1AZhwb1wC0Tk19o257ba2KKuvUPzVKf71yoN9uUjsVwzB075S+shnSvE25+nZfkdUlAWgnBF4vwAovgNYwTVP3vr9Zmw6XKiYsUM/NGKqQQP/dpHY6fZKjdM1w53i2Bz/ZKoeDzcGALyLwegHX4RP7OHwCQAu89vV+vbvukGyG9NR157r3AeDkZl3SS+FBdm04VKqPNuRYXQ6AdkDg9QLuFd7CSlYfAJzWN9lF+tPHWyVJsyf11Zge8RZX5PkSIoN1+4QekqRHPt2u6rpGiysC0NYIvF6gS0yo7DZDNfUOHSmvsbocAB4qr7RGt7+xTg0OU9MGp+qWcZlWl+Q1bj4vU507hSq3tEYvrNhrdTkA2hiB1wsE2m1Ki3HOzdxXwMY1ACeqbWjUba+vVUFFr
fokR+qRH7JJrTVCAu26a1IfSdIzy/Yov4zFBcCXEHi9xPFtDQDwXfd/tFVZB0sUFRKg52YMVVhQgNUleZ1pg1I0pGsnVdU16tFFO60uB0AbIvB6CffGNSY1APiOud8c0NxvDsgwpCeuHeI+khytYxiG+zCKt9ce1JacUosrAtBWCLxeIiOO09YAnGjdgWLd9+EWSdJvJ/bW+N6JFlfk3Yamx2jqoBSZpvSXedtkmmwUBnwBgddL0NIA4Lvyy2v089fXqq7Rocv6J+v28d2tLskn3HVZHwUF2LRyT6E+35ZvdTkA2gCB10tkNgXe/YVVjCYDoLoGh37xxjodKatVj8QI/ePqwWxSayNpsWG6+TznhIu/zt+m+kaHxRUBOFsEXi/RuVOoAmyGahscymP3MOD3/jJvq77dV6zI4AD9Z8ZQRQSzSa0t3T6+u+LCg7S3oFKvf73f6nIAnCUCr5cIsNuUFuvs42XjGuDf/m/tIb2yyhnCHrvmHHVLiLC4It8TGRKoWRN7SZIe/2yXSqrqLK4IwNkg8HoR98Y1+ngBv7XpUKnueX+TJOmOi3rq4n5JFlfku64ZlqbeSZEqra7Xv7/YbXU5AM4CgdeLuDeuscIL+KXCilr97LU1qmtw6KI+ibrjop5Wl+TTAuw23TulryTp1VX7mJIDeDECrxdxbVzL5rQ1wO80NDr0y/+uV05pjbrFh+uxH58jm41Nau3t/F4JGt87QfWNph6av83qcgCcIQKvF3EdPrGflgbA7zy8YLtW7S1UeJBdz80YqqiQQKtL8hv3Tu4ru83Qoq1HtGpPodXlADgDBF4v4g68RYwmA/zJh1mH9cKX2ZKkR68erJ5JkRZX5F96JkXquhFdJUkPztvK91/ACxF4vUhqpxAF2g3VNTiUU1ptdTkAOsDWnDLd9e5GSc5RWZcNSLG4Iv9058U9FRkcoC05ZXpv/WGrywHQSgReL9J8NBl9vICvK6mq089eX6OaeofO75Wg30zsbXVJfisuIli/vLCHJOnvC7erqq7B4ooAtAaB18tkNrU1MJoM8G2NDlP/O3e9DhZVq2tsmJ748Tmys0nNUjeOzVBabKiOlNXquWV7rS4HQCsQeL2MazTZfsbjAD7tH4t2aMWuAoUGOjepdQoLsrokvxccYNfsSc4xZc8t36O8Uk69BLwFgdfLuA6f2McKL+Cz5m/K1TNL90iSHrlqkPqmRFlcEVwmDUjWsPQY1dQ79LeF260uB0ALEXi9TIZ7Fi+BF/BFO4+U67fvbJAk/XRcpn4wONXiinA8wzD0h6n9JEnvrTusjYdKrC0IQIsQeL2MazTZwaJqNTIaB/AppdX1+tlra1VV16gx3eN012V9rC4JJzE4rZP+Z0hnSdKD87bJNPleDHg6Aq+XSe0UqiC7TXWNDuWUMJoM8BUOh6lfv5Wl7IJKde4UqievO1cBdr5Fe6r/d2lvBQfY9E12kRZuOWJ1OQC+B99NvYzdZqgrfbyAz3n88136Ynu+ggNsem7GUMWGs0nNk6V2CtWt53eTJD20YJtqGxotrgjA6RB4vZCrrWEffbyAT1i89Yie+HyXJOmv/zNQAzpHW1wRWuK2C7orITJY+wur9Nqq/VaXA+A0CLxeyDWpIZvDJwCvt+dohX79VpYk6cYxGfrh0C7WFoQWCw8O0P9rOgzkX5/vUlFlncUVATgVAq8Xck1qoKUB8G7lNfW69dU1qqht0IiMWN07pa/VJaGVfji0i/qmRKm8psG9Sg/A8xB4vVBmPC0NgLdzOEz99p0N2nO0UslRIXrqJ+cqkE1qXsduM/T7pn+ovPb1fu3Or7C4IgAnw3dXL+Ra4T1YXKWGRofF1QA4E88s26OFW44oyG7TM9PPVUJksNUl4QyN7RGvi/smqtFh6qH526wuB8BJEHi9UEpUiIICbKpvNJVTwtGWgLdZsiNf/1i0Q5L0p8v7a0jXGIsrwtmaPbmvAmyGPt+ery93FVhdDoDvIPB6IZvNUHps08Y1+ngBr7K/sFJ3zF0v05SuG9lVPx7R1eqS0Aa6J0Ro+qh0SdKD87ZyMBDgYQi8XiqDPl7A61TWNujWV9eqrKZBQ7p20n3T+lldEtrQHRf1VHRooLbnleudNQetLgfAcQi8Xsq1cS2bwAt4BdM09bt3N2rHkXIlRAbr2elDFRxgt7ostKGY8CD96qKekqR/LNqpitoGiysC4ELg9VKuwyf209IAeIXnV+zVvI25CrAZevon5yopKsTqktAOZoxKV0ZcmAoqavXs0j1WlwOgCYHXS2XEu44X5vAJwNN9uatADy/YLkm6b1o/Dc+ItbgitJegAJtmT3aOKXt+xV4dLqm2uCIAEoHXa7lWeA8WMZoM8GQHi6r0v3PXyWFKVw3t4t7YBN81sV+SRmbGqrbBob99ut3qcgCIwOu1kqNCFBxgU4PD1KFiVhAAT1RT36jbXl+r4qp6DeoSrQevGCDDMKwuC+3MMAz9YWo/GYb0YVaO1h8otrokwO8ReL2UzWa4V3kZTQZ4HtM0Nfu9TdqSU6a48CA9O32oQgLZpOYvBnSO1g/P7SJJenDeNpkmY8oAKxF4vZi7j5dJDYDHmbNyn95ff1h2m6EnrztXqZ1CrS4JHey3E3srNNCutfuLNW9TrtXlAH6NwOvFXLN497NxDfAoX+8t1IPznEfM3jO5r0Z3j7O4IlghOTpEP7ugmyTp4QXbVVPfaHFFgP8i8Hoxd0sDK7yAx8gpqdYv3linRoepy89J1cyxGVaXBAvden43JUeF6FBxteas3Gd1OYDfIvB6MVfg3UcPL+ARauob9fPX16qwsk59U6L08JWD2KTm58KCAvT/Lu0tSXrqi90qqKi1uCLAPxF4vZjrtLVDxdWqZzQZYCnTNPXHDzdrw6FSdQoL1H9mDFVoEJvUIP3PkM4a0DlK5bUNemzxTqvLAfwSgdeLJUUFKzTQrkaHqYNF9PECVnpj9QG9veaQbIb072uHKC02zOqS4CFsNkN/mNJPkjT3mwPaeaTc4ooA/2Np4F2+fLmmTZum1NRUGYahDz744LTX5+bm6rrrrlPv3r1ls9l05513nnDNnDlzZBjGCR81NTXt80VYyDAMpcc5f6iycQ2wTn55jf70yVZJ0u8u66NxPRMsrgieZmS3OF3WP1kOU/pL04ZGAB3H0sBbWVmpwYMH68knn2zR9bW1tUpISNC9996rwYMHn/K6qKgo5ebmNvsICfHNc+tdbQ1sXAOs8/qq/aprcOictE762fndrC4HHuruSX0UaDe0bOdRLd2Rb3U5gF8JsPLNJ02apEmTJrX4+oyMDP3rX/+SJL300kunvM4wDCUnJ591fd4gnY1rgKWq6xr12tf7JTl35LNJDaeSER+uG0Zn6IUvs/WXedt0Xo94BdjpLAQ6gk/+l1ZRUaH09HR16dJFU6dO1fr16097fW1trcrKypp9eIvMpsMnWOEFrPHe+kMqrqpXl5hQTeyXZHU58HD/e2FPxYQFald+hd789qDV5QB+w+cCb58+fTRnzhx99NFHmjt3rkJCQjR27Fjt2rXr
lK956KGHFB0d7f5IS0vrwIrPDqPJAOs4HKZe/DJbkjRzbCardfhe0WGBuvPiXpKkxxbvVFlNvcUVAf7B5747jxo1StOnT9fgwYM1btw4vf322+rVq5f+/e9/n/I1s2fPVmlpqfvj4EHv+Ve3q4f3cHG16hoYTQZ0pCU78rX3aKUiQwJ09XDv+YcyrHXdyK7qlhCuwso6Pb1kj9XlAH7B5wLvd9lsNg0fPvy0K7zBwcGKiopq9uEtEiKDFRZkl8OUDhYzqQHoSC+scK7uXjeiqyKCLd0SAS8SaLfp3sl9JUkvfZnNWEmgA/h84DVNU1lZWUpJSbG6lHbhHE3W1NZAHy/QYTYfLtWqvYWy2wzdMCbD6nLgZS7sk6jzesSrrtGhhz/dbnU5gM+zNPBWVFQoKytLWVlZkqTs7GxlZWXpwIEDkpytBtdff32z17iur6io0NGjR5WVlaWtW7e6n3/ggQe0cOFC7d27V1lZWbr55puVlZWl2267rcO+ro7GxjWg47l6d6cMTFFqp1CLq4G3MQxD907pK8OQ5m3M1dr9RVaXBPg0S38Ht2bNGk2YMMH951mzZkmSbrjhBs2ZM0e5ubnu8OsyZMgQ9/9eu3at/vvf/yo9PV379u2TJJWUlOjWW29VXl6eoqOjNWTIEC1fvlwjRoxo/y/IImxcAzpWXmmNPt6QI0m6ZVymxdXAW/VNidI1w9L05rcH9adPtun9n4+RzcZYO6A9WBp4x48fL9M0T/n8nDlzTnjsdNdL0mOPPabHHnvsbEvzKhlNG9c4bQ3oGHNW7lODw9SIzFgN6tLJ6nLgxWZN7KWPN+Row8ESfbwxR5ef09nqkgCf5PM9vP6A09aAjlNZ26D/rnYeNPHTcZyqhrOTGBmi2yf0kCQ9smC7auobLa4I8E0EXh+QHufs4c0pqVZtA98sgfb0zpqDKqtpUEZcmC7qk2h1OfABN5+XqdToEOWU1rh7wwG0LQKvD0iICFa4azQZ422AdtPoMPXSV/skOUMK/ZZoCyGBdt01qY8k6eklu5VfXmNxRYDvIfD6AMMw3H282QUEXqC9LN56RAeKqtQpLFA/HNrF6nLgQ6YNStXgtE6qrGvUPxfttLocwOcQeH3EsY1r9PEC7eWFFXslST8Z2VVhQRw0gbZjsxn641TnYRRvrTmorTllFlcE+BYCr4/IjGPjGtCe1h8o1pr9xQq0G7phdIbV5cAHDU2P1ZRBKTJN6S/zt37vVCIALUfg9RGujWvM4gXaxwtNm4l+MLizEqNCLK4Gvuruy/ooyG7TV7sL9cX2fKvLAXwGgddHuEaT7aOHF2hzB4uqtGBTriTnZjWgvaTFhumm8zIkSX+Zv031jQ5rCwJ8BIHXR7h6eHNKq5njCLSxV1buk8OUzusRr36pUVaXAx/3iwk9FBcepL1HK/Xf1Qe+/wUAvheB10fEhQcpMjhApikdYDQZ0GbKaur15rcHJUk3c4wwOkBUSKB+fUkvSdLjn+1UaVW9xRUB3o/A6yOOH022j41rQJt5+9uDqqhtUI/ECF3QM8HqcuAnfjw8TT0TI1RcVa9/f7HL6nIAr0fg9SHuwMvGNaBNNDQ69HLTQRO3cNAEOlCA3aZ7pzjHlL2yah8LGcBZIvD6kIymSQ0cPgG0jQWb83S4pFpx4UG6Ykhnq8uBnxnfO1EX9EpQfaOphxdst7ocwKsReH1IRhwtDUBbMU3TfdDEjNHpCgm0W1wR/NG9U/rKZkifbsnT6r2FVpcDeC0Crw+hpQFoO2v2F2vDoVIFBdg0fVS61eXAT/VKitS1I7pKkh6ct00OB4dRAGeCwOtDXLN4c0trGE0GnCXX6u6VQzorPiLY4mrgz359SS9FBgdo0+FSvb/+sNXlAF6JwOtDYsICFRUSIEnaX0gfL3Cm9hVUatHWI5I4aALWi48I1i8u7CFJ+vvCHaqqa7C4IsD7EHh9yPGjybLp4wXO2MtfZcs0pfG9E9QzKdLqcgDdOCZDXWJClVdWo/8s32t1OYDXIfD6GPfGNfp4gTNSWlWvt9cckiT9dFw3i6sBnEIC7bp7Uh9J0nPL9iqvtMbiigDvQuD1MRw+AZydN77Zr+r6RvVJjtSY7nFWlwO4TRmYoqHpMaqub9Q/Fu2wuhzAqxB4fUxmvHMWLyu8QOvVNTj0ysp9kqRbxnWTYXDQBDyHYRj6fdNhFO+uO6TNh0strgjwHgReH3NsFi+b1oDW+mRjjo6U1SoxMlg/GJxqdTnACYZ0jdHl56TKNKU/f7JVpsmYMqAlCLw+xhV488pqVF3HaDKgpZwHTWRLkm4Yk6GgAL49wjP97rI+Cg6waXV2kXuaCIDT4zu6j4kJD1J0aKAk2hqA1li1t1Bbc8sUGmjXT0Z2tboc4JQ6dwp1b6h8aP421TU4LK4I8HwEXh/ExjWg9Vyru1cN7aJOYUEWVwOc3m3juys+Ilj7Cqv02tf7rS4H8HgEXh+UGefauEYfL9ASu/Mr9MX2fBmGNJODJuAFIoID9NuJvSRJ//psp4or6yyuCPBsBF4fxAov0Dovfulc3b24b5L7iG7A0/1oWJr6JEeqrKZB//p8l9XlAB6NwOuDXBvXsunhBb5XYUWt3lvnPGjiFlZ34UXsNkO/n9JPkvT61/u152iFxRUBnovA64NY4QVa7o3VB1Tb4NCgLtEakRlrdTlAq5zXM14X9UlUg8PUQ/O3W10O4LEIvD4os2mFN7+8VpW1DRZXA3iumvpGvbpqnyTp5vMyOWgCXmn25L6y2wx9tu2IVu4usLocwCMReH1QdFigYsIYTQZ8n4+yclRQUaeU6BBNHphidTnAGemRGKHpTaP0/jxvmxwODqMAvovA66NcbQ37mdQAnJRpmnrhy72SpJvGZijQzrdDeK87L+6liOAAbcst09oDxVaXA3gcvsP7KFdbQzZ9vMBJLd9VoJ1HKhQeZNc1wzloAt4tJjxIE/snSZLmbcy1uBrA8xB4fVR6HBvXgNN5YYVzdfea4V3dpxMC3mzqIGdbzvxNuWqkrQFohsDrozLiXYdPEHiB79qeV6YVuwpkM5ztDIAvOK9HgqJCApRfXqs1+4qsLgfwKAReH+Uanp9dQA8v8F0vNh0jfNmAZKXFhllcDdA2ggJsurR/siTpE9oagGYIvD7KtWmtoKJWFYwmA9zyy2v0YVaOJOmWcd0srgZoW1Oa2hoWbKatATgegddHRYUEKi48SBJ9vMDxXlu1X3WNDp3btZPO7RpjdTlAmxrbI16dwgJVUFGn1dmFVpcDeAwCrw9Lj6OPFzhedV2jXv96vyTpp6zuwgcF2m26jLYG4AQEXh/GEcNAc++uO6TiqnqlxYZqYlMoAHyNq63h0815amh0WFwN4BkIvD7s2CxeNq4BDoepl750bla7aUym7DaOEYZvGt0tTrHhQSqqrNPXe5nWAEgEXp927LQ1VniBL7bna29BpSJDAnT18DSrywHaTYDdpssGOH+DMW9TjsXVAJ6BwOvDXKPJ6OEF5D5G+LoRXRURHGBxNUD7mjrQNa0hT/W0NQAEXl/m2rRWUFGn8pp
...<remainder of base64-encoded PNG data omitted: Active learning results line plot of MSE vs. train/validation pool size>",
+ "text/plain": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "fig = plt.figure(figsize=(8, 8))\n",
+ "ax = fig.add_subplot(1, 1, 1)\n",
+ "ax.set_title(\"Active learning results\")\n",
+ "ax.set_xlabel(\"Train/Validation pool size\")\n",
+ "ax.set_ylabel(\"MSE\")\n",
+ "\n",
+ "ax.plot([a[0] for a in results], [a[1] for a in results])\n",
+ "plt.show()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "chemprop",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/chemprop-updated/examples/convert_v1_to_v2.ipynb b/chemprop-updated/examples/convert_v1_to_v2.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..227cc49efa93b4e530321013cea7121ba830f2e0
--- /dev/null
+++ b/chemprop-updated/examples/convert_v1_to_v2.ipynb
@@ -0,0 +1,495 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Convert v1 to v2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/convert_v1_to_v2.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Install chemprop from GitHub if running in Google Colab\n",
+ "import os\n",
+ "\n",
+ "if os.getenv(\"COLAB_RELEASE_TAG\"):\n",
+ " try:\n",
+ " import chemprop\n",
+ " except ImportError:\n",
+ " !git clone https://github.com/chemprop/chemprop.git\n",
+ " %cd chemprop\n",
+ " !pip install .\n",
+ " %cd examples"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Import packages"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from pprint import pprint\n",
+ "from pathlib import Path\n",
+ "\n",
+ "from chemprop.utils.v1_to_v2 import convert_model_dict_v1_to_v2\n",
+ "from chemprop.models.model import MPNN\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Change model paths here"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "chemprop_dir = Path.cwd().parent\n",
+ "model_v1_input_path = chemprop_dir / \"tests/data/example_model_v1_regression_mol.pt\" # path to v1 model .pt file\n",
+ "model_v2_output_path = Path.cwd() / \"converted_model.ckpt\" # path to save the converted model .ckpt file"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Load v1 model .pt file"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model_v1_dict = torch.load(model_v1_input_path, weights_only=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['args',\n",
+ " 'state_dict',\n",
+ " 'data_scaler',\n",
+ " 'features_scaler',\n",
+ " 'atom_descriptor_scaler',\n",
+ " 'bond_descriptor_scaler',\n",
+ " 'atom_bond_scaler']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Here are all the keys that is stored in v1 model\n",
+ "pprint(list(model_v1_dict.keys()))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'activation': 'ReLU',\n",
+ " 'adding_bond_types': True,\n",
+ " 'adding_h': False,\n",
+ " 'aggregation': 'mean',\n",
+ " 'aggregation_norm': 100,\n",
+ " 'atom_constraints': [],\n",
+ " 'atom_descriptor_scaling': True,\n",
+ " 'atom_descriptors': None,\n",
+ " 'atom_descriptors_path': None,\n",
+ " 'atom_descriptors_size': 0,\n",
+ " 'atom_features_size': 0,\n",
+ " 'atom_messages': False,\n",
+ " 'atom_targets': [],\n",
+ " 'batch_size': 50,\n",
+ " 'bias': False,\n",
+ " 'bias_solvent': False,\n",
+ " 'bond_constraints': [],\n",
+ " 'bond_descriptor_scaling': True,\n",
+ " 'bond_descriptors': None,\n",
+ " 'bond_descriptors_path': None,\n",
+ " 'bond_descriptors_size': 0,\n",
+ " 'bond_features_size': 0,\n",
+ " 'bond_targets': [],\n",
+ " 'cache_cutoff': 10000,\n",
+ " 'checkpoint_dir': None,\n",
+ " 'checkpoint_frzn': None,\n",
+ " 'checkpoint_path': None,\n",
+ " 'checkpoint_paths': None,\n",
+ " 'class_balance': False,\n",
+ " 'config_path': None,\n",
+ " 'constraints_path': None,\n",
+ " 'crossval_index_dir': None,\n",
+ " 'crossval_index_file': None,\n",
+ " 'crossval_index_sets': None,\n",
+ " 'cuda': False,\n",
+ " 'data_path': '/Users/hwpang/Software/chemprop/tests/data/regression.csv',\n",
+ " 'data_weights_path': None,\n",
+ " 'dataset_type': 'regression',\n",
+ " 'depth': 3,\n",
+ " 'depth_solvent': 3,\n",
+ " 'device': device(type='cpu'),\n",
+ " 'dropout': 0.0,\n",
+ " 'empty_cache': False,\n",
+ " 'ensemble_size': 1,\n",
+ " 'epochs': 1,\n",
+ " 'evidential_regularization': 0,\n",
+ " 'explicit_h': False,\n",
+ " 'extra_metrics': [],\n",
+ " 'features_generator': None,\n",
+ " 'features_only': False,\n",
+ " 'features_path': None,\n",
+ " 'features_scaling': True,\n",
+ " 'features_size': None,\n",
+ " 'ffn_hidden_size': 300,\n",
+ " 'ffn_num_layers': 2,\n",
+ " 'final_lr': 0.0001,\n",
+ " 'folds_file': None,\n",
+ " 'freeze_first_only': False,\n",
+ " 'frzn_ffn_layers': 0,\n",
+ " 'gpu': None,\n",
+ " 'grad_clip': None,\n",
+ " 'hidden_size': 300,\n",
+ " 'hidden_size_solvent': 300,\n",
+ " 'ignore_columns': None,\n",
+ " 'init_lr': 0.0001,\n",
+ " 'is_atom_bond_targets': False,\n",
+ " 'keeping_atom_map': False,\n",
+ " 'log_frequency': 10,\n",
+ " 'loss_function': 'mse',\n",
+ " 'max_data_size': None,\n",
+ " 'max_lr': 0.001,\n",
+ " 'metric': 'rmse',\n",
+ " 'metrics': ['rmse'],\n",
+ " 'minimize_score': True,\n",
+ " 'mpn_shared': False,\n",
+ " 'multiclass_num_classes': 3,\n",
+ " 'no_adding_bond_types': False,\n",
+ " 'no_atom_descriptor_scaling': False,\n",
+ " 'no_bond_descriptor_scaling': False,\n",
+ " 'no_cache_mol': False,\n",
+ " 'no_cuda': False,\n",
+ " 'no_features_scaling': False,\n",
+ " 'no_shared_atom_bond_ffn': False,\n",
+ " 'num_folds': 1,\n",
+ " 'num_lrs': 1,\n",
+ " 'num_tasks': 1,\n",
+ " 'num_workers': 8,\n",
+ " 'number_of_molecules': 1,\n",
+ " 'overwrite_default_atom_features': False,\n",
+ " 'overwrite_default_bond_features': False,\n",
+ " 'phase_features_path': None,\n",
+ " 'pytorch_seed': 0,\n",
+ " 'quiet': False,\n",
+ " 'reaction': False,\n",
+ " 'reaction_mode': 'reac_diff',\n",
+ " 'reaction_solvent': False,\n",
+ " 'resume_experiment': False,\n",
+ " 'save_dir': '/Users/hwpang/Software/test_chemprop_v1_to_v2/fold_0',\n",
+ " 'save_preds': False,\n",
+ " 'save_smiles_splits': True,\n",
+ " 'seed': 0,\n",
+ " 'separate_test_atom_descriptors_path': None,\n",
+ " 'separate_test_bond_descriptors_path': None,\n",
+ " 'separate_test_constraints_path': None,\n",
+ " 'separate_test_features_path': None,\n",
+ " 'separate_test_path': None,\n",
+ " 'separate_test_phase_features_path': None,\n",
+ " 'separate_val_atom_descriptors_path': None,\n",
+ " 'separate_val_bond_descriptors_path': None,\n",
+ " 'separate_val_constraints_path': None,\n",
+ " 'separate_val_features_path': None,\n",
+ " 'separate_val_path': None,\n",
+ " 'separate_val_phase_features_path': None,\n",
+ " 'shared_atom_bond_ffn': True,\n",
+ " 'show_individual_scores': False,\n",
+ " 'smiles_columns': ['smiles'],\n",
+ " 'spectra_activation': 'exp',\n",
+ " 'spectra_phase_mask': None,\n",
+ " 'spectra_phase_mask_path': None,\n",
+ " 'spectra_target_floor': 1e-08,\n",
+ " 'split_key_molecule': 0,\n",
+ " 'split_sizes': [0.8, 0.1, 0.1],\n",
+ " 'split_type': 'random',\n",
+ " 'target_columns': None,\n",
+ " 'target_weights': None,\n",
+ " 'task_names': ['logSolubility'],\n",
+ " 'test': False,\n",
+ " 'test_fold_index': None,\n",
+ " 'train_data_size': 400,\n",
+ " 'undirected': False,\n",
+ " 'use_input_features': False,\n",
+ " 'val_fold_index': None,\n",
+ " 'warmup_epochs': 2.0,\n",
+ " 'weights_ffn_num_layers': 2}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Here are the input arguments that is stored in v1 model\n",
+ "pprint(model_v1_dict['args'].__dict__)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['encoder.encoder.0.cached_zero_vector',\n",
+ " 'encoder.encoder.0.W_i.weight',\n",
+ " 'encoder.encoder.0.W_h.weight',\n",
+ " 'encoder.encoder.0.W_o.weight',\n",
+ " 'encoder.encoder.0.W_o.bias',\n",
+ " 'readout.1.weight',\n",
+ " 'readout.1.bias',\n",
+ " 'readout.4.weight',\n",
+ " 'readout.4.bias']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Here are the state_dict that is stored in v1 model\n",
+ "pprint(list(model_v1_dict['state_dict'].keys()))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Convert loaded v1 model dictionary into v2 model dictionary"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model_v2_dict = convert_model_dict_v1_to_v2(model_v1_dict)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['epoch',\n",
+ " 'global_step',\n",
+ " 'pytorch-lightning_version',\n",
+ " 'state_dict',\n",
+ " 'loops',\n",
+ " 'callbacks',\n",
+ " 'optimizer_states',\n",
+ " 'lr_schedulers',\n",
+ " 'hparams_name',\n",
+ " 'hyper_parameters']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Here are all the keys in the converted model\n",
+ "pprint(list(model_v2_dict.keys()))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['message_passing.W_i.weight',\n",
+ " 'message_passing.W_h.weight',\n",
+ " 'message_passing.W_o.weight',\n",
+ " 'message_passing.W_o.bias',\n",
+ " 'predictor.ffn.0.0.weight',\n",
+ " 'predictor.ffn.0.0.bias',\n",
+ " 'predictor.ffn.1.2.weight',\n",
+ " 'predictor.ffn.1.2.bias',\n",
+ " 'predictor.output_transform.mean',\n",
+ " 'predictor.output_transform.scale',\n",
+ " 'predictor.criterion.task_weights']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Here are all the keys in the converted state_dict\n",
+ "pprint(list(model_v2_dict['state_dict'].keys()))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['batch_norm',\n",
+ " 'metrics',\n",
+ " 'warmup_epochs',\n",
+ " 'init_lr',\n",
+ " 'max_lr',\n",
+ " 'final_lr',\n",
+ " 'message_passing',\n",
+ " 'agg',\n",
+ " 'predictor']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Here are all the keys in the converted hyper_parameters\n",
+ "pprint(list(model_v2_dict['hyper_parameters'].keys()))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Save"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "torch.save(model_v2_dict, model_v2_output_path)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Load converted model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mpnn = MPNN.load_from_checkpoint(model_v2_output_path)"
+ ]
+ },
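+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The converted model can be used for prediction like any other Chemprop v2 model. The cell below is a minimal sketch, assuming the `data` and `featurizers` modules from chemprop; the SMILES strings are illustrative placeholders."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from lightning import pytorch as pl\n",
+ "\n",
+ "from chemprop import data, featurizers\n",
+ "\n",
+ "# Hypothetical input molecules; replace with your own SMILES\n",
+ "smis = [\"CCO\", \"c1ccccc1\"]\n",
+ "datapoints = [data.MoleculeDatapoint.from_smi(smi) for smi in smis]\n",
+ "dset = data.MoleculeDataset(datapoints, featurizers.SimpleMoleculeMolGraphFeaturizer())\n",
+ "loader = data.build_dataloader(dset, shuffle=False)\n",
+ "\n",
+ "trainer = pl.Trainer(logger=False, enable_progress_bar=False)\n",
+ "preds = trainer.predict(mpnn, loader)"
+ ]
+ },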
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MPNN(\n",
+ " (message_passing): BondMessagePassing(\n",
+ " (W_i): Linear(in_features=147, out_features=300, bias=False)\n",
+ " (W_h): Linear(in_features=300, out_features=300, bias=False)\n",
+ " (W_o): Linear(in_features=433, out_features=300, bias=True)\n",
+ " (dropout): Dropout(p=0.0, inplace=False)\n",
+ " (tau): ReLU()\n",
+ " (V_d_transform): Identity()\n",
+ " (graph_transform): Identity()\n",
+ " )\n",
+ " (agg): MeanAggregation()\n",
+ " (bn): Identity()\n",
+ " (predictor): RegressionFFN(\n",
+ " (ffn): MLP(\n",
+ " (0): Sequential(\n",
+ " (0): Linear(in_features=300, out_features=300, bias=True)\n",
+ " )\n",
+ " (1): Sequential(\n",
+ " (0): ReLU()\n",
+ " (1): Dropout(p=0.0, inplace=False)\n",
+ " (2): Linear(in_features=300, out_features=1, bias=True)\n",
+ " )\n",
+ " )\n",
+ " (criterion): MSE(task_weights=[[1.0]])\n",
+ " (output_transform): UnscaleTransform()\n",
+ " )\n",
+ " (X_d_transform): Identity()\n",
+ " (metrics): ModuleList(\n",
+ " (0): RMSE(task_weights=[[1.0]])\n",
+ " (1): MSE(task_weights=[[1.0]])\n",
+ " )\n",
+ ")"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# now visually check the converted model is what is expected\n",
+ "mpnn"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "chemprop",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/chemprop-updated/examples/extra_features_descriptors.ipynb b/chemprop-updated/examples/extra_features_descriptors.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..1dcdfaf4af8c59446b932f1dddd8f6edae5f8955
--- /dev/null
+++ b/chemprop-updated/examples/extra_features_descriptors.ipynb
@@ -0,0 +1,1101 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Using extra features and descriptors\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This notebook demonstrates how to use extra features and descriptors in addition to the default Chemprop featurizers.\n",
+ "\n",
+ "* Extra atom and bond features are used in addition to those calculated by Chemprop internally. \n",
+ "* Extra atom descriptors get incorporated into the atom descriptors from message passing via a learned linear transformation. \n",
+ "* Extra bond descriptors are not currently supported because the bond descriptors from message passing are not used for molecular property prediction. \n",
+ "* Extra molecule features can be used as extra datapoint descriptors, which are concatenated to the output of the aggregation layer before the final prediction layer."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/extra_features_descriptors.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Install chemprop from GitHub if running in Google Colab\n",
+ "import os\n",
+ "\n",
+ "if os.getenv(\"COLAB_RELEASE_TAG\"):\n",
+ " try:\n",
+ " import chemprop\n",
+ " except ImportError:\n",
+ " !git clone https://github.com/chemprop/chemprop.git\n",
+ " %cd chemprop\n",
+ " !pip install .\n",
+ " %cd examples"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Loading packages and data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from pathlib import Path\n",
+ "\n",
+ "from lightning import pytorch as pl\n",
+ "from rdkit import Chem\n",
+ "\n",
+ "from chemprop import data, featurizers, models, nn, utils"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "chemprop_dir = Path.cwd().parent\n",
+ "input_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n",
+ "smiles_column = \"smiles\"\n",
+ "target_columns = [\"lipo\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_input = pd.read_csv(input_path)\n",
+ "smis = df_input.loc[:, smiles_column].values\n",
+ "ys = df_input.loc[:, target_columns].values"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Getting extra features and descriptors"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `rdkit.Chem.Mol` representation of molecules is needed as input to many featurizers. Chemprop provides a helpful wrapper to rdkit to make these from SMILES."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mols = [utils.make_mol(smi, keep_h=False, add_h=False) for smi in smis]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Extra atom features, atom descriptors, bond features"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Extra atom and bond features frequently come from QM calculations. The calculation results can be saved to a file and then loaded in a notebook using pandas or numpy. The loaded atom or bond features can be a list of numpy arrays where each numpy array of features corresponds to a single molecule in the dataset. Each row in an array corresponds to a different atom or bond in the same order of atoms or bonds in the `rdkit.Chem.Mol` objects. \n",
+ "\n",
+ "The atom features could also be used as extra atom descriptors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This code is just a placeholder for the actual QM calculation\n",
+ "\n",
+ "\n",
+ "def QM_calculation(mol):\n",
+ " n_extra_atom_feats = 10\n",
+ " n_extra_bond_feats = 4\n",
+ " extra_atom_features = np.array([np.random.randn(n_extra_atom_feats) for a in mol.GetAtoms()])\n",
+ " extra_bond_features = np.array([np.random.randn(n_extra_bond_feats) for a in mol.GetBonds()])\n",
+ " return extra_atom_features, extra_bond_features\n",
+ "\n",
+ "\n",
+ "extra_atom_featuress = []\n",
+ "extra_bond_featuress = []\n",
+ "\n",
+ "for mol in mols:\n",
+ " extra_atom_features, extra_bond_features = QM_calculation(mol)\n",
+ " extra_atom_featuress.append(extra_atom_features)\n",
+ " extra_bond_featuress.append(extra_bond_features)\n",
+ "\n",
+ "# Save to a file\n",
+ "np.savez(\"atom_features.npz\", *extra_atom_featuress)\n",
+ "np.savez(\"bond_features.npz\", *extra_bond_featuress)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "extra_atom_featuress = np.load(\"atom_features.npz\")\n",
+ "extra_atom_featuress = [extra_atom_featuress[f\"arr_{i}\"] for i in range(len(extra_atom_featuress))]\n",
+ "\n",
+ "extra_atom_descriptorss = extra_atom_featuress\n",
+ "\n",
+ "extra_bond_featuress = np.load(\"bond_features.npz\")\n",
+ "extra_bond_featuress = [extra_bond_featuress[f\"arr_{i}\"] for i in range(len(extra_bond_featuress))]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can also get extra atom and bond features from other sources."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "atom_radii = {1: 0.79, 5: 1.2, 6: 0.91, 7: 0.75, 8: 0.65, 9: 0.57, 16: 1.1, 17: 0.97, 35: 1.1}\n",
+ "\n",
+ "extra_atom_featuress = [\n",
+ " np.vstack([np.array([[atom_radii[a.GetAtomicNum()]] for a in mol.GetAtoms()])]) for mol in mols\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Extra molecule features"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A QM calculation could also be used to get extra molecule features. Extra molecule features are different from extra atom and bond features in that they are stored in a single numpy array where each row corresponds to a single molecule in the dataset, instead of a list of numpy arrays."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def QM_calculation(mol):\n",
+ " n_extra_mol_feats = 7\n",
+ " return np.random.randn(n_extra_mol_feats)\n",
+ "\n",
+ "\n",
+ "extra_mol_features = np.array([QM_calculation(mol) for mol in mols])\n",
+ "\n",
+ "np.savez(\"mol_features.npz\", extra_mol_features)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "extra_mol_features = np.load(\"mol_features.npz\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The extra molecule features can also be calculated using built-in Chemprop featurizers or featurizers from other packages."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "molecule_featurizer = featurizers.MorganBinaryFeaturizer()\n",
+ "\n",
+ "extra_mol_features = np.array([molecule_featurizer(mol) for mol in mols])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# First install other package\n",
+ "# !pip install descriptastorus\n",
+ "\n",
+ "# from descriptastorus.descriptors import rdNormalizedDescriptors\n",
+ "# generator = rdNormalizedDescriptors.RDKit2DNormalized()\n",
+ "# extra_mol_features = np.array([generator.process(smi)[1:] for smi in smis])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The molecule featurizers available in Chemprop are registered in `MoleculeFeaturizerRegristry`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "morgan_binary\n",
+ "morgan_count\n",
+ "rdkit_2d\n",
+ "v1_rdkit_2d\n",
+ "v1_rdkit_2d_normalized\n"
+ ]
+ }
+ ],
+ "source": [
+ "for MoleculeFeaturizer in featurizers.MoleculeFeaturizerRegistry.keys():\n",
+ " print(MoleculeFeaturizer)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If your model takes multiple components as input, you can use extra molecule features for each component as extra datapoint descriptors. Simply concatentate the extra molecule features together before passing them to the datapoints."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "extra_mol_features_comp1 = np.random.rand(len(mols), 5)\n",
+ "extra_mol_features_comp2 = np.random.rand(len(mols), 5)\n",
+ "\n",
+ "extra_datapoint_descriptors = np.concatenate(\n",
+ " [extra_mol_features_comp1, extra_mol_features_comp2], axis=1\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Making datapoints, datasets, and dataloaders"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Once you have all the extra features and descriptors your model will use, you can make the datapoints."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datapoints = [\n",
+ " data.MoleculeDatapoint(mol, y, V_f=V_f, E_f=E_f, V_d=V_d, x_d=X_d)\n",
+ " for mol, y, V_f, E_f, V_d, X_d in zip(\n",
+ " mols,\n",
+ " ys,\n",
+ " extra_atom_featuress,\n",
+ " extra_bond_featuress,\n",
+ " extra_atom_descriptorss,\n",
+ " extra_datapoint_descriptors,\n",
+ " )\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "After splitting the data, the datasets are made. To make a dataset, you need a `MolGraph` featurizer, which needs to be told the size of extra atom and bond features. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "n_extra_atom_feats = extra_atom_featuress[0].shape[1]\n",
+ "n_extra_bond_feats = extra_bond_featuress[0].shape[1]\n",
+ "\n",
+ "featurizer = featurizers.SimpleMoleculeMolGraphFeaturizer(\n",
+ " extra_atom_fdim=n_extra_atom_feats, extra_bond_fdim=n_extra_bond_feats\n",
+ ")\n",
+ "\n",
+ "train_indices, val_indices, test_indices = data.make_split_indices(mols, \"random\", (0.8, 0.1, 0.1))\n",
+ "train_data, val_data, test_data = data.split_data_by_indices(\n",
+ " datapoints, train_indices, val_indices, test_indices\n",
+ ")\n",
+ "\n",
+ "train_dset = data.MoleculeDataset(train_data[0], featurizer)\n",
+ "val_dset = data.MoleculeDataset(val_data[0], featurizer)\n",
+ "test_dset = data.MoleculeDataset(test_data[0], featurizer)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Often scaling the extra features and descriptors improves model performance. The scalers for the extra features and descriptors should be fit to the training dataset, applied to the validation dataset, and then given to the model to apply to the test dataset at prediction time. This is the same as for scaling target values to improve model performance."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
StandardScaler()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
StandardScaler()
"
+ ],
+ "text/plain": [
+ "StandardScaler()"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "targets_scaler = train_dset.normalize_targets()\n",
+ "extra_atom_features_scaler = train_dset.normalize_inputs(\"V_f\")\n",
+ "extra_bond_features_scaler = train_dset.normalize_inputs(\"E_f\")\n",
+ "extra_atom_descriptors_scaler = train_dset.normalize_inputs(\"V_d\")\n",
+ "extra_datapoint_descriptors_scaler = train_dset.normalize_inputs(\"X_d\")\n",
+ "\n",
+ "val_dset.normalize_targets(targets_scaler)\n",
+ "val_dset.normalize_inputs(\"V_f\", extra_atom_features_scaler)\n",
+ "val_dset.normalize_inputs(\"E_f\", extra_bond_features_scaler)\n",
+ "val_dset.normalize_inputs(\"V_d\", extra_atom_descriptors_scaler)\n",
+ "val_dset.normalize_inputs(\"X_d\", extra_datapoint_descriptors_scaler)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Featurize the train and val datasets to save computation time.\n",
+ "train_dset.cache = True\n",
+ "val_dset.cache = True\n",
+ "\n",
+ "train_loader = data.build_dataloader(train_dset)\n",
+ "val_loader = data.build_dataloader(val_dset, shuffle=False)\n",
+ "test_loader = data.build_dataloader(test_dset, shuffle=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Making the model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The message passing layer needs to know the total size of atom and bond features (i.e. the sum of the sizes of the Chemprop atom and bond features and the extra atom and bond features). The `MolGraph` featurizer collects this information. The message passing layer also needs to know the number of extra atom descriptors.\n",
+ "\n",
+ "The extra atom and bond features scalers are combined into a graph transform which is given to the message passing layer to use at prediction time. To avoid scaling the atom and bond features from the internal Chemprop featurizers, the graph transform uses a pad equal to the length of features from the Chemprop internal atom and bond featurizers. This information is stored in the `MolGraph` featurizer.\n",
+ "\n",
+ "The extra atom descriptor scaler are also converted to a transform and given to the message passing layer to use at prediction time."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "n_V_features = featurizer.atom_fdim - featurizer.extra_atom_fdim\n",
+ "n_E_features = featurizer.bond_fdim - featurizer.extra_bond_fdim\n",
+ "\n",
+ "V_f_transform = nn.ScaleTransform.from_standard_scaler(extra_atom_features_scaler, pad=n_V_features)\n",
+ "E_f_transform = nn.ScaleTransform.from_standard_scaler(extra_bond_features_scaler, pad=n_E_features)\n",
+ "\n",
+ "graph_transform = nn.GraphTransform(V_f_transform, E_f_transform)\n",
+ "\n",
+ "V_d_transform = nn.ScaleTransform.from_standard_scaler(extra_atom_descriptors_scaler)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "n_extra_atom_descs = extra_atom_descriptorss[0].shape[1]\n",
+ "\n",
+ "mp = nn.BondMessagePassing(\n",
+ " d_v=featurizer.atom_fdim,\n",
+ " d_e=featurizer.bond_fdim,\n",
+ " d_vd=n_extra_atom_descs,\n",
+ " graph_transform=graph_transform,\n",
+ " V_d_transform=V_d_transform,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The predictor layer needs to know the size of the its input, including any extra datapoint descriptors. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ffn_input_dim = mp.output_dim + extra_datapoint_descriptors.shape[1]\n",
+ "\n",
+ "output_transform = nn.UnscaleTransform.from_standard_scaler(targets_scaler)\n",
+ "ffn = nn.RegressionFFN(input_dim=ffn_input_dim, output_transform=output_transform)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The overall model is given the transform from the extra datapoint descriptors scaler."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_d_transform = nn.ScaleTransform.from_standard_scaler(extra_datapoint_descriptors_scaler)\n",
+ "\n",
+ "chemprop_model = models.MPNN(mp, nn.NormAggregation(), ffn, X_d_transform=X_d_transform)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Training and prediction"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The rest of the training and prediction are the same as other Chemprop workflows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "GPU available: False, used: False\n",
+ "TPU available: False, using: 0 TPU cores\n",
+ "HPU available: False, using: 0 HPUs\n"
+ ]
+ }
+ ],
+ "source": [
+ "trainer = pl.Trainer(\n",
+ " logger=False, enable_checkpointing=False, enable_progress_bar=True, max_epochs=5\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading `train_dataloader` to estimate number of stepping batches.\n",
+ "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n",
+ "\n",
+ " | Name | Type | Params | Mode \n",
+ "---------------------------------------------------------------\n",
+ "0 | message_passing | BondMessagePassing | 325 K | train\n",
+ "1 | agg | NormAggregation | 0 | train\n",
+ "2 | bn | Identity | 0 | train\n",
+ "3 | predictor | RegressionFFN | 96.6 K | train\n",
+ "4 | X_d_transform | ScaleTransform | 0 | train\n",
+ "5 | metrics | ModuleList | 0 | train\n",
+ "---------------------------------------------------------------\n",
+ "422 K Trainable params\n",
+ "0 Non-trainable params\n",
+ "422 K Total params\n",
+ "1.690 Total estimated model params size (MB)\n",
+ "27 Modules in train mode\n",
+ "0 Modules in eval mode\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 4: 100%|██████████| 2/2 [00:00<00:00, 2.22it/s, train_loss_step=1.340, val_loss=8.520, train_loss_epoch=0.936]"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "`Trainer.fit` stopped: `max_epochs=5` reached.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 4: 100%|██████████| 2/2 [00:00<00:00, 2.21it/s, train_loss_step=1.340, val_loss=8.520, train_loss_epoch=0.936]\n"
+ ]
+ }
+ ],
+ "source": [
+ "trainer.fit(chemprop_model, train_loader, val_loader)"
+ ]
+ },
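+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a minimal sketch of getting test-set predictions (standard Lightning `Trainer.predict` API, assuming each batch yields a single tensor of predictions): the model applies the stored input transforms and unscales the outputs automatically."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "\n",
+ "preds = trainer.predict(chemprop_model, test_loader)  # list with one tensor per batch\n",
+ "preds = torch.cat(preds)  # shape: (n_test_molecules, n_tasks)"
+ ]
+ },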
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.20it/s]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Setting up process group for: env:// [rank=0, world_size=1]\n",
+ "\u001b[36m(TorchTrainer pid=24873)\u001b[0m Started distributed worker processes: \n",
+ "\u001b[36m(TorchTrainer pid=24873)\u001b[0m - (ip=172.31.231.162, pid=24952) world_rank=0, local_rank=0, node_rank=0\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m GPU available: False, used: False\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m TPU available: False, using: 0 TPU cores\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m HPU available: False, using: 0 HPUs\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sanity Checking DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\n",
+ "Epoch 0: 0%| | 0/2 [00:00, ?it/s] \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Loading `train_dataloader` to estimate number of stepping batches.\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m /home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m /home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m \n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m | Name | Type | Params | Mode \n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m ---------------------------------------------------------------\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 0 | message_passing | BondMessagePassing | 579 K | train\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 1 | agg | MeanAggregation | 0 | train\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 2 | bn | BatchNorm1d | 1.0 K | train\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 3 | predictor | RegressionFFN | 5.0 M | train\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 4 | X_d_transform | Identity | 0 | train\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 5 | metrics | ModuleList | 0 | train\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m ---------------------------------------------------------------\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 5.6 M Trainable params\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 0 Non-trainable params\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 5.6 M Total params\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 22.346 Total estimated model params size (MB)\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 27 Modules in train mode\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m 0 Modules in eval mode\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m /home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/core/saving.py:363: Skipping 'metrics' parameter because it is not possible to safely dump to YAML.\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m /home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 0: 50%|█████ | 1/2 [00:00<00:00, 1.12it/s, v_num=0, train_loss_step=0.987]\n",
+ "Epoch 0: 100%|██████████| 2/2 [00:01<00:00, 1.83it/s, v_num=0, train_loss_step=1.040]\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m \n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 14.60it/s]\u001b[A\n",
+ "Epoch 0: 100%|██████████| 2/2 [00:01<00:00, 1.67it/s, v_num=0, train_loss_step=1.040, val_loss=0.848]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000000)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 0: 100%|██████████| 2/2 [00:01<00:00, 1.26it/s, v_num=0, train_loss_step=1.040, val_loss=0.848, train_loss_epoch=0.997]\n",
+ "Epoch 1: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=1.040, val_loss=0.848, train_loss_epoch=0.997] \n",
+ "Epoch 1: 50%|█████ | 1/2 [00:00<00:00, 2.22it/s, v_num=0, train_loss_step=0.984, val_loss=0.848, train_loss_epoch=0.997]\n",
+ "Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 3.32it/s, v_num=0, train_loss_step=0.406, val_loss=0.848, train_loss_epoch=0.997]\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 18.53it/s]\u001b[A\n",
+ "Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 2.97it/s, v_num=0, train_loss_step=0.406, val_loss=0.904, train_loss_epoch=0.997]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-10-22 09:04:05,874\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000001)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 1: 100%|██████████| 2/2 [00:01<00:00, 1.90it/s, v_num=0, train_loss_step=0.406, val_loss=0.904, train_loss_epoch=0.869]\n",
+ "Epoch 2: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.406, val_loss=0.904, train_loss_epoch=0.869] \n",
+ "Epoch 2: 50%|█████ | 1/2 [00:00<00:00, 1.15it/s, v_num=0, train_loss_step=1.190, val_loss=0.904, train_loss_epoch=0.869]\n",
+ "Epoch 2: 100%|██████████| 2/2 [00:01<00:00, 1.81it/s, v_num=0, train_loss_step=1.290, val_loss=0.904, train_loss_epoch=0.869]\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m \n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 14.01it/s]\u001b[A\n",
+ "Epoch 2: 100%|██████████| 2/2 [00:01<00:00, 1.66it/s, v_num=0, train_loss_step=1.290, val_loss=0.842, train_loss_epoch=0.869]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-10-22 09:04:07,873\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000002)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 2: 100%|██████████| 2/2 [00:01<00:00, 1.29it/s, v_num=0, train_loss_step=1.290, val_loss=0.842, train_loss_epoch=1.210]\n",
+ "Epoch 3: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=1.290, val_loss=0.842, train_loss_epoch=1.210] \n",
+ "Epoch 3: 50%|█████ | 1/2 [00:00<00:00, 1.80it/s, v_num=0, train_loss_step=0.890, val_loss=0.842, train_loss_epoch=1.210]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m(TorchTrainer pid=24953)\u001b[0m Started distributed worker processes: \n",
+ "\u001b[36m(TorchTrainer pid=24953)\u001b[0m - (ip=172.31.231.162, pid=25062) world_rank=0, local_rank=0, node_rank=0\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m Setting up process group for: env:// [rank=0, world_size=1]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 2.44it/s, v_num=0, train_loss_step=0.749, val_loss=0.842, train_loss_epoch=1.210]\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m \n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.81it/s]\u001b[A\n",
+ "Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 2.15it/s, v_num=0, train_loss_step=0.749, val_loss=0.912, train_loss_epoch=1.210]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000003)\n",
+ "2024-10-22 09:04:09,291\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 3: 100%|██████████| 2/2 [00:01<00:00, 1.62it/s, v_num=0, train_loss_step=0.749, val_loss=0.912, train_loss_epoch=0.861]\n",
+ "Epoch 4: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.749, val_loss=0.912, train_loss_epoch=0.861] \n",
+ "Epoch 4: 50%|█████ | 1/2 [00:00<00:00, 1.41it/s, v_num=0, train_loss_step=0.845, val_loss=0.912, train_loss_epoch=0.861]\n",
+ "Epoch 4: 100%|██████████| 2/2 [00:00<00:00, 2.04it/s, v_num=0, train_loss_step=0.578, val_loss=0.912, train_loss_epoch=0.861]\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m \n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.38it/s]\u001b[A\n",
+ "Epoch 4: 100%|██████████| 2/2 [00:01<00:00, 1.78it/s, v_num=0, train_loss_step=0.578, val_loss=0.912, train_loss_epoch=0.861]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-10-22 09:04:11,011\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000004)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 4: 100%|██████████| 2/2 [00:01<00:00, 1.31it/s, v_num=0, train_loss_step=0.578, val_loss=0.912, train_loss_epoch=0.792]\n",
+ "Epoch 5: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.578, val_loss=0.912, train_loss_epoch=0.792] \n",
+ "Epoch 5: 50%|█████ | 1/2 [00:00<00:00, 1.60it/s, v_num=0, train_loss_step=0.584, val_loss=0.912, train_loss_epoch=0.792]\n",
+ "Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 2.58it/s, v_num=0, train_loss_step=0.751, val_loss=0.912, train_loss_epoch=0.792]\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m \n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 12.17it/s]\u001b[A\n",
+ "Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 2.26it/s, v_num=0, train_loss_step=0.751, val_loss=0.887, train_loss_epoch=0.792]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-10-22 09:04:12,441\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000005)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 5: 100%|██████████| 2/2 [00:01<00:00, 1.59it/s, v_num=0, train_loss_step=0.751, val_loss=0.887, train_loss_epoch=0.618]\n",
+ "Epoch 6: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.751, val_loss=0.887, train_loss_epoch=0.618] \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m GPU available: False, used: False\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m TPU available: False, using: 0 TPU cores\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m HPU available: False, using: 0 HPUs\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 6: 50%|█████ | 1/2 [00:00<00:00, 1.64it/s, v_num=0, train_loss_step=0.421, val_loss=0.887, train_loss_epoch=0.618]\n",
+ "Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 2.56it/s, v_num=0, train_loss_step=0.569, val_loss=0.887, train_loss_epoch=0.618]\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 13.13it/s]\u001b[A\n",
+ "Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 2.28it/s, v_num=0, train_loss_step=0.569, val_loss=0.876, train_loss_epoch=0.618]\n",
+ "Sanity Checking: | | 0/? [00:00, ?it/s]\n",
+ "Sanity Checking DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 12.06it/s]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m Loading `train_dataloader` to estimate number of stepping batches.\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m /home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m /home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m \n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m | Name | Type | Params | Mode \n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m ---------------------------------------------------------------\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m 0 | message_passing | BondMessagePassing | 383 K | train\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m 1 | agg | MeanAggregation | 0 | train\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m 2 | bn | BatchNorm1d | 800 | train\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m 3 | predictor | RegressionFFN | 5.7 M | train\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m 4 | X_d_transform | Identity | 0 | train\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m 5 | metrics | ModuleList | 0 | train\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m ---------------------------------------------------------------\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m 6.1 M Trainable params\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m 0 Non-trainable params\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m 6.1 M Total params\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m 24.444 Total estimated model params size (MB)\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m 27 Modules in train mode\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m 0 Modules in eval mode\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m /home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/core/saving.py:363: Skipping 'metrics' parameter because it is not possible to safely dump to YAML.\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m /home/knathan/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 0: 0%| | 0/2 [00:00, ?it/s] \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000006)\n",
+ "2024-10-22 09:04:13,968\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 6: 100%|██████████| 2/2 [00:01<00:00, 1.53it/s, v_num=0, train_loss_step=0.569, val_loss=0.876, train_loss_epoch=0.450]\n",
+ "Epoch 7: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.569, val_loss=0.876, train_loss_epoch=0.450] \n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 7: 50%|█████ | 1/2 [00:00<00:00, 2.28it/s, v_num=0, train_loss_step=0.339, val_loss=0.876, train_loss_epoch=0.450]\u001b[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)\u001b[0m\n",
+ "Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 3.75it/s, v_num=0, train_loss_step=0.335, val_loss=0.854, train_loss_epoch=1.010]\u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m \u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 16.17it/s]\u001b[A\u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 3.26it/s, v_num=0, train_loss_step=0.335, val_loss=0.893, train_loss_epoch=1.010]\u001b[32m [repeated 3x across cluster]\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 2.01it/s, v_num=0, train_loss_step=0.335, val_loss=0.893, train_loss_epoch=0.703]\u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "Epoch 2: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.335, val_loss=0.893, train_loss_epoch=0.703]\u001b[32m [repeated 3x across cluster]\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-10-22 09:04:16,509\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "2024-10-22 09:04:17,399\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000009)\u001b[32m [repeated 6x across cluster]\u001b[0m\n",
+ "2024-10-22 09:04:17,944\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "2024-10-22 09:04:18,760\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "2024-10-22 09:04:19,250\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "2024-10-22 09:04:20,250\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 11: 50%|█████ | 1/2 [00:00<00:00, 1.25it/s, v_num=0, train_loss_step=0.175, val_loss=0.897, train_loss_epoch=0.258]\u001b[32m [repeated 8x across cluster]\u001b[0m\n",
+ "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.79it/s, v_num=0, train_loss_step=0.312, val_loss=0.897, train_loss_epoch=0.258]\u001b[32m [repeated 7x across cluster]\u001b[0m\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\u001b[32m [repeated 7x across cluster]\u001b[0m\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\u001b[32m [repeated 7x across cluster]\u001b[0m\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\u001b[32m [repeated 7x across cluster]\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m \u001b[32m [repeated 11x across cluster]\u001b[0m\n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 7.84it/s]\u001b[A\u001b[32m [repeated 7x across cluster]\u001b[0m\n",
+ "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.56it/s, v_num=0, train_loss_step=0.312, val_loss=0.869, train_loss_epoch=0.258]\u001b[32m [repeated 7x across cluster]\u001b[0m\n",
+ "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.27it/s, v_num=0, train_loss_step=0.312, val_loss=0.869, train_loss_epoch=0.203]\u001b[32m [repeated 7x across cluster]\u001b[0m\n",
+ "Epoch 12: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.312, val_loss=0.869, train_loss_epoch=0.203]\u001b[32m [repeated 7x across cluster]\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-10-22 09:04:21,687\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "2024-10-22 09:04:22,323\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "2024-10-22 09:04:22,766\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000013)\u001b[32m [repeated 8x across cluster]\u001b[0m\n",
+ "2024-10-22 09:04:24,404\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "2024-10-22 09:04:25,524\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 14: 50%|█████ | 1/2 [00:01<00:01, 0.88it/s, v_num=0, train_loss_step=0.131, val_loss=0.841, train_loss_epoch=0.141] \u001b[32m [repeated 6x across cluster]\u001b[0m\n",
+ "Epoch 7: 100%|██████████| 2/2 [00:01<00:00, 1.13it/s, v_num=0, train_loss_step=0.368, val_loss=0.836, train_loss_epoch=0.399]\u001b[32m [repeated 5x across cluster]\u001b[0m\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\u001b[32m [repeated 5x across cluster]\u001b[0m\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\u001b[32m [repeated 5x across cluster]\u001b[0m\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\u001b[32m [repeated 5x across cluster]\u001b[0m\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m \u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 7.76it/s]\u001b[A\u001b[32m [repeated 5x across cluster]\u001b[0m\n",
+ "Epoch 7: 100%|██████████| 2/2 [00:01<00:00, 1.01it/s, v_num=0, train_loss_step=0.368, val_loss=0.843, train_loss_epoch=0.399]\u001b[32m [repeated 5x across cluster]\u001b[0m\n",
+ "Epoch 7: 100%|██████████| 2/2 [00:02<00:00, 0.79it/s, v_num=0, train_loss_step=0.368, val_loss=0.843, train_loss_epoch=0.306]\u001b[32m [repeated 5x across cluster]\u001b[0m\n",
+ "Epoch 8: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.368, val_loss=0.843, train_loss_epoch=0.306]\u001b[32m [repeated 5x across cluster]\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-10-22 09:04:27,188\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "2024-10-22 09:04:28,260\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000015)\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "2024-10-22 09:04:30,172\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 9: 50%|█████ | 1/2 [00:01<00:01, 0.72it/s, v_num=0, train_loss_step=0.216, val_loss=0.889, train_loss_epoch=0.254]\u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "Epoch 9: 100%|██████████| 2/2 [00:01<00:00, 1.04it/s, v_num=0, train_loss_step=0.322, val_loss=0.889, train_loss_epoch=0.254]\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m \u001b[32m [repeated 9x across cluster]\u001b[0m\n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 4.73it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Epoch 9: 100%|██████████| 2/2 [00:02<00:00, 0.90it/s, v_num=0, train_loss_step=0.322, val_loss=0.910, train_loss_epoch=0.254]\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Epoch 9: 100%|██████████| 2/2 [00:02<00:00, 0.70it/s, v_num=0, train_loss_step=0.322, val_loss=0.910, train_loss_epoch=0.237]\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Epoch 16: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.105, val_loss=0.809, train_loss_epoch=0.128]\u001b[32m [repeated 3x across cluster]\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-10-22 09:04:32,873\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "2024-10-22 09:04:33,534\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "2024-10-22 09:04:34,844\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/d775c15d/checkpoint_000011)\u001b[32m [repeated 5x across cluster]\u001b[0m\n",
+ "2024-10-22 09:04:35,472\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 18: 50%|█████ | 1/2 [00:01<00:01, 0.98it/s, v_num=0, train_loss_step=0.0962, val_loss=0.781, train_loss_epoch=0.116]\u001b[32m [repeated 5x across cluster]\u001b[0m\n",
+ "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.91it/s, v_num=0, train_loss_step=0.263, val_loss=0.889, train_loss_epoch=0.219]\u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m \u001b[32m [repeated 5x across cluster]\u001b[0m\n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 9.49it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.68it/s, v_num=0, train_loss_step=0.263, val_loss=0.861, train_loss_epoch=0.219]\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Epoch 11: 100%|██████████| 2/2 [00:01<00:00, 1.19it/s, v_num=0, train_loss_step=0.263, val_loss=0.861, train_loss_epoch=0.146]\u001b[32m [repeated 6x across cluster]\u001b[0m\n",
+ "Epoch 12: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.263, val_loss=0.861, train_loss_epoch=0.146]\u001b[32m [repeated 5x across cluster]\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-10-22 09:04:37,245\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "2024-10-22 09:04:38,006\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000019)\u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "2024-10-22 09:04:40,708\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "2024-10-22 09:04:41,380\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "\u001b[36m(RayTrainWorker pid=24952)\u001b[0m `Trainer.fit` stopped: `max_epochs=20` reached.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 13: 50%|█████ | 1/2 [00:00<00:00, 1.17it/s, v_num=0, train_loss_step=0.118, val_loss=0.849, train_loss_epoch=0.122]\u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "Epoch 13: 100%|██████████| 2/2 [00:01<00:00, 1.62it/s, v_num=0, train_loss_step=0.0846, val_loss=0.849, train_loss_epoch=0.122]\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m \u001b[32m [repeated 5x across cluster]\u001b[0m\n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 7.32it/s]\u001b[A\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Epoch 13: 100%|██████████| 2/2 [00:01<00:00, 1.42it/s, v_num=0, train_loss_step=0.0846, val_loss=0.842, train_loss_epoch=0.122]\u001b[32m [repeated 4x across cluster]\u001b[0m\n",
+ "Epoch 19: 100%|██████████| 2/2 [00:03<00:00, 0.52it/s, v_num=0, train_loss_step=0.168, val_loss=0.742, train_loss_epoch=0.099]\u001b[32m [repeated 5x across cluster]\u001b[0m\n",
+ "Epoch 14: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.0846, val_loss=0.842, train_loss_epoch=0.112]\u001b[32m [repeated 3x across cluster]\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 15: 50%|█████ | 1/2 [00:01<00:01, 0.64it/s, v_num=0, train_loss_step=0.0923, val_loss=0.839, train_loss_epoch=0.0974]\u001b[32m [repeated 2x across cluster]\u001b[0m\n",
+ "Epoch 15: 100%|██████████| 2/2 [00:02<00:00, 0.94it/s, v_num=0, train_loss_step=0.0867, val_loss=0.839, train_loss_epoch=0.0974]\u001b[32m [repeated 2x across cluster]\u001b[0m\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\u001b[32m [repeated 2x across cluster]\u001b[0m\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m \u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.51it/s]\u001b[A\n",
+ "Epoch 14: 100%|██████████| 2/2 [00:01<00:00, 1.63it/s, v_num=0, train_loss_step=0.126, val_loss=0.839, train_loss_epoch=0.112]\n",
+ "Epoch 14: 100%|██████████| 2/2 [00:02<00:00, 0.87it/s, v_num=0, train_loss_step=0.126, val_loss=0.839, train_loss_epoch=0.0974]\n",
+ "Epoch 15: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.126, val_loss=0.839, train_loss_epoch=0.0974] \n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 3.15it/s]\u001b[A\n",
+ "Epoch 15: 100%|██████████| 2/2 [00:02<00:00, 0.78it/s, v_num=0, train_loss_step=0.0867, val_loss=0.837, train_loss_epoch=0.0974]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/d775c15d/checkpoint_000015)\u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "2024-10-22 09:04:48,312\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 15: 100%|██████████| 2/2 [00:03<00:00, 0.54it/s, v_num=0, train_loss_step=0.0867, val_loss=0.837, train_loss_epoch=0.0912]\n",
+ "Epoch 16: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.0867, val_loss=0.837, train_loss_epoch=0.0912] \n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Epoch 16: 50%|█████ | 1/2 [00:02<00:02, 0.35it/s, v_num=0, train_loss_step=0.0792, val_loss=0.837, train_loss_epoch=0.0912]\n",
+ "Epoch 16: 100%|██████████| 2/2 [00:03<00:00, 0.61it/s, v_num=0, train_loss_step=0.0703, val_loss=0.837, train_loss_epoch=0.0912]\n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 4.23it/s]\u001b[A\n",
+ "Epoch 16: 100%|██████████| 2/2 [00:03<00:00, 0.56it/s, v_num=0, train_loss_step=0.0703, val_loss=0.837, train_loss_epoch=0.0912]\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m \u001b[32m [repeated 2x across cluster]\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-10-22 09:04:53,245\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 16: 100%|██████████| 2/2 [00:04<00:00, 0.41it/s, v_num=0, train_loss_step=0.0703, val_loss=0.837, train_loss_epoch=0.0774]\n",
+ "Epoch 17: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.0703, val_loss=0.837, train_loss_epoch=0.0774] \n",
+ "Epoch 17: 50%|█████ | 1/2 [00:01<00:01, 0.90it/s, v_num=0, train_loss_step=0.0711, val_loss=0.837, train_loss_epoch=0.0774]\n",
+ "Epoch 17: 100%|██████████| 2/2 [00:01<00:00, 1.36it/s, v_num=0, train_loss_step=0.156, val_loss=0.837, train_loss_epoch=0.0774] \n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.47it/s]\u001b[A\n",
+ "Epoch 17: 100%|██████████| 2/2 [00:01<00:00, 1.23it/s, v_num=0, train_loss_step=0.156, val_loss=0.836, train_loss_epoch=0.0774]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/d775c15d/checkpoint_000017)\u001b[32m [repeated 2x across cluster]\u001b[0m2024-10-22 09:04:56,772\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 17: 100%|██████████| 2/2 [00:01<00:00, 1.01it/s, v_num=0, train_loss_step=0.156, val_loss=0.836, train_loss_epoch=0.0882]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 18: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.156, val_loss=0.836, train_loss_epoch=0.0882] \n",
+ "Epoch 18: 50%|█████ | 1/2 [00:00<00:00, 1.43it/s, v_num=0, train_loss_step=0.0684, val_loss=0.836, train_loss_epoch=0.0882]\n",
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m \u001b[32m [repeated 3x across cluster]\u001b[0m\n",
+ "Epoch 18: 100%|██████████| 2/2 [00:00<00:00, 2.20it/s, v_num=0, train_loss_step=0.064, val_loss=0.836, train_loss_epoch=0.0882] \n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 12.20it/s]\u001b[A\n",
+ "Epoch 18: 100%|██████████| 2/2 [00:01<00:00, 1.95it/s, v_num=0, train_loss_step=0.064, val_loss=0.830, train_loss_epoch=0.0882]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-10-22 09:04:58,523\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 18: 100%|██████████| 2/2 [00:01<00:00, 1.32it/s, v_num=0, train_loss_step=0.064, val_loss=0.830, train_loss_epoch=0.0675]\n",
+ "Epoch 19: 0%| | 0/2 [00:00, ?it/s, v_num=0, train_loss_step=0.064, val_loss=0.830, train_loss_epoch=0.0675] \n",
+ "Epoch 19: 50%|█████ | 1/2 [00:00<00:00, 1.64it/s, v_num=0, train_loss_step=0.0571, val_loss=0.830, train_loss_epoch=0.0675]\n",
+ "Epoch 19: 100%|██████████| 2/2 [00:00<00:00, 2.53it/s, v_num=0, train_loss_step=0.120, val_loss=0.830, train_loss_epoch=0.0675] \n",
+ "Validation: | | 0/? [00:00, ?it/s]\u001b[A\n",
+ "Validation: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 0%| | 0/1 [00:00, ?it/s]\u001b[A\n",
+ "Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 13.51it/s]\u001b[A\n",
+ "Epoch 19: 100%|██████████| 2/2 [00:00<00:00, 2.23it/s, v_num=0, train_loss_step=0.120, val_loss=0.815, train_loss_epoch=0.0675]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-10-22 09:05:00,109\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 19: 100%|██████████| 2/2 [00:01<00:00, 1.55it/s, v_num=0, train_loss_step=0.120, val_loss=0.815, train_loss_epoch=0.0697]\n",
+ "Epoch 19: 100%|██████████| 2/2 [00:01<00:00, 1.13it/s, v_num=0, train_loss_step=0.120, val_loss=0.815, train_loss_epoch=0.0697]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m(RayTrainWorker pid=25062)\u001b[0m `Trainer.fit` stopped: `max_epochs=20` reached.\n",
+ "2024-10-22 09:05:01,809\tWARNING experiment_state.py:205 -- Experiment state snapshotting has been triggered multiple times in the last 5.0 seconds. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.\n",
+ "You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.\n",
+ "You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).\n",
+ "2024-10-22 09:05:01,823\tINFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37' in 0.0305s.\n",
+ "2024-10-22 09:05:01,873\tINFO tune.py:1048 -- Total run time: 83.87 seconds (83.66 seconds for the tuning loop).\n"
+ ]
+ }
+ ],
+ "source": [
+ "ray.init()\n",
+ "\n",
+ "scheduler = FIFOScheduler()\n",
+ "\n",
+ "# Scaling config controls the resources used by Ray\n",
+ "scaling_config = ScalingConfig(\n",
+ " num_workers=1,\n",
+ " use_gpu=False, # change to True if you want to use GPU\n",
+ ")\n",
+ "\n",
+ "# Checkpoint config controls the checkpointing behavior of Ray\n",
+ "checkpoint_config = CheckpointConfig(\n",
+ " num_to_keep=1, # number of checkpoints to keep\n",
+ " checkpoint_score_attribute=\"val_loss\", # Save the checkpoint based on this metric\n",
+ " checkpoint_score_order=\"min\", # Save the checkpoint with the lowest metric value\n",
+ ")\n",
+ "\n",
+ "run_config = RunConfig(\n",
+ " checkpoint_config=checkpoint_config,\n",
+ " storage_path=hpopt_save_dir / \"ray_results\", # directory to save the results\n",
+ ")\n",
+ "\n",
+ "ray_trainer = TorchTrainer(\n",
+ " lambda config: train_model(\n",
+ " config, train_dset, val_dset, num_workers, scaler\n",
+ " ),\n",
+ " scaling_config=scaling_config,\n",
+ " run_config=run_config,\n",
+ ")\n",
+ "\n",
+ "search_alg = HyperOptSearch(\n",
+ " n_initial_points=1, # number of random evaluations before tree parzen estimators\n",
+ " random_state_seed=42,\n",
+ ")\n",
+ "\n",
+ "# OptunaSearch is another search algorithm that can be used\n",
+ "# search_alg = OptunaSearch() \n",
+ "\n",
+ "tune_config = tune.TuneConfig(\n",
+ " metric=\"val_loss\",\n",
+ " mode=\"min\",\n",
+ " num_samples=2, # number of trials to run\n",
+ " scheduler=scheduler,\n",
+ " search_alg=search_alg,\n",
+ " trial_dirname_creator=lambda trial: str(trial.trial_id), # shorten filepaths\n",
+ " \n",
+ ")\n",
+ "\n",
+ "tuner = tune.Tuner(\n",
+ " ray_trainer,\n",
+ " param_space={\n",
+ " \"train_loop_config\": search_space,\n",
+ " },\n",
+ " tune_config=tune_config,\n",
+ ")\n",
+ "\n",
+ "# Start the hyperparameter search\n",
+ "results = tuner.fit()\n"
+ ]
+ },
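+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `FIFOScheduler` above simply runs every trial to completion. As a minimal sketch (not run here), Ray Tune's `ASHAScheduler` could be used instead to stop underperforming trials early; the `grace_period` and `reduction_factor` values below are illustrative, not tuned recommendations."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from ray.tune.schedulers import ASHAScheduler\n",
+ "\n",
+ "# Illustrative alternative to FIFOScheduler: asynchronous successive halving,\n",
+ "# which terminates poorly performing trials early\n",
+ "asha_scheduler = ASHAScheduler(\n",
+ " max_t=20, # maximum training iterations (epochs) per trial\n",
+ " grace_period=5, # minimum iterations before a trial can be stopped\n",
+ " reduction_factor=2, # keep the top 1/2 of trials at each halving round\n",
+ ")\n",
+ "\n",
+ "# To use it, pass scheduler=asha_scheduler to tune.TuneConfig above"
+ ]
+ },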
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Hyperparameter optimization results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "ResultGrid<[\n",
+ " Result(\n",
+ " metrics={'train_loss': 0.09904231131076813, 'train_loss_step': 0.16821686923503876, 'val/rmse': 0.8613682389259338, 'val/mae': 0.7006751298904419, 'val_loss': 0.7419552206993103, 'train_loss_epoch': 0.09904231131076813, 'epoch': 19, 'step': 40},\n",
+ " path='/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a',\n",
+ " filesystem='local',\n",
+ " checkpoint=Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000019)\n",
+ " ),\n",
+ " Result(\n",
+ " metrics={'train_loss': 0.06969495117664337, 'train_loss_step': 0.11989812552928925, 'val/rmse': 0.902579665184021, 'val/mae': 0.7176367044448853, 'val_loss': 0.8146500587463379, 'train_loss_epoch': 0.06969495117664337, 'epoch': 19, 'step': 40},\n",
+ " path='/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/d775c15d',\n",
+ " filesystem='local',\n",
+ " checkpoint=Checkpoint(filesystem=local, path=/home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/d775c15d/checkpoint_000019)\n",
+ " )\n",
+ "]>"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
train_loss
\n",
+ "
train_loss_step
\n",
+ "
val/rmse
\n",
+ "
val/mae
\n",
+ "
val_loss
\n",
+ "
train_loss_epoch
\n",
+ "
epoch
\n",
+ "
step
\n",
+ "
timestamp
\n",
+ "
checkpoint_dir_name
\n",
+ "
...
\n",
+ "
pid
\n",
+ "
hostname
\n",
+ "
node_ip
\n",
+ "
time_since_restore
\n",
+ "
iterations_since_restore
\n",
+ "
config/train_loop_config/depth
\n",
+ "
config/train_loop_config/ffn_hidden_dim
\n",
+ "
config/train_loop_config/ffn_num_layers
\n",
+ "
config/train_loop_config/message_hidden_dim
\n",
+ "
logdir
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
0
\n",
+ "
0.099042
\n",
+ "
0.168217
\n",
+ "
0.861368
\n",
+ "
0.700675
\n",
+ "
0.741955
\n",
+ "
0.099042
\n",
+ "
19
\n",
+ "
40
\n",
+ "
1729602279
\n",
+ "
checkpoint_000019
\n",
+ "
...
\n",
+ "
24873
\n",
+ "
Knathan-Laptop
\n",
+ "
172.31.231.162
\n",
+ "
49.881516
\n",
+ "
20
\n",
+ "
2
\n",
+ "
2000
\n",
+ "
2
\n",
+ "
500
\n",
+ "
f1a6e41a
\n",
+ "
\n",
+ "
\n",
+ "
1
\n",
+ "
0.069695
\n",
+ "
0.119898
\n",
+ "
0.902580
\n",
+ "
0.717637
\n",
+ "
0.814650
\n",
+ "
0.069695
\n",
+ "
19
\n",
+ "
40
\n",
+ "
1729602299
\n",
+ "
checkpoint_000019
\n",
+ "
...
\n",
+ "
24953
\n",
+ "
Knathan-Laptop
\n",
+ "
172.31.231.162
\n",
+ "
56.653336
\n",
+ "
20
\n",
+ "
2
\n",
+ "
2200
\n",
+ "
2
\n",
+ "
400
\n",
+ "
d775c15d
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " train_loss train_loss_step val/rmse val/mae val_loss \\\n",
+ "0 0.099042 0.168217 0.861368 0.700675 0.741955 \n",
+ "1 0.069695 0.119898 0.902580 0.717637 0.814650 \n",
+ "\n",
+ " train_loss_epoch epoch step timestamp checkpoint_dir_name ... pid \\\n",
+ "0 0.099042 19 40 1729602279 checkpoint_000019 ... 24873 \n",
+ "1 0.069695 19 40 1729602299 checkpoint_000019 ... 24953 \n",
+ "\n",
+ " hostname node_ip time_since_restore iterations_since_restore \\\n",
+ "0 Knathan-Laptop 172.31.231.162 49.881516 20 \n",
+ "1 Knathan-Laptop 172.31.231.162 56.653336 20 \n",
+ "\n",
+ " config/train_loop_config/depth config/train_loop_config/ffn_hidden_dim \\\n",
+ "0 2 2000 \n",
+ "1 2 2200 \n",
+ "\n",
+ " config/train_loop_config/ffn_num_layers \\\n",
+ "0 2 \n",
+ "1 2 \n",
+ "\n",
+ " config/train_loop_config/message_hidden_dim logdir \n",
+ "0 500 f1a6e41a \n",
+ "1 400 d775c15d \n",
+ "\n",
+ "[2 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# results of all trials\n",
+ "result_df = results.get_dataframe()\n",
+ "result_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'depth': 2,\n",
+ " 'ffn_hidden_dim': 2000,\n",
+ " 'ffn_num_layers': 2,\n",
+ " 'message_hidden_dim': 500}"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# best configuration\n",
+ "best_result = results.get_best_result()\n",
+ "best_config = best_result.config\n",
+ "best_config['train_loop_config']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Best model checkpoint path: /home/knathan/chemprop/examples/hpopt/ray_results/TorchTrainer_2024-10-22_09-03-37/f1a6e41a/checkpoint_000019/checkpoint.ckpt\n"
+ ]
+ }
+ ],
+ "source": [
+ "# best model checkpoint path\n",
+ "best_result = results.get_best_result()\n",
+ "best_checkpoint_path = Path(best_result.checkpoint.path) / \"checkpoint.ckpt\"\n",
+ "print(f\"Best model checkpoint path: {best_checkpoint_path}\")"
+ ]
+ },
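+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a minimal sketch, the best checkpoint can be loaded back into an `MPNN` for prediction or fine-tuning. Because `MPNN` is a Lightning module, `load_from_checkpoint` should accept the `.ckpt` file directly; this cell is illustrative and assumes the checkpoint path above exists on disk."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from chemprop import models\n",
+ "\n",
+ "# Restore the best-performing model from its Lightning checkpoint\n",
+ "best_model = models.MPNN.load_from_checkpoint(best_checkpoint_path)\n",
+ "best_model"
+ ]
+ },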
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ray.shutdown()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "chemprop",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/chemprop-updated/examples/interpreting_monte_carlo_tree_search.ipynb b/chemprop-updated/examples/interpreting_monte_carlo_tree_search.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..6bc828b536a17a43e5b69e3d9506a0191d0015fe
--- /dev/null
+++ b/chemprop-updated/examples/interpreting_monte_carlo_tree_search.ipynb
@@ -0,0 +1,1116 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Interpretability with Monte Carlo Tree search\n",
+ "\n",
+ "Based on the paper Jin et al., [Multi-Objective Molecule Generation using Interpretable Substructures](https://arxiv.org/abs/2002.03244) and modified from Chemprop v1 [interpret.py](https://github.com/chemprop/chemprop/blob/master/chemprop/interpret.py)\n",
+ "\n",
+ "Please scroll to after the helper functions to change the model and data input and run the interpretation algorithm\n",
+ "\n",
+ "Note: \n",
+ "- The interpret function does not yet work with additional atom or bond features, as the substructure extracted doesn't necessarily have the corresponding additional atom or bond features readily available.\n",
+ "- It currently only works with single molecule model\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[](https://colab.research.google.com/github/chemprop/chemprop/blob/main/examples/interpreting_monte_carlo_tree_search.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Install chemprop from GitHub if running in Google Colab\n",
+ "import os\n",
+ "\n",
+ "if os.getenv(\"COLAB_RELEASE_TAG\"):\n",
+ " try:\n",
+ " import chemprop\n",
+ " except ImportError:\n",
+ " !git clone https://github.com/chemprop/chemprop.git\n",
+ " %cd chemprop\n",
+ " !pip install .\n",
+ " %cd examples"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import packages"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from dataclasses import dataclass, field\n",
+ "import math\n",
+ "from pathlib import Path\n",
+ "import time\n",
+ "from typing import Callable, Union, Iterable\n",
+ "\n",
+ "from lightning import pytorch as pl\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from rdkit import Chem\n",
+ "import torch\n",
+ "\n",
+ "from chemprop import data, featurizers, models\n",
+ "from chemprop.models import MPNN"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Define helper function to make model predictions from SMILES"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def make_prediction(\n",
+ " models: list[MPNN],\n",
+ " trainer: pl.Trainer,\n",
+ " smiles: list[str],\n",
+ ") -> np.ndarray:\n",
+ " \"\"\"Makes predictions on a list of SMILES.\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " models : list\n",
+ " A list of models to make predictions with.\n",
+ " smiles : list\n",
+ " A list of SMILES to make predictions on.\n",
+ "\n",
+ " Returns\n",
+ " -------\n",
+ " list[list[float]]\n",
+ " A list of lists containing the predicted values.\n",
+ " \"\"\"\n",
+ "\n",
+ " test_data = [data.MoleculeDatapoint.from_smi(smi) for smi in smiles]\n",
+ " test_dset = data.MoleculeDataset(test_data)\n",
+ " test_loader = data.build_dataloader(\n",
+ " test_dset, batch_size=1, num_workers=0, shuffle=False\n",
+ " )\n",
+ "\n",
+ " with torch.inference_mode():\n",
+ " sum_preds = []\n",
+ " for model in models:\n",
+ " predss = trainer.predict(model, test_loader)\n",
+ " preds = torch.cat(predss, 0)\n",
+ " preds = preds.cpu().numpy()\n",
+ " sum_preds.append(preds)\n",
+ "\n",
+ " # Ensemble predictions\n",
+ " sum_preds = sum(sum_preds)\n",
+ " avg_preds = sum_preds / len(models)\n",
+ "\n",
+ " return avg_preds"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Classes/functions relevant to Monte Carlo Tree Search\n",
+ "\n",
+ "Mostly similar to the scripts from Chemprop v1 [interpret.py](https://github.com/chemprop/chemprop/blob/master/chemprop/interpret.py) with additional documentation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "@dataclass\n",
+ "class MCTSNode:\n",
+ " \"\"\"Represents a node in a Monte Carlo Tree Search.\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " smiles : str\n",
+ " The SMILES for the substructure at this node.\n",
+ " atoms : list\n",
+ " A list of atom indices in the substructure at this node.\n",
+ " W : float\n",
+ " The total action value, which indicates how likely the deletion will lead to a good rationale.\n",
+ " N : int\n",
+ " The visit count, which indicates how many times this node has been visited. It is used to balance exploration and exploitation.\n",
+ " P : float\n",
+ " The predicted property score of the new subgraphs' after the deletion, shown as R in the original paper.\n",
+ " \"\"\"\n",
+ "\n",
+ " smiles: str\n",
+ " atoms: Iterable[int]\n",
+ " W: float = 0\n",
+ " N: int = 0\n",
+ " P: float = 0\n",
+ " children: list[...] = field(default_factory=list)\n",
+ "\n",
+ " def __post_init__(self):\n",
+ " self.atoms = set(self.atoms)\n",
+ "\n",
+ " def Q(self) -> float:\n",
+ " \"\"\"\n",
+ " Returns\n",
+ " -------\n",
+ " float\n",
+ " The mean action value of the node.\n",
+ " \"\"\"\n",
+ " return self.W / self.N if self.N > 0 else 0\n",
+ "\n",
+ " def U(self, n: int, c_puct: float = 10.0) -> float:\n",
+ " \"\"\"\n",
+ " Parameters\n",
+ " ----------\n",
+ " n : int\n",
+ " The sum of the visit count of this node's siblings.\n",
+ " c_puct : float\n",
+ " A constant that controls the level of exploration.\n",
+ " \n",
+ " Returns\n",
+ " -------\n",
+ " float\n",
+ " The exploration value of the node.\n",
+ " \"\"\"\n",
+ " return c_puct * self.P * math.sqrt(n) / (1 + self.N)"
+ ]
+ },
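+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "During each rollout, the child node to descend into is the one maximizing the PUCT-style criterion built from `Q` and `U` above:\n",
+ "\n",
+ "$$a^* = \\arg\\max_{a}\\left[Q(a) + c_{puct}\\,P(a)\\,\\frac{\\sqrt{\\sum_b N(b)}}{1 + N(a)}\\right]$$\n",
+ "\n",
+ "where the sum runs over the node and its siblings. A high predicted property $P$ and a low visit count $N$ raise the exploration term, while $Q = W/N$ rewards deletions that have already led to good rationales."
+ ]
+ },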
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def find_clusters(mol: Chem.Mol) -> tuple[list[tuple[int, ...]], list[list[int]]]:\n",
+ " \"\"\"Finds clusters within the molecule. Jin et al. from [1]_ only allows deletion of one peripheral non-aromatic bond or one peripheral ring from each state,\n",
+ " so the clusters here are defined as non-ring bonds and the smallest set of smallest rings.\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " mol : RDKit molecule\n",
+ " The molecule to find clusters in.\n",
+ "\n",
+ " Returns\n",
+ " -------\n",
+ " tuple\n",
+ " A tuple containing:\n",
+ " - list of tuples: Each tuple contains atoms in a cluster.\n",
+ " - list of int: Each atom's cluster index.\n",
+ " \n",
+ " References\n",
+ " ----------\n",
+ " .. [1] Jin, Wengong, Regina Barzilay, and Tommi Jaakkola. \"Multi-objective molecule generation using interpretable substructures.\" International conference on machine learning. PMLR, 2020. https://arxiv.org/abs/2002.03244\n",
+ " \"\"\"\n",
+ "\n",
+ " n_atoms = mol.GetNumAtoms()\n",
+ " if n_atoms == 1: # special case\n",
+ " return [(0,)], [[0]]\n",
+ "\n",
+ " clusters = []\n",
+ " for bond in mol.GetBonds():\n",
+ " a1 = bond.GetBeginAtom().GetIdx()\n",
+ " a2 = bond.GetEndAtom().GetIdx()\n",
+ " if not bond.IsInRing():\n",
+ " clusters.append((a1, a2))\n",
+ "\n",
+ " ssr = [tuple(x) for x in Chem.GetSymmSSSR(mol)]\n",
+ " clusters.extend(ssr)\n",
+ "\n",
+ " atom_cls = [[] for _ in range(n_atoms)]\n",
+ " for i in range(len(clusters)):\n",
+ " for atom in clusters[i]:\n",
+ " atom_cls[atom].append(i)\n",
+ "\n",
+ " return clusters, atom_cls"
+ ]
+ },
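+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A quick illustration of `find_clusters` on ethylbenzene (an arbitrary example molecule): the clusters should be the two non-ring C–C bonds plus the six-membered aromatic ring, and `atom_cls` maps each atom to the cluster(s) containing it."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "example_mol = Chem.MolFromSmiles(\"CCc1ccccc1\") # ethylbenzene\n",
+ "example_clusters, example_atom_cls = find_clusters(example_mol)\n",
+ "print(example_clusters) # non-ring bonds and the aromatic ring\n",
+ "print(example_atom_cls) # cluster membership for each atom"
+ ]
+ },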
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def extract_subgraph_from_mol(mol: Chem.Mol, selected_atoms: set[int]) -> tuple[Chem.Mol, list[int]]:\n",
+ " \"\"\"Extracts a subgraph from an RDKit molecule given a set of atom indices.\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " mol : RDKit molecule\n",
+ " The molecule from which to extract a subgraph.\n",
+ " selected_atoms : list of int\n",
+ " The indices of atoms which form the subgraph to be extracted.\n",
+ "\n",
+ " Returns\n",
+ " -------\n",
+ " tuple\n",
+ " A tuple containing:\n",
+ " - RDKit molecule: The subgraph.\n",
+ " - list of int: Root atom indices from the selected indices.\n",
+ " \"\"\"\n",
+ "\n",
+ " selected_atoms = set(selected_atoms)\n",
+ " roots = []\n",
+ " for idx in selected_atoms:\n",
+ " atom = mol.GetAtomWithIdx(idx)\n",
+ " bad_neis = [y for y in atom.GetNeighbors() if y.GetIdx() not in selected_atoms]\n",
+ " if len(bad_neis) > 0:\n",
+ " roots.append(idx)\n",
+ "\n",
+ " new_mol = Chem.RWMol(mol)\n",
+ "\n",
+ " for atom_idx in roots:\n",
+ " atom = new_mol.GetAtomWithIdx(atom_idx)\n",
+ " atom.SetAtomMapNum(1)\n",
+ " aroma_bonds = [\n",
+ " bond for bond in atom.GetBonds() if bond.GetBondType() == Chem.rdchem.BondType.AROMATIC\n",
+ " ]\n",
+ " aroma_bonds = [\n",
+ " bond\n",
+ " for bond in aroma_bonds\n",
+ " if bond.GetBeginAtom().GetIdx() in selected_atoms\n",
+ " and bond.GetEndAtom().GetIdx() in selected_atoms\n",
+ " ]\n",
+ " if len(aroma_bonds) == 0:\n",
+ " atom.SetIsAromatic(False)\n",
+ "\n",
+ " remove_atoms = [\n",
+ " atom.GetIdx() for atom in new_mol.GetAtoms() if atom.GetIdx() not in selected_atoms\n",
+ " ]\n",
+ " remove_atoms = sorted(remove_atoms, reverse=True)\n",
+ " for atom in remove_atoms:\n",
+ " new_mol.RemoveAtom(atom)\n",
+ "\n",
+ " return new_mol.GetMol(), roots"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def extract_subgraph(smiles: str, selected_atoms: set[int]) -> tuple[str, list[int]]:\n",
+ " \"\"\"Extracts a subgraph from a SMILES given a set of atom indices.\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " smiles : str\n",
+ " The SMILES string from which to extract a subgraph.\n",
+ " selected_atoms : list of int\n",
+ " The indices of atoms which form the subgraph to be extracted.\n",
+ "\n",
+ " Returns\n",
+ " -------\n",
+ " tuple\n",
+ " A tuple containing:\n",
+ " - str: SMILES representing the subgraph.\n",
+ " - list of int: Root atom indices from the selected indices.\n",
+ " \"\"\"\n",
+ " # try with kekulization\n",
+ " mol = Chem.MolFromSmiles(smiles)\n",
+ " Chem.Kekulize(mol)\n",
+ " subgraph, roots = extract_subgraph_from_mol(mol, selected_atoms)\n",
+ " try:\n",
+ " subgraph = Chem.MolToSmiles(subgraph, kekuleSmiles=True)\n",
+ " subgraph = Chem.MolFromSmiles(subgraph)\n",
+ " except Exception:\n",
+ " subgraph = None\n",
+ "\n",
+ " mol = Chem.MolFromSmiles(smiles) # de-kekulize\n",
+ " if subgraph is not None and mol.HasSubstructMatch(subgraph):\n",
+ " return Chem.MolToSmiles(subgraph), roots\n",
+ "\n",
+ " # If fails, try without kekulization\n",
+ " subgraph, roots = extract_subgraph_from_mol(mol, selected_atoms)\n",
+ " subgraph = Chem.MolToSmiles(subgraph)\n",
+ " subgraph = Chem.MolFromSmiles(subgraph)\n",
+ "\n",
+ " if subgraph is not None:\n",
+ " return Chem.MolToSmiles(subgraph), roots\n",
+ " else:\n",
+ " return None, None"
+ ]
+ },
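+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "And a small illustration of `extract_subgraph`: selecting only the ring atoms of ethylbenzene (indices 2-7 in `CCc1ccccc1`) should return a benzene-like fragment, with the root atom (the ring carbon that was attached to the deleted ethyl group) marked by atom map number 1. The molecule and indices are illustrative."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sub_smiles, sub_roots = extract_subgraph(\"CCc1ccccc1\", {2, 3, 4, 5, 6, 7})\n",
+ "print(sub_smiles, sub_roots)"
+ ]
+ },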
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def mcts_rollout(\n",
+ " node: MCTSNode,\n",
+ " state_map: dict[str, MCTSNode],\n",
+ " orig_smiles: str,\n",
+ " clusters: list[set[int]],\n",
+ " atom_cls: list[set[int]],\n",
+ " nei_cls: list[set[int]],\n",
+ " scoring_function: Callable[[list[str]], list[float]],\n",
+ " min_atoms: int = 15,\n",
+ " c_puct: float = 10.0,\n",
+ ") -> float:\n",
+ " \"\"\"A Monte Carlo Tree Search rollout from a given MCTSNode.\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " node : MCTSNode\n",
+ " The MCTSNode from which to begin the rollout.\n",
+ " state_map : dict\n",
+ " A mapping from SMILES to MCTSNode.\n",
+ " orig_smiles : str\n",
+ " The original SMILES of the molecule.\n",
+ " clusters : list\n",
+ " Clusters of atoms.\n",
+ " atom_cls : list\n",
+ " Atom indices in the clusters.\n",
+ " nei_cls : list\n",
+ " Neighboring cluster indices.\n",
+ " scoring_function : function\n",
+ " A function for scoring subgraph SMILES using a Chemprop model.\n",
+ " min_atoms : int\n",
+ " The minimum number of atoms in a subgraph.\n",
+ " c_puct : float\n",
+ " The constant controlling the level of exploration.\n",
+ "\n",
+ " Returns\n",
+ " -------\n",
+ " float\n",
+ " The score of this MCTS rollout.\n",
+ " \"\"\"\n",
+ " # Return if the number of atoms is less than the minimum\n",
+ " cur_atoms = node.atoms\n",
+ " if len(cur_atoms) <= min_atoms:\n",
+ " return node.P\n",
+ "\n",
+ " # Expand if this node has never been visited\n",
+ " if len(node.children) == 0:\n",
+ " # Cluster indices whose all atoms are present in current subgraph\n",
+ " cur_cls = set([i for i, x in enumerate(clusters) if x <= cur_atoms])\n",
+ "\n",
+ " for i in cur_cls:\n",
+ " # Leaf atoms are atoms that are only involved in one cluster.\n",
+ " leaf_atoms = [a for a in clusters[i] if len(atom_cls[a] & cur_cls) == 1]\n",
+ "\n",
+ " # This checks\n",
+ " # 1. If there is only one neighbor cluster in the current subgraph (so that we don't produce unconnected graphs), or\n",
+ " # 2. If the cluster has only two atoms and the current subgraph has only one leaf atom.\n",
+ " # If either of the conditions is met, remove the leaf atoms in the current cluster.\n",
+ " if len(nei_cls[i] & cur_cls) == 1 or len(clusters[i]) == 2 and len(leaf_atoms) == 1:\n",
+ " new_atoms = cur_atoms - set(leaf_atoms)\n",
+ " new_smiles, _ = extract_subgraph(orig_smiles, new_atoms)\n",
+ " if new_smiles in state_map:\n",
+ " new_node = state_map[new_smiles] # merge identical states\n",
+ " else:\n",
+ " new_node = MCTSNode(new_smiles, new_atoms)\n",
+ " if new_smiles:\n",
+ " node.children.append(new_node)\n",
+ "\n",
+ " state_map[node.smiles] = node\n",
+ " if len(node.children) == 0:\n",
+ " return node.P # cannot find leaves\n",
+ "\n",
+ " scores = scoring_function([x.smiles for x in node.children])\n",
+ " for child, score in zip(node.children, scores):\n",
+ " child.P = score\n",
+ "\n",
+ " sum_count = sum(c.N for c in node.children)\n",
+ " selected_node = max(node.children, key=lambda x: x.Q() + x.U(sum_count, c_puct=c_puct))\n",
+ " v = mcts_rollout(\n",
+ " selected_node,\n",
+ " state_map,\n",
+ " orig_smiles,\n",
+ " clusters,\n",
+ " atom_cls,\n",
+ " nei_cls,\n",
+ " scoring_function,\n",
+ " min_atoms=min_atoms,\n",
+ " c_puct=c_puct,\n",
+ " )\n",
+ " selected_node.W += v\n",
+ " selected_node.N += 1\n",
+ "\n",
+ " return v"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def mcts(\n",
+ " smiles: str,\n",
+ " scoring_function: Callable[[list[str]], list[float]],\n",
+ " n_rollout: int,\n",
+ " max_atoms: int,\n",
+ " prop_delta: float,\n",
+ " min_atoms: int = 15,\n",
+ " c_puct: int = 10,\n",
+ ") -> list[MCTSNode]:\n",
+ " \"\"\"Runs the Monte Carlo Tree Search algorithm.\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " smiles : str\n",
+ " The SMILES of the molecule to perform the search on.\n",
+ " scoring_function : function\n",
+ " A function for scoring subgraph SMILES using a Chemprop model.\n",
+ " n_rollout : int\n",
+ " The number of MCTS rollouts to perform.\n",
+ " max_atoms : int\n",
+ " The maximum number of atoms allowed in an extracted rationale.\n",
+ " prop_delta : float\n",
+ " The minimum required property value for a satisfactory rationale.\n",
+ " min_atoms : int\n",
+ " The minimum number of atoms in a subgraph.\n",
+ " c_puct : float\n",
+ " The constant controlling the level of exploration.\n",
+ "\n",
+ " Returns\n",
+ " -------\n",
+ " list\n",
+ " A list of rationales each represented by a MCTSNode.\n",
+ " \"\"\"\n",
+ "\n",
+ " mol = Chem.MolFromSmiles(smiles)\n",
+ "\n",
+ " clusters, atom_cls = find_clusters(mol)\n",
+ " nei_cls = [0] * len(clusters)\n",
+ " for i, cls in enumerate(clusters):\n",
+ " nei_cls[i] = [nei for atom in cls for nei in atom_cls[atom]]\n",
+ " nei_cls[i] = set(nei_cls[i]) - {i}\n",
+ " clusters[i] = set(list(cls))\n",
+ " for a in range(len(atom_cls)):\n",
+ " atom_cls[a] = set(atom_cls[a])\n",
+ "\n",
+ " root = MCTSNode(smiles, set(range(mol.GetNumAtoms())))\n",
+ " state_map = {smiles: root}\n",
+ " for _ in range(n_rollout):\n",
+ " mcts_rollout(\n",
+ " root,\n",
+ " state_map,\n",
+ " smiles,\n",
+ " clusters,\n",
+ " atom_cls,\n",
+ " nei_cls,\n",
+ " scoring_function,\n",
+ " min_atoms=min_atoms,\n",
+ " c_puct=c_puct,\n",
+ " )\n",
+ "\n",
+ " rationales = [\n",
+ " node\n",
+ " for _, node in state_map.items()\n",
+ " if len(node.atoms) <= max_atoms and node.P >= prop_delta\n",
+ " ]\n",
+ "\n",
+ " return rationales"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Load model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "chemprop_dir = Path.cwd().parent\n",
+ "model_path = (\n",
+ " chemprop_dir / \"tests\" / \"data\" / \"example_model_v2_regression_mol.pt\"\n",
+ ") # path to model checkpoint (.ckpt) or model file (.pt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MPNN(\n",
+ " (message_passing): BondMessagePassing(\n",
+ " (W_i): Linear(in_features=86, out_features=300, bias=False)\n",
+ " (W_h): Linear(in_features=300, out_features=300, bias=False)\n",
+ " (W_o): Linear(in_features=372, out_features=300, bias=True)\n",
+ " (dropout): Dropout(p=0.0, inplace=False)\n",
+ " (tau): ReLU()\n",
+ " (V_d_transform): Identity()\n",
+ " (graph_transform): GraphTransform(\n",
+ " (V_transform): Identity()\n",
+ " (E_transform): Identity()\n",
+ " )\n",
+ " )\n",
+ " (agg): MeanAggregation()\n",
+ " (bn): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (predictor): RegressionFFN(\n",
+ " (ffn): MLP(\n",
+ " (0): Sequential(\n",
+ " (0): Linear(in_features=300, out_features=300, bias=True)\n",
+ " )\n",
+ " (1): Sequential(\n",
+ " (0): ReLU()\n",
+ " (1): Dropout(p=0.0, inplace=False)\n",
+ " (2): Linear(in_features=300, out_features=1, bias=True)\n",
+ " )\n",
+ " )\n",
+ " (criterion): MSE(task_weights=[[1.0]])\n",
+ " (output_transform): UnscaleTransform()\n",
+ " )\n",
+ " (X_d_transform): Identity()\n",
+ " (metrics): ModuleList(\n",
+ " (0-1): 2 x MSE(task_weights=[[1.0]])\n",
+ " )\n",
+ ")"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mpnn = models.MPNN.load_from_file(model_path) # this is a dummy model for testing purposes\n",
+ "mpnn"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Load data to run interpretation for"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "chemprop_dir = Path.cwd().parent\n",
+ "test_path = chemprop_dir / \"tests\" / \"data\" / \"regression\" / \"mol\" / \"mol.csv\"\n",
+ "smiles_column = \"smiles\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
smiles
\n",
+ "
lipo
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
0
\n",
+ "
Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14
\n",
+ "
3.54
\n",
+ "
\n",
+ "
\n",
+ "
1
\n",
+ "
COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)...
\n",
+ "
-1.18
\n",
+ "
\n",
+ "
\n",
+ "
2
\n",
+ "
COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl
\n",
+ "
3.69
\n",
+ "
\n",
+ "
\n",
+ "
3
\n",
+ "
OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C...
\n",
+ "
3.37
\n",
+ "
\n",
+ "
\n",
+ "
4
\n",
+ "
Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N...
\n",
+ "
3.10
\n",
+ "
\n",
+ "
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
\n",
+ "
\n",
+ "
95
\n",
+ "
CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C...
\n",
+ "
2.20
\n",
+ "
\n",
+ "
\n",
+ "
96
\n",
+ "
CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)...
\n",
+ "
2.04
\n",
+ "
\n",
+ "
\n",
+ "
97
\n",
+ "
CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)...
\n",
+ "
4.49
\n",
+ "
\n",
+ "
\n",
+ "
98
\n",
+ "
COc1ccc(Cc2c(N)n[nH]c2N)cc1
\n",
+ "
0.20
\n",
+ "
\n",
+ "
\n",
+ "
99
\n",
+ "
CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(...
\n",
+ "
2.00
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
100 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " smiles lipo\n",
+ "0 Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14 3.54\n",
+ "1 COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)... -1.18\n",
+ "2 COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl 3.69\n",
+ "3 OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(C... 3.37\n",
+ "4 Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)N... 3.10\n",
+ ".. ... ...\n",
+ "95 CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C... 2.20\n",
+ "96 CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)... 2.04\n",
+ "97 CCSc1c(Cc2ccccc2C(F)(F)F)sc3N(CC(C)C)C(=O)N(C)... 4.49\n",
+ "98 COc1ccc(Cc2c(N)n[nH]c2N)cc1 0.20\n",
+ "99 CCN(CCN(C)C)S(=O)(=O)c1ccc(cc1)c2cnc(N)c(n2)C(... 2.00\n",
+ "\n",
+ "[100 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_test = pd.read_csv(test_path)\n",
+ "df_test"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Set up trainer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "GPU available: True (mps), used: False\n",
+ "TPU available: False, using: 0 TPU cores\n",
+ "HPU available: False, using: 0 HPUs\n",
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.\n"
+ ]
+ }
+ ],
+ "source": [
+ "trainer = pl.Trainer(logger=None, enable_progress_bar=False, accelerator=\"cpu\", devices=1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Running interpretation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# MCTS options\n",
+ "rollout = 10 # number of MCTS rollouts to perform. If mol.GetNumAtoms() > 50, consider setting n_rollout = 1 to avoid long computation time\n",
+ "\n",
+ "c_puct = 10.0 # constant that controls the level of exploration\n",
+ "\n",
+ "max_atoms = 20 # maximum number of atoms allowed in an extracted rationale\n",
+ "\n",
+ "min_atoms = 8 # minimum number of atoms in an extracted rationale\n",
+ "\n",
+ "prop_delta = 0.5 # Minimum score to count as positive.\n",
+ "# In this algorithm, if the predicted property from the substructure if larger than prop_delta, the substructure is considered satisfactory.\n",
+ "# This value depends on the property you want to interpret. 0.5 is a dummy value for demonstration purposes\n",
+ "\n",
+ "num_rationales_to_keep = 5 # number of rationales to keep for each molecule"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define the scoring function. \"Score\" for a substructure is the predicted property value of the substructure.\n",
+ "\n",
+ "models = [mpnn]\n",
+ "\n",
+ "property_for_interpretation = \"lipo\"\n",
+ "\n",
+ "property_id = (\n",
+ " df_test.columns.get_loc(property_for_interpretation) - 1\n",
+ ") # property index in the dataset; -1 for the SMILES column\n",
+ "\n",
+ "\n",
+ "def scoring_function(smiles: list[str]) -> list[float]:\n",
+ " return make_prediction(\n",
+ " models=models,\n",
+ " trainer=trainer,\n",
+ " smiles=smiles,\n",
+ " )[:, property_id]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14',\n",
+ " 'COc1cc(OC)c(cc1NC(=O)CSCC(=O)O)S(=O)(=O)N2C(C)CCc3ccccc23',\n",
+ " 'COC(=O)[C@@H](N1CCc2sccc2C1)c3ccccc3Cl',\n",
+ " 'OC[C@H](O)CN1C(=O)C(Cc2ccccc12)NC(=O)c3cc4cc(Cl)sc4[nH]3',\n",
+ " 'Cc1cccc(C[C@H](NC(=O)c2cc(nn2C)C(C)(C)C)C(=O)NCC#N)c1']"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# only use the first 5 SMILES for demonstration purposes\n",
+ "all_smiles = df_test[smiles_column].tolist()[:5]\n",
+ "all_smiles"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/anaconda3/envs/chemprop/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.\n",
+ "[19:04:06] Can't kekulize mol. Unkekulized atoms: 10 11 12 13 14\n",
+ "[19:04:06] Can't kekulize mol. Unkekulized atoms: 11 12 13 14 15\n",
+ "[19:04:06] Can't kekulize mol. Unkekulized atoms: 8 9 10 11 12\n",
+ "[19:04:06] Can't kekulize mol. Unkekulized atoms: 7 8 9 10 11\n",
+ "[19:04:06] Can't kekulize mol. Unkekulized atoms: 1 2 3 4 5\n",
+ "[19:04:06] Can't kekulize mol. Unkekulized atoms: 0 1 3 4 5\n",
+ "[19:04:06] Can't kekulize mol. Unkekulized atoms: 0 1 2 3 4\n",
+ "[19:04:06] Can't kekulize mol. Unkekulized atoms: 11 12 13 14 15\n",
+ "[19:04:06] Can't kekulize mol. Unkekulized atoms: 8 9 10 11 12\n",
+ "[19:04:06] Can't kekulize mol. Unkekulized atoms: 7 8 9 10 11\n",
+ "[19:04:06] Can't kekulize mol. Unkekulized atoms: 10 11 12 13 14\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 13 s, sys: 1.38 s, total: 14.4 s\n",
+ "Wall time: 3.67 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "results_df = {\"smiles\": [], property_for_interpretation: []}\n",
+ "\n",
+ "for i in range(num_rationales_to_keep):\n",
+ " results_df[f\"rationale_{i}\"] = []\n",
+ " results_df[f\"rationale_{i}_score\"] = []\n",
+ "\n",
+ "for smiles in all_smiles:\n",
+ " score = scoring_function([smiles])[0]\n",
+ " if score > prop_delta:\n",
+ " rationales = mcts(\n",
+ " smiles=smiles,\n",
+ " scoring_function=scoring_function,\n",
+ " n_rollout=rollout,\n",
+ " max_atoms=max_atoms,\n",
+ " prop_delta=prop_delta,\n",
+ " min_atoms=min_atoms,\n",
+ " c_puct=c_puct,\n",
+ " )\n",
+ " else:\n",
+ " rationales = []\n",
+ "\n",
+ " results_df[\"smiles\"].append(smiles)\n",
+ " results_df[property_for_interpretation].append(score)\n",
+ "\n",
+ " if len(rationales) == 0:\n",
+ " for i in range(num_rationales_to_keep):\n",
+ " results_df[f\"rationale_{i}\"].append(None)\n",
+ " results_df[f\"rationale_{i}_score\"].append(None)\n",
+ " else:\n",
+ " min_size = min(len(x.atoms) for x in rationales)\n",
+ " min_rationales = [x for x in rationales if len(x.atoms) == min_size]\n",
+ " rats = sorted(min_rationales, key=lambda x: x.P, reverse=True)\n",
+ "\n",
+ " for i in range(num_rationales_to_keep):\n",
+ " if i < len(rats):\n",
+ " results_df[f\"rationale_{i}\"].append(rats[i].smiles)\n",
+ " results_df[f\"rationale_{i}_score\"].append(rats[i].P)\n",
+ " else:\n",
+ " results_df[f\"rationale_{i}\"].append(None)\n",
+ " results_df[f\"rationale_{i}_score\"].append(None)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "