## Prepare input structures

We start by preparing the input ASE database. The subfolder `structures` contains 9 example MOF structures. `save_to_db` is a convenience function that converts CIF files into a single database.

In [1]:
# Load variables from a local .env file into the process environment before
# mlip_arena is imported further down in this cell.
# NOTE(review): presumably this supplies credentials/settings that mlip_arena
# needs (e.g. for the upload that save_to_db logs) — confirm.
from dotenv import load_dotenv
load_dotenv()

import glob
from ase.io import read
from mlip_arena.tasks.mof.input import save_to_db

# Sort the matches: glob returns paths in arbitrary, filesystem-dependent
# order, and the row order of the database decides which structure later
# cells treat as the "first" MOF.
files = sorted(glob.glob('structures/*.cif'))

# Fail early instead of writing (and uploading) an empty database, e.g. when
# the notebook is started from the wrong working directory.
if not files:
    raise FileNotFoundError("No CIF files found matching 'structures/*.cif'")

# Parse every CIF file into an ASE Atoms object.
atoms_list = [read(file) for file in files]

# Write all structures into one database; the returned path is displayed as
# the cell output.
save_to_db(
    atoms_list=atoms_list,
    db_path='mofs.db'
)

[32m2024-12-22 11:40:02.836[0m | [1mINFO    [0m | [36mmlip_arena.tasks.mof[0m:[36m<module>[0m:[36m6[0m - [1m
The module 'mlip_arena.tasks.mof' is adapted from the repository: https://github.com/hspark1212/DAC-SIM. 
By using this module, you agree to the terms and conditions specified in the following license: 

https://github.com/hspark1212/DAC-SIM/blob/main/LICENSE

Additionally, please ensure proper attribution by citing the reference: 

Lim, Y., Park, H., Walsh, A., & Kim, J. (2024). Accelerating CO₂ Direct Air Capture Screening for Metal-Organic Frameworks with a Transferable Machine Learning Force Field.

A local copy of the LICENSE file can be found at: /pscratch/sd/c/cyrusyc/mlip-arena/mlip_arena/tasks/mof/LICENSE.
[0m


No module named 'deepmd'


[32m2024-12-22 11:40:22.686[0m | [1mINFO    [0m | [36mmlip_arena.tasks.mof.input[0m:[36msave_to_db[0m:[36m45[0m - [1mmofs.db uploaded to atomind/mlip-arena/mof[0m


PosixPath('mofs.db')

## Demo: MOF benchmark workflow

In [4]:
from ase.build import molecule
from mlip_arena.models import MLIPEnum
from mlip_arena.tasks.mof.input import get_atoms_from_db
from mlip_arena.tasks.mof.flow import widom_insertion
from prefect import flow
from tqdm.auto import tqdm

@flow
def benchmark_one(model_name: str = "MACE-MP(M)", db_path: str = "mofs.db"):
    """Run a quick Widom-insertion demo of CO2 in the first MOF of a database.

    Args:
        model_name: Name of the ``MLIPEnum`` member to run; every other
            registered model is skipped. Defaults to ``"MACE-MP(M)"``.
        db_path: Path of the database passed to ``get_atoms_from_db``.
            Defaults to ``"mofs.db"``.

    Returns:
        list: One ``widom_insertion`` result per matching model. The list is
        empty when no registered model has the requested name or the
        database yields no structures.
    """
    results = []
    for model in MLIPEnum:
        if model.name != model_name:
            continue  # skip every model except the requested one
        for atoms in tqdm(get_atoms_from_db(db_path)):
            results.append(
                widom_insertion(
                    atoms,
                    molecule('CO2'),
                    calculator_name=model.name,
                )
            )
            break  # demo only: stop after the first MOF
    return results

In [5]:
# Run the demo flow; it returns a list of widom_insertion results.
result = benchmark_one()
# Display the 'henry_coefficient' entry of the first result as the cell output.
# NOTE(review): the recorded output shows `inf` as the last of three values,
# alongside warnings on the np.exp Boltzmann-factor lines — possibly an
# overflow inside widom_insertion; confirm before trusting these numbers.
result[0]['henry_coefficient']

0it [00:00, ?it/s]

mof/mofs.db:   0%|          | 0.00/168k [00:00<?, ?B/s]

Selected GPU cuda:0 with 40339.31 MB free memory from 1 GPUs
Using device: cuda:0
Selected GPU cuda:0 with 40339.31 MB free memory from 1 GPUs
Default dtype float32 does not match model dtype float64, converting models to float32.
Using calculator: <mlip_arena.models.externals.mace-mp.MACE_MP_Medium object at 0x7f8395425890>
Using filter: <ase.filters.FrechetCellFilter object at 0x7f839420ba50>
Using optimizer: <ase.optimize.fire2.FIRE2 object at 0x7f839662e4d0>
       Step     Time          Energy          fmax
FIRE2:    0 15:46:10     -398.611542        0.341164
FIRE2:    1 15:46:10     -398.627014        0.185914
FIRE2:    2 15:46:12     -398.632233        0.157950
FIRE2:    3 15:46:12     -398.636993        0.163124
FIRE2:    4 15:46:12     -398.633881        0.158292
FIRE2:    5 15:46:13     -398.642792        0.152467
FIRE2:    6 15:46:13     -398.643768        0.142569
FIRE2:    7 15:46:13     -398.637024        0.131079
FIRE2:    8 15:46:13     -398.648560        0.115860
FIRE2

Selected GPU cuda:0 with 40301.98 MB free memory from 1 GPUs
Using device: cuda:0
Selected GPU cuda:0 with 40301.98 MB free memory from 1 GPUs
Default dtype float32 does not match model dtype float64, converting models to float32.
Using calculator: <mlip_arena.models.externals.mace-mp.MACE_MP_Medium object at 0x7f83942934d0>
Using optimizer: <ase.optimize.fire2.FIRE2 object at 0x7f8394175990>
       Step     Time          Energy          fmax
FIRE2:    0 15:46:15      -22.777348        0.437851
FIRE2:    1 15:46:17      -22.778408        0.014392


Number of accessible positions: 357364 out of total 498623
Selected GPU cuda:0 with 40280.80 MB free memory from 1 GPUs
Using device: cuda:0
Selected GPU cuda:0 with 40280.80 MB free memory from 1 GPUs
Default dtype float32 does not match model dtype float64, converting models to float32.
Using calculator: <mlip_arena.models.externals.mace-mp.MACE_MP_Medium object at 0x7f838c597e50>


Fold 1/3:   0%|          | 0/5000 [00:00<?, ?it/s]

Fold 2/3:   0%|          | 0/5000 [00:00<?, ?it/s]

Fold 3/3:   0%|          | 0/5000 [00:00<?, ?it/s]

  boltzmann_factor = np.exp(
  boltzmann_factor = np.exp(
  u = (interaction_energies * boltzmann_factor).sum() / boltzmann_factor.sum()


[2.3816888372250245e-06, 2.5323794093995965e-06, inf]

## Run workflow

In [None]:
from dask.distributed import Client
from dask_jobqueue import SLURMCluster
from prefect_dask import DaskTaskRunner
from mlip_arena.tasks.mof.flow import run as MOF

# Set up a SLURM-backed Dask cluster to use as the workflow's task runner

nodes_per_alloc = 1
gpus_per_alloc = 4
ntasks = 1

# Shell commands each job runs before the Dask worker is started.
job_script_prologue = [
    "source ~/.bashrc",
    "module load python",
    "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena",
]

# Extra #SBATCH directives added to the generated job script.
job_extra_directives = [
    "-J mof",
    "-q regular",
    f"-N {nodes_per_alloc}",
    "-C gpu",
    f"-G {gpus_per_alloc}",
]

cluster_kwargs = {
    "cores": 1,
    "memory": "64 GB",
    "shebang": "#!/bin/bash",
    "account": "matgen",
    "walltime": "00:30:00",
    "job_mem": "0",
    "job_script_prologue": job_script_prologue,
    # Drop these generated directives; the job name is supplied via
    # job_extra_directives instead.
    "job_directives_skip": ["-n", "--cpus-per-task", "-J"],
    "job_extra_directives": job_extra_directives,
}

cluster = SLURMCluster(**cluster_kwargs)

# Show the generated submission script so it can be checked before use.
print(cluster.job_script())

# Let Dask scale adaptively between 10 and 20 SLURM jobs.
cluster.adapt(minimum_jobs=10, maximum_jobs=20)
client = Client(cluster)

# Execute the MOF flow, dispatching its tasks to the Dask cluster above

dask_runner = DaskTaskRunner(address=client.scheduler.address)
results = MOF.with_options(
    task_runner=dask_runner,
    # log_prints=True,
)()

#!/bin/bash

#SBATCH -A matgen
#SBATCH --mem=0
#SBATCH -t 00:30:00
#SBATCH -J mof
#SBATCH -q regular
#SBATCH -N 1
#SBATCH -C gpu
#SBATCH -G 4
source ~/.bashrc
module load python
source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena
/pscratch/sd/c/cyrusyc/.conda/mlip-arena/bin/python -m distributed.cli.dask_worker tcp://128.55.64.42:36351 --name dummy-name --nthreads 1 --memory-limit 59.60GiB --nanny --death-timeout 60



0it [00:00, ?it/s]