Skip to content

Commit e3e8c8b

Browse files
committed
Merge branch 'v1.0.0' into main
2 parents ad85d3f + 02aa1d1 commit e3e8c8b

File tree

268 files changed

+186479
-5279
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

268 files changed

+186479
-5279
lines changed

MANIFEST.in

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,9 @@
11

2-
recursive-include ocpmodels/datasets/dev-min *
2+
recursive-include matsciml/datasets/dev-s2ef-dgl *
3+
recursive-include matsciml/datasets/dev-is2re-dgl *
4+
recursive-include matsciml/datasets/materials_project/devset *
5+
recursive-include matsciml/datasets/lips/devset *
6+
recursive-include matsciml/datasets/carolina_db/devset *
7+
recursive-include matsciml/datasets/nomad/devset *
8+
recursive-include matsciml/datasets/oqmd/devset *
9+
recursive-include matsciml/datasets/symmetry/devset *

README.md

+226-101
Large diffs are not rendered by default.

docker/Dockerfile.cobalt_dl docker/Dockerfile

+23-23
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,9 @@
22
# SPDX-License-Identifier: MIT
33

44
# CUDA version from nvidia docker: https://hub.docker.com/r/nvidia/cuda/tags
5-
ARG CUDA_VERSION=11.3.0-cudnn8-runtime-ubuntu20.04
5+
ARG CUDA_VERSION=11.8.0-cudnn8-runtime-ubuntu20.04
66
FROM nvidia/cuda:$CUDA_VERSION
77

8-
RUN rm /etc/apt/sources.list.d/cuda.list
9-
RUN rm /etc/apt/sources.list.d/nvidia-ml.list
10-
118
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
129
# Avoids some interactive prompts during apt-get install
1310
ARG DEBIAN_FRONTEND=noninteractive
@@ -17,7 +14,6 @@ RUN apt-get update && \
1714
apt-get autoremove -y && \
1815
apt-get clean && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
1916

20-
2117
# Install system tools
2218
RUN apt-get update --fix-missing && \
2319
apt-get install -yq --no-install-recommends \
@@ -32,20 +28,21 @@ RUN apt-get update --fix-missing && \
3228
openssh-client && \
3329
apt-get clean && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
3430

35-
36-
37-
# install system tools - round 2
3831
RUN apt-get update --fix-missing && \
3932
apt-get install -yq --no-install-recommends \
40-
apt-utils \
41-
python3-pip \
42-
python3-dev \
33+
sudo \
34+
software-properties-common \
35+
python3.9 \
36+
python3-pip \
4337
virtualenv && \
4438
apt-get clean && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
4539

40+
RUN apt-get update \
41+
&& apt-get install -y wget tmux vim libxrender-dev libsm6 libxext6 && \
42+
apt-get clean && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
4643

4744
# Create py3 virtualenv in order to use python3 for the project
48-
RUN virtualenv -p python3 /venv
45+
RUN virtualenv -p python3.9 /venv
4946
ENV PATH=/venv/bin:$PATH
5047
RUN pip install --upgrade pip
5148
RUN pip install -q \
@@ -54,14 +51,17 @@ RUN pip install -q \
5451
matplotlib \
5552
scikit-learn
5653

57-
COPY requirements_clean.txt .
58-
RUN pip install -r requirements_clean.txt
59-
60-
61-
RUN apt-get update \
62-
&& apt-get install -y wget tmux vim libxrender-dev libsm6 libxext6 && \
63-
apt-get clean && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
64-
65-
66-
67-
54+
RUN pip install numba lmdb munch geometric_algebra_attention jsonargparse[signatures] sympy pymatgen
55+
RUN pip install torch==2.0.0+cu118 --index-url https://download.pytorch.org/whl/cu118
56+
RUN pip install dgl==0.9.1 -f https://data.dgl.ai/wheels/cu118/repo.html
57+
RUN pip install dglgo -f https://data.dgl.ai/wheels-test/repo.html
58+
RUN pip install dgllife==0.3.2 rdkit==2023.3.1 rowan
59+
RUN pip install torch-scatter -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
60+
RUN pip install torch-sparse -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
61+
RUN pip install torch-geometric
62+
RUN pip install smact
63+
RUN pip install matminer
64+
RUN pip install p_tqdm
65+
RUN pip install -U pytorch-lightning==1.8.6
66+
RUN pip install -U torchmetrics==0.11.4
67+
RUN pip install -U pytest

docker/requirements_clean.txt

-24
This file was deleted.

docs/MatSci-ML-Benchmark-Table.png

129 KB
Loading
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import pytorch_lightning as pl
2+
from torch.nn import LayerNorm, SiLU
3+
4+
from matsciml.lightning.data_utils import MatSciMLDataModule
5+
from matsciml.models import PLEGNNBackbone
6+
from matsciml.models.base import ScalarRegressionTask
7+
from matsciml.datasets.transforms import PointCloudToGraphTransform
8+
9+
10+
# configure a simple model for testing
11+
model_args = {
12+
"embed_in_dim": 128,
13+
"embed_hidden_dim": 32,
14+
"embed_out_dim": 128,
15+
"embed_depth": 5,
16+
"embed_feat_dims": [128, 128, 128],
17+
"embed_message_dims": [128, 128, 128],
18+
"embed_position_dims": [64, 64],
19+
"embed_edge_attributes_dim": 0,
20+
"embed_activation": "relu",
21+
"embed_residual": True,
22+
"embed_normalize": True,
23+
"embed_tanh": True,
24+
"embed_activate_last": False,
25+
"embed_k_linears": 1,
26+
"embed_use_attention": False,
27+
"embed_attention_norm": "sigmoid",
28+
"readout": "sum",
29+
"node_projection_depth": 3,
30+
"node_projection_hidden_dim": 128,
31+
"node_projection_activation": "relu",
32+
"prediction_out_dim": 1,
33+
"prediction_depth": 3,
34+
"prediction_hidden_dim": 128,
35+
"prediction_activation": "relu",
36+
"encoder_only": True,
37+
}
38+
39+
model = PLEGNNBackbone(**model_args)
40+
task = ScalarRegressionTask(
41+
model,
42+
output_kwargs={
43+
"norm": LayerNorm(128),
44+
"hidden_dim": 128,
45+
"activation": SiLU,
46+
"lazy": False,
47+
"input_dim": 128,
48+
},
49+
lr=1e-3,
50+
task_keys=["energy"],
51+
)
52+
53+
# configure CMDataset from devset
54+
dm = MatSciMLDataModule.from_devset(
55+
"CMDataset",
56+
dset_kwargs={
57+
"transforms": [
58+
PointCloudToGraphTransform(
59+
"dgl", cutoff_dist=20.0, node_keys=["pos", "atomic_numbers"]
60+
)
61+
]
62+
},
63+
)
64+
65+
# run 10 batches as a quick smoke test
66+
trainer = pl.Trainer(fast_dev_run=10, enable_checkpointing=False, logger=False)
67+
68+
trainer.fit(task, datamodule=dm)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import pytorch_lightning as pl
2+
from torch.nn import LayerNorm, SiLU
3+
4+
from matsciml.lightning.data_utils import MatSciMLDataModule
5+
from matsciml.datasets.transforms import PointCloudToGraphTransform
6+
from matsciml.models import GraphConvModel
7+
from matsciml.models.base import ScalarRegressionTask
8+
9+
pl.seed_everything(21616)
10+
11+
12+
model = GraphConvModel(100, 1, encoder_only=True)
13+
task = ScalarRegressionTask(
14+
model,
15+
output_kwargs={
16+
"norm": LayerNorm(128),
17+
"hidden_dim": 128,
18+
"activation": SiLU,
19+
"lazy": False,
20+
"input_dim": 1,
21+
},
22+
lr=1e-3,
23+
task_keys=["band_gap"],
24+
)
25+
26+
27+
dm = MatSciMLDataModule(
28+
"MaterialsProjectDataset",
29+
train_path="./matsciml/datasets/materials_project/devset",
30+
dset_kwargs={"transforms": [PointCloudToGraphTransform("dgl", cutoff_dist=20.0)]},
31+
val_split=0.2,
32+
)
33+
34+
trainer = pl.Trainer(max_epochs=10, enable_checkpointing=False)
35+
36+
trainer.fit(task, datamodule=dm)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import pytorch_lightning as pl
2+
3+
from matsciml.lightning.data_utils import MatSciMLDataModule
4+
from matsciml.models import GraphConvModel
5+
from matsciml.models.base import ScalarRegressionTask
6+
from matsciml.datasets.transforms import PointCloudToGraphTransform
7+
8+
9+
# configure a simple model for testing
10+
model = GraphConvModel(100, 1, encoder_only=True)
11+
task = ScalarRegressionTask(model, task_keys=["band_gap"])
12+
13+
# configure materials project from devset
14+
dm = MatSciMLDataModule.from_devset(
15+
"MaterialsProjectDataset",
16+
dset_kwargs={"transforms": [PointCloudToGraphTransform("dgl", cutoff_dist=20.0)]},
17+
)
18+
19+
# run 10 batches as a quick smoke test
20+
trainer = pl.Trainer(fast_dev_run=10, enable_checkpointing=False, logger=False)
21+
22+
trainer.fit(task, datamodule=dm)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import pytorch_lightning as pl
2+
from torch.nn import LayerNorm, SiLU
3+
4+
from matsciml.lightning.data_utils import MatSciMLDataModule
5+
from matsciml.datasets.transforms import PointCloudToGraphTransform
6+
from matsciml.models import PLEGNNBackbone
7+
from matsciml.models.base import ScalarRegressionTask, BinaryClassificationTask
8+
9+
pl.seed_everything(21616)
10+
11+
model_args = {
12+
"embed_in_dim": 128,
13+
"embed_hidden_dim": 32,
14+
"embed_out_dim": 128,
15+
"embed_depth": 5,
16+
"embed_feat_dims": [128, 128, 128],
17+
"embed_message_dims": [128, 128, 128],
18+
"embed_position_dims": [64, 64],
19+
"embed_edge_attributes_dim": 0,
20+
"embed_activation": "relu",
21+
"embed_residual": True,
22+
"embed_normalize": True,
23+
"embed_tanh": True,
24+
"embed_activate_last": False,
25+
"embed_k_linears": 1,
26+
"embed_use_attention": False,
27+
"embed_attention_norm": "sigmoid",
28+
"readout": "sum",
29+
"node_projection_depth": 3,
30+
"node_projection_hidden_dim": 128,
31+
"node_projection_activation": "relu",
32+
"prediction_out_dim": 1,
33+
"prediction_depth": 3,
34+
"prediction_hidden_dim": 128,
35+
"prediction_activation": "relu",
36+
"encoder_only": True,
37+
}
38+
39+
model = PLEGNNBackbone(**model_args)
40+
task = ScalarRegressionTask(
41+
model,
42+
output_kwargs={
43+
"norm": LayerNorm(128),
44+
"hidden_dim": 128,
45+
"activation": SiLU,
46+
"lazy": False,
47+
"input_dim": 128,
48+
},
49+
lr=1e-3,
50+
task_keys=["band_gap"],
51+
)
52+
53+
dm = MatSciMLDataModule(
54+
dataset="MaterialsProjectDataset",
55+
train_path="./matsciml/datasets/materials_project/devset",
56+
dset_kwargs={
57+
"transforms": [
58+
PointCloudToGraphTransform(
59+
"dgl", cutoff_dist=20.0, node_keys=["pos", "atomic_numbers"]
60+
)
61+
]
62+
},
63+
val_split=0.2,
64+
batch_size=16,
65+
num_workers=0,
66+
)
67+
68+
trainer = pl.Trainer(
69+
fast_dev_run=100,
70+
accelerator="cpu",
71+
devices=1,
72+
)
73+
74+
trainer.fit(task, datamodule=dm)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import pytorch_lightning as pl
2+
from torch.nn import LayerNorm, SiLU
3+
4+
from matsciml.lightning.data_utils import MatSciMLDataModule
5+
from matsciml.models import GalaPotential
6+
from matsciml.models.base import ScalarRegressionTask
7+
8+
9+
model_args = {
10+
"D_in": 100,
11+
"hidden_dim": 128,
12+
"merge_fun": "concat",
13+
"join_fun": "concat",
14+
"invariant_mode": "full",
15+
"covariant_mode": "full",
16+
"include_normalized_products": True,
17+
"invar_value_normalization": "momentum",
18+
"eqvar_value_normalization": "momentum_layer",
19+
"value_normalization": "layer",
20+
"score_normalization": "layer",
21+
"block_normalization": "layer",
22+
"equivariant_attention": False,
23+
"tied_attention": True,
24+
"encoder_only": True,
25+
}
26+
27+
mp_norms = {
28+
"formation_energy_per_atom_mean": -1.454,
29+
"formation_energy_per_atom_std": 1.206,
30+
}
31+
32+
task = ScalarRegressionTask(
33+
mp_norms,  # NOTE(review): passed positionally, where the other examples in this commit pass the encoder model; encoder_class is also given below - confirm which keyword this dict was meant for
34+
encoder_class=GalaPotential,
35+
encoder_kwargs=model_args,
36+
output_kwargs={
37+
"norm": LayerNorm(128),
38+
"hidden_dim": 128,
39+
"activation": SiLU,
40+
"lazy": False,
41+
"input_dim": 128,
42+
},
43+
lr=1e-4,
44+
task_keys=["band_gap"],  # NOTE(review): mp_norms only holds formation_energy_per_atom statistics, not band_gap - confirm the intended target
45+
)
46+
47+
48+
dm = MatSciMLDataModule(
49+
dataset="MaterialsProjectDataset",
50+
train_path="./matsciml/datasets/materials_project/devset",
51+
val_split=0.2,
52+
batch_size=16,
53+
num_workers=0,
54+
)
55+
56+
trainer = pl.Trainer(
57+
limit_train_batches=2,
58+
limit_val_batches=2,
59+
max_epochs=2,
60+
accelerator="cpu",
61+
)
62+
trainer.fit(task, datamodule=dm)

0 commit comments

Comments
 (0)