Skip to content

Commit 939ef03

Browse files
committed
Add tutorial
1 parent d459695 commit 939ef03

File tree

5 files changed

+18
-8
lines changed

5 files changed

+18
-8
lines changed

docs/generate_examples/conf.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818
os.path.join(ROOT, "examples", "ase"),
1919
os.path.join(ROOT, "examples", "programmatic", "llpr"),
2020
os.path.join(ROOT, "examples", "zbl"),
21-
os.path.join(ROOT, "examples", "programmatic", "use_architectures_outside")
21+
os.path.join(ROOT, "examples", "programmatic", "use_architectures_outside"),
22+
os.path.join(ROOT, "examples", "programmatic", "disk_dataset"),
2223
],
2324
"gallery_dirs": [
2425
os.path.join(ROOT, "docs", "src", "examples", "ase"),
2526
os.path.join(ROOT, "docs", "src", "examples", "programmatic", "llpr"),
2627
os.path.join(ROOT, "docs", "src", "examples", "zbl"),
27-
os.path.join(ROOT, "docs", "src", "examples", "programmatic", "use_architectures_outside")
28+
os.path.join(ROOT, "docs", "src", "examples", "programmatic", "use_architectures_outside"),
29+
os.path.join(ROOT, "docs", "src", "examples", "programmatic", "disk_dataset"),
2830
],
2931
"min_reported_time": 5,
3032
"matplotlib_animations": True,

docs/src/tutorials/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ This section includes some more advanced tutorials on the usage of the
1313
../examples/zbl/dimers
1414
../examples/programmatic/llpr/llpr
1515
../examples/programmatic/use_architectures_outside/use_outside
16+
../examples/programmatic/disk_dataset/disk_dataset
File renamed without changes.

examples/programmatic/disk-dataset/disk_dataset.py renamed to examples/programmatic/disk_dataset/disk_dataset.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@
44
55
Large datasets may not fit into memory. In such cases, it is useful to save the
66
dataset to disk and load it on the fly during training. This example demonstrates
7-
how to save a ``DiskDataset`` for this purpose.
7+
how to save a ``DiskDataset`` for this purpose. Metatrain will then be able to load
8+
``DiskDataset`` objects saved in this way to perform on-the-fly data loading.
89
"""
910

1011
# %%
1112
#
1213

1314
import ase.io
1415
import torch
15-
import tqdm
1616
from metatensor.torch import Labels, TensorBlock, TensorMap
1717
from metatensor.torch.atomistic import NeighborListOptions, systems_to_torch
1818

@@ -22,11 +22,12 @@
2222

2323
# %%
2424
#
25-
# Read some sample systems. Metatrain always reads systems in float64, while torch
26-
# uses float32 by default. We will convert the systems to float32.
25+
# As an example, we will use 100 structures from the QM9 dataset. In addition to the
26+
# systems and targets (here the energy), we also need to save the neighbor lists that
27+
# the model will use during training.
2728

2829
disk_dataset_writer = DiskDatasetWriter("qm9_reduced_100.zip")
29-
for i in tqdm.tqdm(range(100)):
30+
for i in range(100):
3031
frame = ase.io.read("qm9_reduced_100.xyz", index=i)
3132
system = systems_to_torch(frame, dtype=torch.float64)
3233
system = get_system_with_neighbor_lists(
@@ -48,4 +49,10 @@
4849
],
4950
)
5051
disk_dataset_writer.write_sample(system, {"energy": energy})
51-
del disk_dataset_writer
52+
del disk_dataset_writer # not necessary if the file ends here, but good in general
53+
54+
# %%
55+
#
56+
# The dataset is saved to disk. You can now provide it to ``metatrain`` as a
57+
# dataset to train from, simply by replacing your ``.xyz`` file with the newly created
58+
# zip file (e.g. ``read_from: qm9_reduced_100.zip``).

0 commit comments

Comments
 (0)