Input file input.yaml
An example of input.yaml can be found in the examples/grace folder.
seed: 42  # seed value (presumably for random number generation - confirm)
cutoff: 6  # single cutoff value; see cutoff_dict below for per-bond-type cutoffs
## Per-bond-type cutoffs, defined separately for each bond type (used only by certain models):
# cutoff_dict: {Mo: 4, MoNb: 3, W: 5, Ta*: 7 }
## possible defaults: DEFAULT_CUTOFF_1L, DEFAULT_CUTOFF_2L, CUTOFF_2L
######################
## DATA ##
######################
data:
  ## Training dataset (options of this section must be indented under `data:`)
  filename: /path/to/train.pckl.gzip
  # filename: /path/to/train.extxyz
  # train_size: 100
  ## Test dataset
  test_filename: /path/to/test.pckl.gzip
  # test_size: 0.05
  ## Reference energy: a single number, "auto", or a per-element mapping
  reference_energy: 0
  # reference_energy: auto  # auto: least-squares fit of per-element E0 from train set
  # reference_energy: {Al: -1.23, Li: -3.56}
  # save_dataset: False  # default is True
  # stress_units: eV/A3  # eV/A3 (default) or GPa or kbar or -kbar
  # max_workers: 6  # for parallel data builder
  ## Extra input/reference DataBuilder/s required for model
  # extra_components: {
  #   MagMomDataBuilder: {},
  # }
######################
## POTENTIAL ##
######################
potential:
  ## Options of this section must be indented under `potential:`.
  # elements: [C, H, O]  # If not provided - determined automatically from data

  ## Option 1. Presets
  preset: GRACE_1LAYER  # FS, GRACE_1LAYER, GRACE_2LAYER
  # kwargs: {n_rad_max: 16}  # kw-arguments that will be passed to preset or custom model

  ## Option 2. Custom model in python file (advanced)
  # custom: model.custom_model  # custom model from model.py file, function custom_model

  ## Option 3. Model in model.yaml
  # filename: model.yaml  # configuration (WITHOUT weights!) of the model
  # checkpoint_name: /path/to/checkpoint.index  # path to checkpoint index file

  ## Option 4. Fine-tune foundation model
  # finetune_foundation_model: GRACE-1L-OAM
  # reduce_elements: True  # default - False, reduce elements to those provided in dataset

  ## Option 4a. LORA (experimental, not supported)
  # lora: {all: {rank: 16, alpha: 1}, Z: {rank: 8, alpha: 1}, I: {rank: 4, alpha: 1, keep_dims: 1} }
  # reduce_lora: True  # reduce LORA model

  ## Other parameters:
  # shift: False  # True/False/"auto" - automatic shift by energy
  ##   True   - least-squares per-element shift on training energies (for training from scratch)
  ##   "auto" - when finetuning a foundation model, computes per-element shifts by comparing
  ##            FM predictions with reference DFT data and injects them into the model
  # scale: False  # False/True or float - automatic scale data by force RMSE
  # avg_n_neigh: 40  # Average number of neighbours. By default - automatically determined
  # float_dtype: float64  # float64, float32
  ## Custom ZBL core repulsion for the model:
  # kwargs: {zbl_cutoff: {Mo: 1, MoNb: 2, W: 1, Ta*: 3 }}
######################
## FIT ##
######################
fit:
  ## Options of this section must be indented under `fit:`.
  ## Explicit specification of the train and compute functions for the model with optional parameters
  # compute_function: ComputeStructureEnergyAndForcesAndVirial
  # train_function: ComputeBatchEnergyAndForces
  # compute_function_config: {}

  loss: {
    energy: { type: huber, weight: 16, delta: 0.01},  # or { type: square, weight: 1}
    forces: { type: huber, weight: 32., delta: 0.01},
    # stress: { type: huber, weight: 32., delta: 0.01},
    ## Change weights for energy/forces/stress loss components
    ## and learning_rate after "after_iter" epochs (or fraction: 0.75 = 75% of maxiter, or "auto" = 0.75)
    ## NOTE: switch requires `scheduler: reduce_on_plateau` (set automatically by `gracemaker -t`)
    # switch: { after_iter: 0.75,  # or e.g. 350
    #           energy: { weight: 5.0 },
    #           forces: { weight: 2.0 },
    #           stress: { weight: 0.001 },
    #           learning_rate: 0.001}
  }

  maxiter: auto  # Max number of optimization epochs.
  ## "auto" mode: ~50k total updates for scratch, ~10k for finetuning.
  ## For BFGS, "auto" is ~500 epochs (scratch) or ~100 (finetuning).
  # target_total_updates: 50000
  ## Alternative to maxiter: specify total gradient updates.
  ## maxiter will be auto-computed. Recommended ~100-500 for BFGS.

  optimizer: Adam
  ## Optimization with Adam: good for large number of parameters, first-order method
  opt_params: { learning_rate: 0.008, use_ema: True, ema_momentum: 0.99, weight_decay: 1.e-20, clipnorm: 1.0}
  # reset_optimizer: True  # reset optimizer state, after being loaded from checkpoint
  # reset_epoch_and_step: False  # reset epoch and step internal counters (stored in checkpoint)

  scheduler: cosine_decay  # scheduler for learning-rate reduction during training
  ## available options are: reduce_on_plateau, cosine_decay, linear_decay, exponential_decay
  scheduler_params: {"minimal_learning_rate": 0.0001}
  # scheduler_params: {"warmup_epochs": 2, "cold_learning_rate": 0.1, "minimal_learning_rate": 0.05}
  ## If :warmup_epochs: > 0, begin optimization with :cold_learning_rate: and reach :opt_params::learning_rate:
  ## within :warmup_epochs: (can be < 1). Else, begin optimization with :opt_params::learning_rate: and decay down to
  ## :minimal_learning_rate: within :maxiter: epochs

  ## legacy format for reduce_on_plateau lr scheduler
  # learning_rate_reduction: { patience: 5, factor: 0.98, min: 5.0e-4, stop_at_min: True, resume_lr: True, }

  ## Optimization with BFGS: good for SMALL number of parameters (up to 10k), "second"-order method.
  ## scipy optimizer on CPU will be used
  # optimizer: L-BFGS-B  # 'L-BFGS-B' for memory limited or 'BFGS' for full method
  # opt_params: {maxcor: 100, maxls: 20 }  # options for L-BFGS-B

  batch_size: 32  # Important hyperparameter for Adam and irrelevant (but must be set) for L-BFGS-B/BFGS
  test_batch_size: 200  # test batch size (optional)

  ## Uniform weighting is used if not specified
  ## Energy based weighting can be used as:
  # weighting: {type: energy_based}  # the simplest default

  # compute_convex_hull: False  ## for train+test dataset compute convex hull and distance to it
  # eval_init_stats: False  ## Compute train/test metrics before start fitting

  jit_compile: True  # for XLA compilation, must be used in almost all cases
  ## To use jit_compile efficiently, data must be padded.
  ## Bucket is a group of batches padded to the same shape for efficient JIT execution.
  ## max_n_buckets can be an integer or "auto".
  ## In "auto" mode, the number of buckets is estimated as ~sqrt(num_batches), clamped to [1, 32].
  ## `train_max_n_buckets`: "auto" (default) or integer. Max number of distinct buffer shapes (buckets) for training.
  ##   - "auto": dynamically determines the minimum number of buckets (1-32) that keeps padding overhead below `auto_bucket_max_padding`.
  ## `test_max_n_buckets`: "auto" (default) or integer. Same for test set.
  ## `auto_bucket_max_padding`: 0.3 (default). Target maximum padding overhead fraction for neighbours when using `"auto"` bucketing. 0.3 means 30%.
  train_max_n_buckets: auto  ## max number of buckets in train set
  test_max_n_buckets: auto  ## same for test

  checkpoint_freq: 10  # frequency for **REGULAR** checkpoints.
  # save_all_regular_checkpoints: False  # to store ALL regular checkpoints
  # progressbar: True  # show batch-evaluation progress bar
  # train_shuffle: True  # shuffle train batches on every epoch
  # strategy: mirrored  # or -m flag  # for parallel multi-GPU parameterization
  # trainable_variable_names: ["rho/reducing_", "Z/ChemicalEmbedding"]  ### specify trainable variables name pattern

  ## technical parameters for normalization
  # loss_norm_by_batch_size: False  # normalization of total loss by global batch size (for backward compat)
  # normalize_weights: True  ## norm per-sample weights to sum-up to one
  # normalize_force_per_structure: True  ## force-weights is divided by number of atoms
This is the complete list of parameters. For most practical purposes,
it is sufficient to generate the input file with the gracemaker -t utility.
Detailed weighting options:
## NOTE: `weighting` is an option of the `fit` section (the short form
## `weighting: {type: energy_based}` is listed among the `fit` options), not of `potential`.
fit:
  weighting: {
    type: energy_based,
    ## number of structures to randomly select from the initial dataset
    nfit: 10000,
    ## only the structures with energy up to E_min + DEup will be selected
    DEup: 10.0,  ## eV, upper energy range (E_min + DElow, E_min + DEup)
    ## only the structures with maximal force on atom up to DFup will be selected
    DFup: 50.0,  ## eV/A
    ## lower energy range (E_min, E_min + DElow)
    DElow: 1.0,  ## eV
    ## delta_E shift for weights, see paper
    DE: 1.0,
    ## delta_F shift for weights, see paper
    DF: 1.0,
    ## 0<wlow<1 or None: if provided, the renormalization weight of the structures
    ## in the lower energy range (see DElow)
    wlow: 0.75,
    ## "convex_hull" or "cohesive" or "zero_formation_energy": method to compute the E_min
    energy: convex_hull,
    ## structures types: all (default), bulk or cluster
    reftype: all,
    ## random number seed
    seed: 42
  }