mace-universal / 2023-08-14-mace-universal.sbatch
cyrusyc's picture
add example training script
57ca36b
raw
history blame
1.49 kB
#!/bin/bash
#
# Slurm batch script that launches distributed training by running
# `python run_train.py` under `srun`.
#
# Usage: submit with `sbatch` from the experiment directory; the basename of
# that directory is forwarded to the trainer as --name.
#
# Resource request: GPU-constrained nodes, 10 nodes, 40 tasks in total with
# 4 tasks per node, 40 GPUs for the whole job, 4 CPUs per task.
#SBATCH -C gpu
#SBATCH -G 40
#SBATCH -N 10
#SBATCH --ntasks=40
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=4
# Wall time: 6 h requested; --time-min lets the scheduler shrink the limit
# down to 2 h if that allows the job to start earlier.
#SBATCH --time=6:00:00
#SBATCH --time-min=02:00:00
# stderr/stdout go to <job-name>-<job-id>.err/.out. Append mode keeps earlier
# output when the (requeue-able) job writes to the same files again.
#SBATCH --error=%x-%j.err
#SBATCH --output=%x-%j.out
#SBATCH --requeue
#SBATCH --exclusive
#SBATCH --open-mode=append

# Experiment name = basename of the submission directory. sbatch exports
# SLURM_SUBMIT_DIR; fall back to the current directory when it is not set.
exp_name=$(basename -- "${SLURM_SUBMIT_DIR:-$PWD}")

# Command-line flags forwarded verbatim to run_train.py (their meaning is
# defined by that script). Keeping them in an array means each flag is exactly
# one word regardless of spacing or line breaks, and allows comments between
# them. The group headings below are based on the flag names only.
train_args=(
  --name="$exp_name"

  # Input files
  --train_file="train.h5"
  --valid_file="valid.h5"
  --statistics_file="statistics.json"

  # Weights, evaluation and reporting
  --energy_weight=1
  --forces_weight=1
  --eval_interval=1
  --config_type_weights='{"Default":1.0}'
  --E0s='average'
  --error_table='PerAtomMAE'
  --stress_key='stress'

  # Model definition (--lr is kept at its original position in the list)
  --model="ScaleShiftMACE"
  --MLP_irreps="64x0e"
  --interaction_first="RealAgnosticResidualInteractionBlock"
  --interaction="RealAgnosticResidualInteractionBlock"
  --num_interactions=2
  --num_channels=128
  --max_ell=3
  --hidden_irreps='64x0e + 64x1o + 64x2e'
  --num_cutoff_basis=10
  --lr=1e-2
  --correlation=3
  --r_max=6.0
  --num_radial_basis=10
  --scaling='rms_forces_scaling'

  # Training loop and runtime
  --distributed
  --num_workers=4
  --batch_size=10
  --valid_batch_size=30
  --max_num_epochs=500
  --patience=250
  --amsgrad
  --weight_decay=1e-8
  --ema
  --ema_decay=0.999
  # NOTE: must stay a separate word from --clip_grad. With backslash
  # continuations, a missing space before the backslash fuses the two flags
  # into "--default_dtype=float32--clip_grad=100".
  --default_dtype="float32"
  --clip_grad=100
  --device=cuda
  --seed=3
  --save_cpu
  --restart_latest
)

# srun is started in the background and then waited for. The `wait` is
# required: without it the batch shell would run off the end of the script
# right after forking srun, and Slurm ends the job when the batch script exits.
# `wait <pid>` returns srun's exit status, so a failed training run is visible
# in the status of this command.
srun python run_train.py "${train_args[@]}" &
srun_pid=$!
wait "$srun_pid"