HaileyStorm
/

chess-mamba-vs-xformer

HaileyStorm committed on Mar 25, 2024

Commit

ce494c6

verified ·

1 Parent(s): 26c20cd

Update chess-mamba-vs-xformer/train_bygame.py

Files changed (1) hide show

chess-mamba-vs-xformer/train_bygame.py CHANGED Viewed

@@ -402,7 +402,7 @@ while True:
         param_group['lr'] = lr
     # Evaluate the loss on train/val sets and write checkpoints
-    if iter_num % eval_interval == 0 and master_process and local_iter_num > 0:
         torch.cuda.empty_cache()
         losses = estimate_loss()
         if init_from == 'anneal':
@@ -453,7 +453,7 @@ while True:
                 if losses['val'] < best_val_loss: # Temporary / only good after it's settled
                     best_val_loss = losses['val']
                     torch.save(checkpoint, os.path.join(out_dir, f'ckpt_{int(games_seen)}b.pt'))
-                elif current_nearest_multiple != last_crossed_multiple or abs(games_seen - 12652800) <= 151 or abs(games_seen - 22275000) <= 151 or abs(games_seen - 11510000) <= 151: # elif so we don't double up
                     last_crossed_multiple = current_nearest_multiple
                     torch.save(checkpoint, os.path.join(out_dir, f'ckpt_{int(games_seen)}g_{tokens_seen_padded}t.pt'))

         param_group['lr'] = lr
     # Evaluate the loss on train/val sets and write checkpoints
+    if (iter_num % eval_interval == 0 and master_process and local_iter_num > 0) or abs(games_seen - 12652800) <= 151 or abs(games_seen - 22275000) <= 151 or abs(games_seen - 11536000) <= 151:
         torch.cuda.empty_cache()
         losses = estimate_loss()
         if init_from == 'anneal':
                 if losses['val'] < best_val_loss: # Temporary / only good after it's settled
                     best_val_loss = losses['val']
                     torch.save(checkpoint, os.path.join(out_dir, f'ckpt_{int(games_seen)}b.pt'))
+                elif current_nearest_multiple != last_crossed_multiple or abs(games_seen - 12652800) <= 151 or abs(games_seen - 22275000) <= 151 or abs(games_seen - 11536000) <= 151: # elif so we don't double up
                     last_crossed_multiple = current_nearest_multiple
                     torch.save(checkpoint, os.path.join(out_dir, f'ckpt_{int(games_seen)}g_{tokens_seen_padded}t.pt'))