NairaRahim committed on
Commit
39835b2
·
verified ·
1 Parent(s): 17d02bb

Training in progress, epoch 77, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4af379246575accc58f6abfac00aee925ce6b65883c8632448c83ee50ddfd07
3
  size 559424792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5ab75838ed7afe0c288b3631cba017faa548802aa5787f848080d63a4faa88d
3
  size 559424792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:930083a4131ce65cdc8b356487bf1b813c159fa19cc01194a70d602dd9b94b36
3
  size 1118926970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b09965c1441b1cd1585c57f2d17e8d0e016910d7c90ad54c3a3afac8664c3bfd
3
  size 1118926970
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:914809037b3f88a1a2608685ab6ce1391a78e990c3ce33067466cc03d6a8480d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eddd447be293e64952cf96e8afe87fbb852cea08cd9ba63106be57df271d8556
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f90a182f176031c61cd117d508af79c4ff26bf3b95484e9bbe4017a087414d71
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc365eae5b467a6c47056a0e754f854a75cac7f154e6d27e155fce16f2e6ebd0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 12.716951370239258,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
4
- "epoch": 76.0,
5
  "eval_steps": 500,
6
- "global_step": 99180,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7552,6 +7552,105 @@
7552
  "eval_samples_per_second": 29.555,
7553
  "eval_steps_per_second": 3.714,
7554
  "step": 99180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7555
  }
7556
  ],
7557
  "logging_steps": 100,
@@ -7566,7 +7665,7 @@
7566
  "early_stopping_threshold": 0.0
7567
  },
7568
  "attributes": {
7569
- "early_stopping_patience_counter": 8
7570
  }
7571
  },
7572
  "TrainerControl": {
@@ -7580,7 +7679,7 @@
7580
  "attributes": {}
7581
  }
7582
  },
7583
- "total_flos": 4.620861214306099e+16,
7584
  "train_batch_size": 8,
7585
  "trial_name": null,
7586
  "trial_params": null
 
1
  {
2
  "best_metric": 12.716951370239258,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
4
+ "epoch": 77.0,
5
  "eval_steps": 500,
6
+ "global_step": 100485,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7552
  "eval_samples_per_second": 29.555,
7553
  "eval_steps_per_second": 3.714,
7554
  "step": 99180
7555
+ },
7556
+ {
7557
+ "epoch": 76.01532567049809,
7558
+ "grad_norm": 1.737823724746704,
7559
+ "learning_rate": 2.509578544061303e-06,
7560
+ "loss": 11.8981,
7561
+ "step": 99200
7562
+ },
7563
+ {
7564
+ "epoch": 76.0919540229885,
7565
+ "grad_norm": 1.0878353118896484,
7566
+ "learning_rate": 2.4616858237547894e-06,
7567
+ "loss": 11.8443,
7568
+ "step": 99300
7569
+ },
7570
+ {
7571
+ "epoch": 76.16858237547893,
7572
+ "grad_norm": 2.0454564094543457,
7573
+ "learning_rate": 2.413793103448276e-06,
7574
+ "loss": 11.8515,
7575
+ "step": 99400
7576
+ },
7577
+ {
7578
+ "epoch": 76.24521072796935,
7579
+ "grad_norm": 1.3210684061050415,
7580
+ "learning_rate": 2.3659003831417623e-06,
7581
+ "loss": 12.0233,
7582
+ "step": 99500
7583
+ },
7584
+ {
7585
+ "epoch": 76.32183908045977,
7586
+ "grad_norm": 1.1547104120254517,
7587
+ "learning_rate": 2.318007662835249e-06,
7588
+ "loss": 11.7145,
7589
+ "step": 99600
7590
+ },
7591
+ {
7592
+ "epoch": 76.3984674329502,
7593
+ "grad_norm": 1.3948626518249512,
7594
+ "learning_rate": 2.270114942528736e-06,
7595
+ "loss": 11.7098,
7596
+ "step": 99700
7597
+ },
7598
+ {
7599
+ "epoch": 76.47509578544062,
7600
+ "grad_norm": 1.2874501943588257,
7601
+ "learning_rate": 2.2222222222222225e-06,
7602
+ "loss": 11.8953,
7603
+ "step": 99800
7604
+ },
7605
+ {
7606
+ "epoch": 76.55172413793103,
7607
+ "grad_norm": 1.8570905923843384,
7608
+ "learning_rate": 2.174329501915709e-06,
7609
+ "loss": 11.9397,
7610
+ "step": 99900
7611
+ },
7612
+ {
7613
+ "epoch": 76.62835249042146,
7614
+ "grad_norm": 1.3673057556152344,
7615
+ "learning_rate": 2.1264367816091954e-06,
7616
+ "loss": 11.8056,
7617
+ "step": 100000
7618
+ },
7619
+ {
7620
+ "epoch": 76.70498084291188,
7621
+ "grad_norm": 2.1938419342041016,
7622
+ "learning_rate": 2.078544061302682e-06,
7623
+ "loss": 11.9414,
7624
+ "step": 100100
7625
+ },
7626
+ {
7627
+ "epoch": 76.7816091954023,
7628
+ "grad_norm": 1.9171061515808105,
7629
+ "learning_rate": 2.0306513409961687e-06,
7630
+ "loss": 11.8369,
7631
+ "step": 100200
7632
+ },
7633
+ {
7634
+ "epoch": 76.85823754789271,
7635
+ "grad_norm": 1.0486401319503784,
7636
+ "learning_rate": 1.982758620689655e-06,
7637
+ "loss": 11.8322,
7638
+ "step": 100300
7639
+ },
7640
+ {
7641
+ "epoch": 76.93486590038314,
7642
+ "grad_norm": 1.6005215644836426,
7643
+ "learning_rate": 1.934865900383142e-06,
7644
+ "loss": 11.8781,
7645
+ "step": 100400
7646
+ },
7647
+ {
7648
+ "epoch": 77.0,
7649
+ "eval_loss": 12.72097396850586,
7650
+ "eval_runtime": 44.1751,
7651
+ "eval_samples_per_second": 29.542,
7652
+ "eval_steps_per_second": 3.712,
7653
+ "step": 100485
7654
  }
7655
  ],
7656
  "logging_steps": 100,
 
7665
  "early_stopping_threshold": 0.0
7666
  },
7667
  "attributes": {
7668
+ "early_stopping_patience_counter": 9
7669
  }
7670
  },
7671
  "TrainerControl": {
 
7679
  "attributes": {}
7680
  }
7681
  },
7682
+ "total_flos": 4.681650983960218e+16,
7683
  "train_batch_size": 8,
7684
  "trial_name": null,
7685
  "trial_params": null