iamnguyen commited on
Commit
1ed53a5
·
verified ·
1 Parent(s): 10133b2

Training in progress, step 5392, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff920e5ccc2d2a405fd728410318b5d77a58a46344ae8542bb3adc83fa0b5aa5
3
  size 903834408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fb9521546742e5ab06169707c06cc043bec2d4f930941361e3febca593be593
3
  size 903834408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc711544bbc463382370e087fb56a499145af6b3d8d135d3d2ad1e1612e329ff
3
  size 1807824186
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc632dd60615524ed9d3b3017351f4b7d961867652709cf0787424ce518c75df
3
  size 1807824186
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efc2f5216a7387c189c9349463d3b40a111802d247e9267a1c2d9b8f7b01f222
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ffe3c35f79e8a24334ee3298aa52cdf049e1eff33cb89177eb46490ca5a3c18
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a2cc71334cc40bd202de7bb17936da773f888ad3b66367935ef5b0e0f47c791
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:031a255d4ec60870f505044effca17dbde0758a421a8fda981c50d676e69969a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3475393939393939,
5
  "eval_steps": 16,
6
- "global_step": 5376,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -40663,6 +40663,127 @@
40663
  "eval_samples_per_second": 11.336,
40664
  "eval_steps_per_second": 1.417,
40665
  "step": 5376
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40666
  }
40667
  ],
40668
  "logging_steps": 1,
@@ -40682,7 +40803,7 @@
40682
  "attributes": {}
40683
  }
40684
  },
40685
- "total_flos": 1.0476024382881792e+17,
40686
  "train_batch_size": 8,
40687
  "trial_name": null,
40688
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.34857373737373737,
5
  "eval_steps": 16,
6
+ "global_step": 5392,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
40663
  "eval_samples_per_second": 11.336,
40664
  "eval_steps_per_second": 1.417,
40665
  "step": 5376
40666
+ },
40667
+ {
40668
+ "epoch": 0.3476040404040404,
40669
+ "grad_norm": 0.06481784582138062,
40670
+ "learning_rate": 0.00019442224357848402,
40671
+ "loss": 0.0972,
40672
+ "step": 5377
40673
+ },
40674
+ {
40675
+ "epoch": 0.34766868686868685,
40676
+ "grad_norm": 0.06606926023960114,
40677
+ "learning_rate": 0.00019441999134371015,
40678
+ "loss": 0.0868,
40679
+ "step": 5378
40680
+ },
40681
+ {
40682
+ "epoch": 0.34773333333333334,
40683
+ "grad_norm": 0.054367631673812866,
40684
+ "learning_rate": 0.0001944177386673652,
40685
+ "loss": 0.0822,
40686
+ "step": 5379
40687
+ },
40688
+ {
40689
+ "epoch": 0.3477979797979798,
40690
+ "grad_norm": 0.0662309005856514,
40691
+ "learning_rate": 0.00019441548554945972,
40692
+ "loss": 0.0874,
40693
+ "step": 5380
40694
+ },
40695
+ {
40696
+ "epoch": 0.34786262626262626,
40697
+ "grad_norm": 0.06305788457393646,
40698
+ "learning_rate": 0.0001944132319900042,
40699
+ "loss": 0.0818,
40700
+ "step": 5381
40701
+ },
40702
+ {
40703
+ "epoch": 0.34792727272727275,
40704
+ "grad_norm": 0.06198299676179886,
40705
+ "learning_rate": 0.00019441097798900922,
40706
+ "loss": 0.0885,
40707
+ "step": 5382
40708
+ },
40709
+ {
40710
+ "epoch": 0.3479919191919192,
40711
+ "grad_norm": 0.05868459865450859,
40712
+ "learning_rate": 0.00019440872354648529,
40713
+ "loss": 0.0783,
40714
+ "step": 5383
40715
+ },
40716
+ {
40717
+ "epoch": 0.3480565656565657,
40718
+ "grad_norm": 0.0710318386554718,
40719
+ "learning_rate": 0.000194406468662443,
40720
+ "loss": 0.095,
40721
+ "step": 5384
40722
+ },
40723
+ {
40724
+ "epoch": 0.3481212121212121,
40725
+ "grad_norm": 0.07270139455795288,
40726
+ "learning_rate": 0.00019440421333689285,
40727
+ "loss": 0.1007,
40728
+ "step": 5385
40729
+ },
40730
+ {
40731
+ "epoch": 0.3481858585858586,
40732
+ "grad_norm": 0.06582971662282944,
40733
+ "learning_rate": 0.00019440195756984538,
40734
+ "loss": 0.0902,
40735
+ "step": 5386
40736
+ },
40737
+ {
40738
+ "epoch": 0.34825050505050503,
40739
+ "grad_norm": 0.06729482114315033,
40740
+ "learning_rate": 0.0001943997013613112,
40741
+ "loss": 0.0854,
40742
+ "step": 5387
40743
+ },
40744
+ {
40745
+ "epoch": 0.3483151515151515,
40746
+ "grad_norm": 0.06232403591275215,
40747
+ "learning_rate": 0.0001943974447113008,
40748
+ "loss": 0.0749,
40749
+ "step": 5388
40750
+ },
40751
+ {
40752
+ "epoch": 0.34837979797979796,
40753
+ "grad_norm": 0.08777357637882233,
40754
+ "learning_rate": 0.00019439518761982477,
40755
+ "loss": 0.0951,
40756
+ "step": 5389
40757
+ },
40758
+ {
40759
+ "epoch": 0.34844444444444445,
40760
+ "grad_norm": 0.057237379252910614,
40761
+ "learning_rate": 0.00019439293008689364,
40762
+ "loss": 0.0753,
40763
+ "step": 5390
40764
+ },
40765
+ {
40766
+ "epoch": 0.34850909090909094,
40767
+ "grad_norm": 0.06308239698410034,
40768
+ "learning_rate": 0.00019439067211251803,
40769
+ "loss": 0.0908,
40770
+ "step": 5391
40771
+ },
40772
+ {
40773
+ "epoch": 0.34857373737373737,
40774
+ "grad_norm": 0.052329737693071365,
40775
+ "learning_rate": 0.00019438841369670838,
40776
+ "loss": 0.0662,
40777
+ "step": 5392
40778
+ },
40779
+ {
40780
+ "epoch": 0.34857373737373737,
40781
+ "eval_bleu": 13.954759479769704,
40782
+ "eval_loss": 0.09024699032306671,
40783
+ "eval_runtime": 2.8264,
40784
+ "eval_samples_per_second": 11.322,
40785
+ "eval_steps_per_second": 1.415,
40786
+ "step": 5392
40787
  }
40788
  ],
40789
  "logging_steps": 1,
 
40803
  "attributes": {}
40804
  }
40805
  },
40806
+ "total_flos": 1.0507203026878464e+17,
40807
  "train_batch_size": 8,
40808
  "trial_name": null,
40809
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10442abf42d09a4ee49c7b9608c99ee976b42cd3eb3b8d38f504fad55e685d91
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a05b39c84dbf67327b54c00b0712aecde2c94f9948f9a446688d5d5d918449e
3
  size 5240