satvik-dixit commited on
Commit
7e9a33f
·
verified ·
1 Parent(s): 7590700

Uploaded checkpoint-27500

Browse files
Files changed (5) hide show
  1. adapter_model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +1761 -3
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ac7111e4408c88e98e042d4c5fc26cf04d5c377bc4e895d0d09876770025062
3
  size 119975656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ad86e91f2924a189dc19b796deef58a4c1f44b9596040dfbed596e3a58a58a4
3
  size 119975656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:658ee3d420a3387b166c96890bb3416d4ef5da62499c2d9f27507add5578ba3e
3
  size 240145026
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6dcec2fc7bddf9ccd947d61cfbb7ec0d09e233cfdb81b4d8f00e3043b60ec27
3
  size 240145026
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a86cb077d8db69b8576d847120141afacb571e7ca656f25e2cba58ec3ade5df
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:971f8c6b700d32d9d1711207ade77f4dca9cda1be000e561bca9b74000ac50f5
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3bdbaa37c77733a3ea9eb90a36bc290f4f5b9f56abe23cc6586cbaa459f92c6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bae572518ab53ddc674f52a5ef01613875bea64a8d9c53d4b7d4a9aedc712f19
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.2961933612823486,
3
  "best_model_checkpoint": "runs/deepseek_lora_20240422-141601/checkpoint-25000",
4
- "epoch": 0.625,
5
  "eval_steps": 2500,
6
- "global_step": 25000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -17587,6 +17587,1764 @@
17587
  "eval_samples_per_second": 8.174,
17588
  "eval_steps_per_second": 8.174,
17589
  "step": 25000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17590
  }
17591
  ],
17592
  "logging_steps": 10,
@@ -17594,7 +19352,7 @@
17594
  "num_input_tokens_seen": 0,
17595
  "num_train_epochs": 1,
17596
  "save_steps": 2500,
17597
- "total_flos": 4.025531498496e+17,
17598
  "train_batch_size": 1,
17599
  "trial_name": null,
17600
  "trial_params": null
 
1
  {
2
  "best_metric": 1.2961933612823486,
3
  "best_model_checkpoint": "runs/deepseek_lora_20240422-141601/checkpoint-25000",
4
+ "epoch": 0.6875,
5
  "eval_steps": 2500,
6
+ "global_step": 27500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
17587
  "eval_samples_per_second": 8.174,
17588
  "eval_steps_per_second": 8.174,
17589
  "step": 25000
17590
+ },
17591
+ {
17592
+ "epoch": 0.63,
17593
+ "grad_norm": 7.878478527069092,
17594
+ "learning_rate": 3.3830508474576273e-06,
17595
+ "loss": 1.5645,
17596
+ "step": 25010
17597
+ },
17598
+ {
17599
+ "epoch": 0.63,
17600
+ "grad_norm": 4.464993000030518,
17601
+ "learning_rate": 3.3762711864406783e-06,
17602
+ "loss": 1.2522,
17603
+ "step": 25020
17604
+ },
17605
+ {
17606
+ "epoch": 0.63,
17607
+ "grad_norm": 1.824704885482788,
17608
+ "learning_rate": 3.3694915254237292e-06,
17609
+ "loss": 1.1667,
17610
+ "step": 25030
17611
+ },
17612
+ {
17613
+ "epoch": 0.63,
17614
+ "grad_norm": 4.979220390319824,
17615
+ "learning_rate": 3.3627118644067802e-06,
17616
+ "loss": 1.4592,
17617
+ "step": 25040
17618
+ },
17619
+ {
17620
+ "epoch": 0.63,
17621
+ "grad_norm": 7.964636325836182,
17622
+ "learning_rate": 3.3559322033898308e-06,
17623
+ "loss": 1.3505,
17624
+ "step": 25050
17625
+ },
17626
+ {
17627
+ "epoch": 0.63,
17628
+ "grad_norm": 3.848740816116333,
17629
+ "learning_rate": 3.3491525423728817e-06,
17630
+ "loss": 1.3424,
17631
+ "step": 25060
17632
+ },
17633
+ {
17634
+ "epoch": 0.63,
17635
+ "grad_norm": 5.446021556854248,
17636
+ "learning_rate": 3.3423728813559327e-06,
17637
+ "loss": 1.3514,
17638
+ "step": 25070
17639
+ },
17640
+ {
17641
+ "epoch": 0.63,
17642
+ "grad_norm": 4.195797920227051,
17643
+ "learning_rate": 3.3355932203389833e-06,
17644
+ "loss": 1.4442,
17645
+ "step": 25080
17646
+ },
17647
+ {
17648
+ "epoch": 0.63,
17649
+ "grad_norm": 5.259161949157715,
17650
+ "learning_rate": 3.3288135593220343e-06,
17651
+ "loss": 1.4244,
17652
+ "step": 25090
17653
+ },
17654
+ {
17655
+ "epoch": 0.63,
17656
+ "grad_norm": 4.59972620010376,
17657
+ "learning_rate": 3.322033898305085e-06,
17658
+ "loss": 1.3104,
17659
+ "step": 25100
17660
+ },
17661
+ {
17662
+ "epoch": 0.63,
17663
+ "grad_norm": 4.761903762817383,
17664
+ "learning_rate": 3.3152542372881358e-06,
17665
+ "loss": 1.3031,
17666
+ "step": 25110
17667
+ },
17668
+ {
17669
+ "epoch": 0.63,
17670
+ "grad_norm": 10.678994178771973,
17671
+ "learning_rate": 3.3084745762711868e-06,
17672
+ "loss": 1.372,
17673
+ "step": 25120
17674
+ },
17675
+ {
17676
+ "epoch": 0.63,
17677
+ "grad_norm": 5.033021926879883,
17678
+ "learning_rate": 3.3016949152542377e-06,
17679
+ "loss": 1.3018,
17680
+ "step": 25130
17681
+ },
17682
+ {
17683
+ "epoch": 0.63,
17684
+ "grad_norm": 14.870203971862793,
17685
+ "learning_rate": 3.2949152542372887e-06,
17686
+ "loss": 1.3284,
17687
+ "step": 25140
17688
+ },
17689
+ {
17690
+ "epoch": 0.63,
17691
+ "grad_norm": 1.7636396884918213,
17692
+ "learning_rate": 3.288135593220339e-06,
17693
+ "loss": 1.3044,
17694
+ "step": 25150
17695
+ },
17696
+ {
17697
+ "epoch": 0.63,
17698
+ "grad_norm": 2.6163928508758545,
17699
+ "learning_rate": 3.28135593220339e-06,
17700
+ "loss": 1.1875,
17701
+ "step": 25160
17702
+ },
17703
+ {
17704
+ "epoch": 0.63,
17705
+ "grad_norm": 4.163614273071289,
17706
+ "learning_rate": 3.274576271186441e-06,
17707
+ "loss": 1.3458,
17708
+ "step": 25170
17709
+ },
17710
+ {
17711
+ "epoch": 0.63,
17712
+ "grad_norm": 9.529121398925781,
17713
+ "learning_rate": 3.2677966101694918e-06,
17714
+ "loss": 1.2659,
17715
+ "step": 25180
17716
+ },
17717
+ {
17718
+ "epoch": 0.63,
17719
+ "grad_norm": 10.017566680908203,
17720
+ "learning_rate": 3.2610169491525428e-06,
17721
+ "loss": 1.4239,
17722
+ "step": 25190
17723
+ },
17724
+ {
17725
+ "epoch": 0.63,
17726
+ "grad_norm": 4.03933048248291,
17727
+ "learning_rate": 3.2542372881355933e-06,
17728
+ "loss": 1.252,
17729
+ "step": 25200
17730
+ },
17731
+ {
17732
+ "epoch": 0.63,
17733
+ "grad_norm": 6.725574970245361,
17734
+ "learning_rate": 3.2474576271186443e-06,
17735
+ "loss": 1.1699,
17736
+ "step": 25210
17737
+ },
17738
+ {
17739
+ "epoch": 0.63,
17740
+ "grad_norm": 8.446992874145508,
17741
+ "learning_rate": 3.2406779661016953e-06,
17742
+ "loss": 1.2996,
17743
+ "step": 25220
17744
+ },
17745
+ {
17746
+ "epoch": 0.63,
17747
+ "grad_norm": 15.130813598632812,
17748
+ "learning_rate": 3.2338983050847462e-06,
17749
+ "loss": 1.1601,
17750
+ "step": 25230
17751
+ },
17752
+ {
17753
+ "epoch": 0.63,
17754
+ "grad_norm": 8.915867805480957,
17755
+ "learning_rate": 3.2271186440677972e-06,
17756
+ "loss": 1.4512,
17757
+ "step": 25240
17758
+ },
17759
+ {
17760
+ "epoch": 0.63,
17761
+ "grad_norm": 3.813676595687866,
17762
+ "learning_rate": 3.2203389830508473e-06,
17763
+ "loss": 1.4311,
17764
+ "step": 25250
17765
+ },
17766
+ {
17767
+ "epoch": 0.63,
17768
+ "grad_norm": 8.306417465209961,
17769
+ "learning_rate": 3.2135593220338983e-06,
17770
+ "loss": 1.2554,
17771
+ "step": 25260
17772
+ },
17773
+ {
17774
+ "epoch": 0.63,
17775
+ "grad_norm": 4.557586193084717,
17776
+ "learning_rate": 3.2067796610169493e-06,
17777
+ "loss": 1.4084,
17778
+ "step": 25270
17779
+ },
17780
+ {
17781
+ "epoch": 0.63,
17782
+ "grad_norm": 3.206493616104126,
17783
+ "learning_rate": 3.2000000000000003e-06,
17784
+ "loss": 1.3265,
17785
+ "step": 25280
17786
+ },
17787
+ {
17788
+ "epoch": 0.63,
17789
+ "grad_norm": 4.9644622802734375,
17790
+ "learning_rate": 3.1932203389830513e-06,
17791
+ "loss": 1.3932,
17792
+ "step": 25290
17793
+ },
17794
+ {
17795
+ "epoch": 0.63,
17796
+ "grad_norm": 3.4366214275360107,
17797
+ "learning_rate": 3.186440677966102e-06,
17798
+ "loss": 1.4233,
17799
+ "step": 25300
17800
+ },
17801
+ {
17802
+ "epoch": 0.63,
17803
+ "grad_norm": 6.7854461669921875,
17804
+ "learning_rate": 3.1796610169491528e-06,
17805
+ "loss": 1.2838,
17806
+ "step": 25310
17807
+ },
17808
+ {
17809
+ "epoch": 0.63,
17810
+ "grad_norm": 6.945521831512451,
17811
+ "learning_rate": 3.1728813559322038e-06,
17812
+ "loss": 1.2936,
17813
+ "step": 25320
17814
+ },
17815
+ {
17816
+ "epoch": 0.63,
17817
+ "grad_norm": 4.550187587738037,
17818
+ "learning_rate": 3.1661016949152547e-06,
17819
+ "loss": 1.3477,
17820
+ "step": 25330
17821
+ },
17822
+ {
17823
+ "epoch": 0.63,
17824
+ "grad_norm": 5.256103515625,
17825
+ "learning_rate": 3.1593220338983053e-06,
17826
+ "loss": 1.4062,
17827
+ "step": 25340
17828
+ },
17829
+ {
17830
+ "epoch": 0.63,
17831
+ "grad_norm": 11.691208839416504,
17832
+ "learning_rate": 3.1525423728813563e-06,
17833
+ "loss": 1.1473,
17834
+ "step": 25350
17835
+ },
17836
+ {
17837
+ "epoch": 0.63,
17838
+ "grad_norm": 5.051747798919678,
17839
+ "learning_rate": 3.145762711864407e-06,
17840
+ "loss": 1.4299,
17841
+ "step": 25360
17842
+ },
17843
+ {
17844
+ "epoch": 0.63,
17845
+ "grad_norm": 2.562920093536377,
17846
+ "learning_rate": 3.138983050847458e-06,
17847
+ "loss": 1.2428,
17848
+ "step": 25370
17849
+ },
17850
+ {
17851
+ "epoch": 0.63,
17852
+ "grad_norm": 7.520709037780762,
17853
+ "learning_rate": 3.1322033898305088e-06,
17854
+ "loss": 1.1736,
17855
+ "step": 25380
17856
+ },
17857
+ {
17858
+ "epoch": 0.63,
17859
+ "grad_norm": 5.788994789123535,
17860
+ "learning_rate": 3.1254237288135598e-06,
17861
+ "loss": 1.3141,
17862
+ "step": 25390
17863
+ },
17864
+ {
17865
+ "epoch": 0.64,
17866
+ "grad_norm": 17.664766311645508,
17867
+ "learning_rate": 3.1186440677966107e-06,
17868
+ "loss": 1.357,
17869
+ "step": 25400
17870
+ },
17871
+ {
17872
+ "epoch": 0.64,
17873
+ "grad_norm": 4.366672992706299,
17874
+ "learning_rate": 3.111864406779661e-06,
17875
+ "loss": 1.3631,
17876
+ "step": 25410
17877
+ },
17878
+ {
17879
+ "epoch": 0.64,
17880
+ "grad_norm": 15.109098434448242,
17881
+ "learning_rate": 3.105084745762712e-06,
17882
+ "loss": 1.5056,
17883
+ "step": 25420
17884
+ },
17885
+ {
17886
+ "epoch": 0.64,
17887
+ "grad_norm": 12.550411224365234,
17888
+ "learning_rate": 3.098305084745763e-06,
17889
+ "loss": 1.2255,
17890
+ "step": 25430
17891
+ },
17892
+ {
17893
+ "epoch": 0.64,
17894
+ "grad_norm": 5.603880882263184,
17895
+ "learning_rate": 3.091525423728814e-06,
17896
+ "loss": 1.4206,
17897
+ "step": 25440
17898
+ },
17899
+ {
17900
+ "epoch": 0.64,
17901
+ "grad_norm": 5.365425109863281,
17902
+ "learning_rate": 3.0847457627118648e-06,
17903
+ "loss": 1.1442,
17904
+ "step": 25450
17905
+ },
17906
+ {
17907
+ "epoch": 0.64,
17908
+ "grad_norm": 6.900291442871094,
17909
+ "learning_rate": 3.0779661016949153e-06,
17910
+ "loss": 1.3042,
17911
+ "step": 25460
17912
+ },
17913
+ {
17914
+ "epoch": 0.64,
17915
+ "grad_norm": 6.29402494430542,
17916
+ "learning_rate": 3.0711864406779663e-06,
17917
+ "loss": 1.2615,
17918
+ "step": 25470
17919
+ },
17920
+ {
17921
+ "epoch": 0.64,
17922
+ "grad_norm": 11.43036937713623,
17923
+ "learning_rate": 3.0644067796610173e-06,
17924
+ "loss": 1.1831,
17925
+ "step": 25480
17926
+ },
17927
+ {
17928
+ "epoch": 0.64,
17929
+ "grad_norm": 8.487439155578613,
17930
+ "learning_rate": 3.0576271186440683e-06,
17931
+ "loss": 1.2347,
17932
+ "step": 25490
17933
+ },
17934
+ {
17935
+ "epoch": 0.64,
17936
+ "grad_norm": 4.873865127563477,
17937
+ "learning_rate": 3.0508474576271192e-06,
17938
+ "loss": 1.0908,
17939
+ "step": 25500
17940
+ },
17941
+ {
17942
+ "epoch": 0.64,
17943
+ "grad_norm": 2.734248161315918,
17944
+ "learning_rate": 3.0440677966101694e-06,
17945
+ "loss": 1.0498,
17946
+ "step": 25510
17947
+ },
17948
+ {
17949
+ "epoch": 0.64,
17950
+ "grad_norm": 12.489617347717285,
17951
+ "learning_rate": 3.0372881355932203e-06,
17952
+ "loss": 1.2103,
17953
+ "step": 25520
17954
+ },
17955
+ {
17956
+ "epoch": 0.64,
17957
+ "grad_norm": 11.316421508789062,
17958
+ "learning_rate": 3.0305084745762713e-06,
17959
+ "loss": 1.5457,
17960
+ "step": 25530
17961
+ },
17962
+ {
17963
+ "epoch": 0.64,
17964
+ "grad_norm": 7.098801612854004,
17965
+ "learning_rate": 3.0237288135593223e-06,
17966
+ "loss": 1.4746,
17967
+ "step": 25540
17968
+ },
17969
+ {
17970
+ "epoch": 0.64,
17971
+ "grad_norm": 5.496173858642578,
17972
+ "learning_rate": 3.0169491525423733e-06,
17973
+ "loss": 1.296,
17974
+ "step": 25550
17975
+ },
17976
+ {
17977
+ "epoch": 0.64,
17978
+ "grad_norm": 3.706704616546631,
17979
+ "learning_rate": 3.010169491525424e-06,
17980
+ "loss": 1.4458,
17981
+ "step": 25560
17982
+ },
17983
+ {
17984
+ "epoch": 0.64,
17985
+ "grad_norm": 10.640968322753906,
17986
+ "learning_rate": 3.003389830508475e-06,
17987
+ "loss": 1.2303,
17988
+ "step": 25570
17989
+ },
17990
+ {
17991
+ "epoch": 0.64,
17992
+ "grad_norm": 9.76960563659668,
17993
+ "learning_rate": 2.9966101694915258e-06,
17994
+ "loss": 1.3352,
17995
+ "step": 25580
17996
+ },
17997
+ {
17998
+ "epoch": 0.64,
17999
+ "grad_norm": 6.274062633514404,
18000
+ "learning_rate": 2.9898305084745768e-06,
18001
+ "loss": 1.4795,
18002
+ "step": 25590
18003
+ },
18004
+ {
18005
+ "epoch": 0.64,
18006
+ "grad_norm": 2.7021098136901855,
18007
+ "learning_rate": 2.9830508474576277e-06,
18008
+ "loss": 1.3396,
18009
+ "step": 25600
18010
+ },
18011
+ {
18012
+ "epoch": 0.64,
18013
+ "grad_norm": 4.872988224029541,
18014
+ "learning_rate": 2.9762711864406783e-06,
18015
+ "loss": 1.4767,
18016
+ "step": 25610
18017
+ },
18018
+ {
18019
+ "epoch": 0.64,
18020
+ "grad_norm": 8.984478950500488,
18021
+ "learning_rate": 2.969491525423729e-06,
18022
+ "loss": 1.3939,
18023
+ "step": 25620
18024
+ },
18025
+ {
18026
+ "epoch": 0.64,
18027
+ "grad_norm": 18.204336166381836,
18028
+ "learning_rate": 2.96271186440678e-06,
18029
+ "loss": 1.3875,
18030
+ "step": 25630
18031
+ },
18032
+ {
18033
+ "epoch": 0.64,
18034
+ "grad_norm": 6.397688388824463,
18035
+ "learning_rate": 2.955932203389831e-06,
18036
+ "loss": 1.1992,
18037
+ "step": 25640
18038
+ },
18039
+ {
18040
+ "epoch": 0.64,
18041
+ "grad_norm": 3.8524389266967773,
18042
+ "learning_rate": 2.9491525423728818e-06,
18043
+ "loss": 1.5454,
18044
+ "step": 25650
18045
+ },
18046
+ {
18047
+ "epoch": 0.64,
18048
+ "grad_norm": 6.4902191162109375,
18049
+ "learning_rate": 2.9423728813559327e-06,
18050
+ "loss": 1.3023,
18051
+ "step": 25660
18052
+ },
18053
+ {
18054
+ "epoch": 0.64,
18055
+ "grad_norm": 2.5433766841888428,
18056
+ "learning_rate": 2.935593220338983e-06,
18057
+ "loss": 1.3424,
18058
+ "step": 25670
18059
+ },
18060
+ {
18061
+ "epoch": 0.64,
18062
+ "grad_norm": 2.7309176921844482,
18063
+ "learning_rate": 2.928813559322034e-06,
18064
+ "loss": 1.2601,
18065
+ "step": 25680
18066
+ },
18067
+ {
18068
+ "epoch": 0.64,
18069
+ "grad_norm": 5.759544849395752,
18070
+ "learning_rate": 2.922033898305085e-06,
18071
+ "loss": 1.4213,
18072
+ "step": 25690
18073
+ },
18074
+ {
18075
+ "epoch": 0.64,
18076
+ "grad_norm": 8.862116813659668,
18077
+ "learning_rate": 2.915254237288136e-06,
18078
+ "loss": 1.2039,
18079
+ "step": 25700
18080
+ },
18081
+ {
18082
+ "epoch": 0.64,
18083
+ "grad_norm": 4.360088348388672,
18084
+ "learning_rate": 2.9084745762711868e-06,
18085
+ "loss": 1.2741,
18086
+ "step": 25710
18087
+ },
18088
+ {
18089
+ "epoch": 0.64,
18090
+ "grad_norm": 6.417861461639404,
18091
+ "learning_rate": 2.9016949152542373e-06,
18092
+ "loss": 1.173,
18093
+ "step": 25720
18094
+ },
18095
+ {
18096
+ "epoch": 0.64,
18097
+ "grad_norm": 4.8081254959106445,
18098
+ "learning_rate": 2.8949152542372883e-06,
18099
+ "loss": 1.3977,
18100
+ "step": 25730
18101
+ },
18102
+ {
18103
+ "epoch": 0.64,
18104
+ "grad_norm": 4.581600189208984,
18105
+ "learning_rate": 2.8881355932203393e-06,
18106
+ "loss": 1.2773,
18107
+ "step": 25740
18108
+ },
18109
+ {
18110
+ "epoch": 0.64,
18111
+ "grad_norm": 7.737278938293457,
18112
+ "learning_rate": 2.8813559322033903e-06,
18113
+ "loss": 1.3239,
18114
+ "step": 25750
18115
+ },
18116
+ {
18117
+ "epoch": 0.64,
18118
+ "grad_norm": 8.583956718444824,
18119
+ "learning_rate": 2.8745762711864412e-06,
18120
+ "loss": 1.3715,
18121
+ "step": 25760
18122
+ },
18123
+ {
18124
+ "epoch": 0.64,
18125
+ "grad_norm": 6.567660331726074,
18126
+ "learning_rate": 2.8677966101694914e-06,
18127
+ "loss": 1.0883,
18128
+ "step": 25770
18129
+ },
18130
+ {
18131
+ "epoch": 0.64,
18132
+ "grad_norm": 8.99410343170166,
18133
+ "learning_rate": 2.8610169491525424e-06,
18134
+ "loss": 1.213,
18135
+ "step": 25780
18136
+ },
18137
+ {
18138
+ "epoch": 0.64,
18139
+ "grad_norm": 7.688558101654053,
18140
+ "learning_rate": 2.8542372881355933e-06,
18141
+ "loss": 1.147,
18142
+ "step": 25790
18143
+ },
18144
+ {
18145
+ "epoch": 0.65,
18146
+ "grad_norm": 9.962355613708496,
18147
+ "learning_rate": 2.8474576271186443e-06,
18148
+ "loss": 1.243,
18149
+ "step": 25800
18150
+ },
18151
+ {
18152
+ "epoch": 0.65,
18153
+ "grad_norm": 2.2275478839874268,
18154
+ "learning_rate": 2.8406779661016953e-06,
18155
+ "loss": 1.2328,
18156
+ "step": 25810
18157
+ },
18158
+ {
18159
+ "epoch": 0.65,
18160
+ "grad_norm": 3.4127414226531982,
18161
+ "learning_rate": 2.833898305084746e-06,
18162
+ "loss": 1.4244,
18163
+ "step": 25820
18164
+ },
18165
+ {
18166
+ "epoch": 0.65,
18167
+ "grad_norm": 12.15282917022705,
18168
+ "learning_rate": 2.827118644067797e-06,
18169
+ "loss": 1.401,
18170
+ "step": 25830
18171
+ },
18172
+ {
18173
+ "epoch": 0.65,
18174
+ "grad_norm": 8.007610321044922,
18175
+ "learning_rate": 2.820338983050848e-06,
18176
+ "loss": 1.3701,
18177
+ "step": 25840
18178
+ },
18179
+ {
18180
+ "epoch": 0.65,
18181
+ "grad_norm": 9.589988708496094,
18182
+ "learning_rate": 2.8135593220338988e-06,
18183
+ "loss": 1.2886,
18184
+ "step": 25850
18185
+ },
18186
+ {
18187
+ "epoch": 0.65,
18188
+ "grad_norm": 4.063002109527588,
18189
+ "learning_rate": 2.8067796610169497e-06,
18190
+ "loss": 1.3383,
18191
+ "step": 25860
18192
+ },
18193
+ {
18194
+ "epoch": 0.65,
18195
+ "grad_norm": 2.1042330265045166,
18196
+ "learning_rate": 2.8000000000000003e-06,
18197
+ "loss": 1.2753,
18198
+ "step": 25870
18199
+ },
18200
+ {
18201
+ "epoch": 0.65,
18202
+ "grad_norm": 23.256053924560547,
18203
+ "learning_rate": 2.793220338983051e-06,
18204
+ "loss": 1.3617,
18205
+ "step": 25880
18206
+ },
18207
+ {
18208
+ "epoch": 0.65,
18209
+ "grad_norm": 7.6475911140441895,
18210
+ "learning_rate": 2.786440677966102e-06,
18211
+ "loss": 1.2862,
18212
+ "step": 25890
18213
+ },
18214
+ {
18215
+ "epoch": 0.65,
18216
+ "grad_norm": 16.81471824645996,
18217
+ "learning_rate": 2.779661016949153e-06,
18218
+ "loss": 1.5188,
18219
+ "step": 25900
18220
+ },
18221
+ {
18222
+ "epoch": 0.65,
18223
+ "grad_norm": 10.20080852508545,
18224
+ "learning_rate": 2.7728813559322038e-06,
18225
+ "loss": 1.5414,
18226
+ "step": 25910
18227
+ },
18228
+ {
18229
+ "epoch": 0.65,
18230
+ "grad_norm": 9.043648719787598,
18231
+ "learning_rate": 2.7661016949152548e-06,
18232
+ "loss": 1.3299,
18233
+ "step": 25920
18234
+ },
18235
+ {
18236
+ "epoch": 0.65,
18237
+ "grad_norm": 4.330277442932129,
18238
+ "learning_rate": 2.7593220338983053e-06,
18239
+ "loss": 1.1826,
18240
+ "step": 25930
18241
+ },
18242
+ {
18243
+ "epoch": 0.65,
18244
+ "grad_norm": 7.962332725524902,
18245
+ "learning_rate": 2.752542372881356e-06,
18246
+ "loss": 1.3566,
18247
+ "step": 25940
18248
+ },
18249
+ {
18250
+ "epoch": 0.65,
18251
+ "grad_norm": 8.763781547546387,
18252
+ "learning_rate": 2.745762711864407e-06,
18253
+ "loss": 1.3485,
18254
+ "step": 25950
18255
+ },
18256
+ {
18257
+ "epoch": 0.65,
18258
+ "grad_norm": 6.160745620727539,
18259
+ "learning_rate": 2.738983050847458e-06,
18260
+ "loss": 1.303,
18261
+ "step": 25960
18262
+ },
18263
+ {
18264
+ "epoch": 0.65,
18265
+ "grad_norm": 2.6451609134674072,
18266
+ "learning_rate": 2.732203389830509e-06,
18267
+ "loss": 1.4117,
18268
+ "step": 25970
18269
+ },
18270
+ {
18271
+ "epoch": 0.65,
18272
+ "grad_norm": 10.857940673828125,
18273
+ "learning_rate": 2.7254237288135593e-06,
18274
+ "loss": 1.1566,
18275
+ "step": 25980
18276
+ },
18277
+ {
18278
+ "epoch": 0.65,
18279
+ "grad_norm": 5.1549601554870605,
18280
+ "learning_rate": 2.7186440677966103e-06,
18281
+ "loss": 1.2738,
18282
+ "step": 25990
18283
+ },
18284
+ {
18285
+ "epoch": 0.65,
18286
+ "grad_norm": 7.437528610229492,
18287
+ "learning_rate": 2.7118644067796613e-06,
18288
+ "loss": 1.4507,
18289
+ "step": 26000
18290
+ },
18291
+ {
18292
+ "epoch": 0.65,
18293
+ "grad_norm": 6.80765962600708,
18294
+ "learning_rate": 2.7050847457627123e-06,
18295
+ "loss": 1.1497,
18296
+ "step": 26010
18297
+ },
18298
+ {
18299
+ "epoch": 0.65,
18300
+ "grad_norm": 6.173390865325928,
18301
+ "learning_rate": 2.6983050847457633e-06,
18302
+ "loss": 1.3426,
18303
+ "step": 26020
18304
+ },
18305
+ {
18306
+ "epoch": 0.65,
18307
+ "grad_norm": 6.7725911140441895,
18308
+ "learning_rate": 2.6915254237288134e-06,
18309
+ "loss": 1.3951,
18310
+ "step": 26030
18311
+ },
18312
+ {
18313
+ "epoch": 0.65,
18314
+ "grad_norm": 6.6503777503967285,
18315
+ "learning_rate": 2.6847457627118644e-06,
18316
+ "loss": 1.4507,
18317
+ "step": 26040
18318
+ },
18319
+ {
18320
+ "epoch": 0.65,
18321
+ "grad_norm": 5.210537433624268,
18322
+ "learning_rate": 2.6779661016949153e-06,
18323
+ "loss": 1.3876,
18324
+ "step": 26050
18325
+ },
18326
+ {
18327
+ "epoch": 0.65,
18328
+ "grad_norm": 3.615936756134033,
18329
+ "learning_rate": 2.6711864406779663e-06,
18330
+ "loss": 1.4476,
18331
+ "step": 26060
18332
+ },
18333
+ {
18334
+ "epoch": 0.65,
18335
+ "grad_norm": 9.065774917602539,
18336
+ "learning_rate": 2.6644067796610173e-06,
18337
+ "loss": 1.3124,
18338
+ "step": 26070
18339
+ },
18340
+ {
18341
+ "epoch": 0.65,
18342
+ "grad_norm": 13.187819480895996,
18343
+ "learning_rate": 2.657627118644068e-06,
18344
+ "loss": 1.2038,
18345
+ "step": 26080
18346
+ },
18347
+ {
18348
+ "epoch": 0.65,
18349
+ "grad_norm": 13.807534217834473,
18350
+ "learning_rate": 2.650847457627119e-06,
18351
+ "loss": 1.2118,
18352
+ "step": 26090
18353
+ },
18354
+ {
18355
+ "epoch": 0.65,
18356
+ "grad_norm": 16.19401741027832,
18357
+ "learning_rate": 2.64406779661017e-06,
18358
+ "loss": 1.2734,
18359
+ "step": 26100
18360
+ },
18361
+ {
18362
+ "epoch": 0.65,
18363
+ "grad_norm": 7.493824005126953,
18364
+ "learning_rate": 2.6372881355932208e-06,
18365
+ "loss": 1.3692,
18366
+ "step": 26110
18367
+ },
18368
+ {
18369
+ "epoch": 0.65,
18370
+ "grad_norm": 16.29339599609375,
18371
+ "learning_rate": 2.6305084745762718e-06,
18372
+ "loss": 1.3132,
18373
+ "step": 26120
18374
+ },
18375
+ {
18376
+ "epoch": 0.65,
18377
+ "grad_norm": 6.331333160400391,
18378
+ "learning_rate": 2.6237288135593223e-06,
18379
+ "loss": 1.3203,
18380
+ "step": 26130
18381
+ },
18382
+ {
18383
+ "epoch": 0.65,
18384
+ "grad_norm": 6.448307991027832,
18385
+ "learning_rate": 2.616949152542373e-06,
18386
+ "loss": 1.2753,
18387
+ "step": 26140
18388
+ },
18389
+ {
18390
+ "epoch": 0.65,
18391
+ "grad_norm": 6.896134376525879,
18392
+ "learning_rate": 2.610169491525424e-06,
18393
+ "loss": 1.349,
18394
+ "step": 26150
18395
+ },
18396
+ {
18397
+ "epoch": 0.65,
18398
+ "grad_norm": 5.189770698547363,
18399
+ "learning_rate": 2.603389830508475e-06,
18400
+ "loss": 1.289,
18401
+ "step": 26160
18402
+ },
18403
+ {
18404
+ "epoch": 0.65,
18405
+ "grad_norm": 1.7721081972122192,
18406
+ "learning_rate": 2.596610169491526e-06,
18407
+ "loss": 1.4432,
18408
+ "step": 26170
18409
+ },
18410
+ {
18411
+ "epoch": 0.65,
18412
+ "grad_norm": 3.9717156887054443,
18413
+ "learning_rate": 2.5898305084745768e-06,
18414
+ "loss": 1.2982,
18415
+ "step": 26180
18416
+ },
18417
+ {
18418
+ "epoch": 0.65,
18419
+ "grad_norm": 19.084896087646484,
18420
+ "learning_rate": 2.5830508474576273e-06,
18421
+ "loss": 1.3897,
18422
+ "step": 26190
18423
+ },
18424
+ {
18425
+ "epoch": 0.66,
18426
+ "grad_norm": 1.6735248565673828,
18427
+ "learning_rate": 2.576271186440678e-06,
18428
+ "loss": 1.4137,
18429
+ "step": 26200
18430
+ },
18431
+ {
18432
+ "epoch": 0.66,
18433
+ "grad_norm": 8.025382041931152,
18434
+ "learning_rate": 2.569491525423729e-06,
18435
+ "loss": 1.2629,
18436
+ "step": 26210
18437
+ },
18438
+ {
18439
+ "epoch": 0.66,
18440
+ "grad_norm": 4.695014476776123,
18441
+ "learning_rate": 2.56271186440678e-06,
18442
+ "loss": 1.5102,
18443
+ "step": 26220
18444
+ },
18445
+ {
18446
+ "epoch": 0.66,
18447
+ "grad_norm": 4.135346412658691,
18448
+ "learning_rate": 2.555932203389831e-06,
18449
+ "loss": 1.1628,
18450
+ "step": 26230
18451
+ },
18452
+ {
18453
+ "epoch": 0.66,
18454
+ "grad_norm": 6.607401371002197,
18455
+ "learning_rate": 2.5491525423728814e-06,
18456
+ "loss": 1.305,
18457
+ "step": 26240
18458
+ },
18459
+ {
18460
+ "epoch": 0.66,
18461
+ "grad_norm": 17.407390594482422,
18462
+ "learning_rate": 2.5423728813559323e-06,
18463
+ "loss": 1.3841,
18464
+ "step": 26250
18465
+ },
18466
+ {
18467
+ "epoch": 0.66,
18468
+ "grad_norm": 13.363433837890625,
18469
+ "learning_rate": 2.5355932203389833e-06,
18470
+ "loss": 1.4593,
18471
+ "step": 26260
18472
+ },
18473
+ {
18474
+ "epoch": 0.66,
18475
+ "grad_norm": 4.77979040145874,
18476
+ "learning_rate": 2.5288135593220343e-06,
18477
+ "loss": 1.2445,
18478
+ "step": 26270
18479
+ },
18480
+ {
18481
+ "epoch": 0.66,
18482
+ "grad_norm": 10.652926445007324,
18483
+ "learning_rate": 2.5220338983050853e-06,
18484
+ "loss": 1.3092,
18485
+ "step": 26280
18486
+ },
18487
+ {
18488
+ "epoch": 0.66,
18489
+ "grad_norm": 5.278314113616943,
18490
+ "learning_rate": 2.5152542372881354e-06,
18491
+ "loss": 1.447,
18492
+ "step": 26290
18493
+ },
18494
+ {
18495
+ "epoch": 0.66,
18496
+ "grad_norm": 6.439229488372803,
18497
+ "learning_rate": 2.5084745762711864e-06,
18498
+ "loss": 1.3421,
18499
+ "step": 26300
18500
+ },
18501
+ {
18502
+ "epoch": 0.66,
18503
+ "grad_norm": 4.738833904266357,
18504
+ "learning_rate": 2.5016949152542374e-06,
18505
+ "loss": 1.3454,
18506
+ "step": 26310
18507
+ },
18508
+ {
18509
+ "epoch": 0.66,
18510
+ "grad_norm": 4.070488929748535,
18511
+ "learning_rate": 2.4949152542372883e-06,
18512
+ "loss": 1.377,
18513
+ "step": 26320
18514
+ },
18515
+ {
18516
+ "epoch": 0.66,
18517
+ "grad_norm": 1.9005275964736938,
18518
+ "learning_rate": 2.488135593220339e-06,
18519
+ "loss": 1.4501,
18520
+ "step": 26330
18521
+ },
18522
+ {
18523
+ "epoch": 0.66,
18524
+ "grad_norm": 9.970990180969238,
18525
+ "learning_rate": 2.48135593220339e-06,
18526
+ "loss": 1.1534,
18527
+ "step": 26340
18528
+ },
18529
+ {
18530
+ "epoch": 0.66,
18531
+ "grad_norm": 2.7662065029144287,
18532
+ "learning_rate": 2.474576271186441e-06,
18533
+ "loss": 1.4422,
18534
+ "step": 26350
18535
+ },
18536
+ {
18537
+ "epoch": 0.66,
18538
+ "grad_norm": 11.093968391418457,
18539
+ "learning_rate": 2.467796610169492e-06,
18540
+ "loss": 1.198,
18541
+ "step": 26360
18542
+ },
18543
+ {
18544
+ "epoch": 0.66,
18545
+ "grad_norm": 6.7317280769348145,
18546
+ "learning_rate": 2.461016949152543e-06,
18547
+ "loss": 1.244,
18548
+ "step": 26370
18549
+ },
18550
+ {
18551
+ "epoch": 0.66,
18552
+ "grad_norm": 8.76866340637207,
18553
+ "learning_rate": 2.4542372881355933e-06,
18554
+ "loss": 1.3653,
18555
+ "step": 26380
18556
+ },
18557
+ {
18558
+ "epoch": 0.66,
18559
+ "grad_norm": 11.940791130065918,
18560
+ "learning_rate": 2.4474576271186443e-06,
18561
+ "loss": 1.304,
18562
+ "step": 26390
18563
+ },
18564
+ {
18565
+ "epoch": 0.66,
18566
+ "grad_norm": 6.687407970428467,
18567
+ "learning_rate": 2.4406779661016953e-06,
18568
+ "loss": 1.1965,
18569
+ "step": 26400
18570
+ },
18571
+ {
18572
+ "epoch": 0.66,
18573
+ "grad_norm": 5.42927885055542,
18574
+ "learning_rate": 2.433898305084746e-06,
18575
+ "loss": 1.3501,
18576
+ "step": 26410
18577
+ },
18578
+ {
18579
+ "epoch": 0.66,
18580
+ "grad_norm": 2.731924057006836,
18581
+ "learning_rate": 2.427118644067797e-06,
18582
+ "loss": 1.4572,
18583
+ "step": 26420
18584
+ },
18585
+ {
18586
+ "epoch": 0.66,
18587
+ "grad_norm": 5.305939197540283,
18588
+ "learning_rate": 2.4203389830508474e-06,
18589
+ "loss": 1.3428,
18590
+ "step": 26430
18591
+ },
18592
+ {
18593
+ "epoch": 0.66,
18594
+ "grad_norm": 10.32532787322998,
18595
+ "learning_rate": 2.4135593220338984e-06,
18596
+ "loss": 1.4827,
18597
+ "step": 26440
18598
+ },
18599
+ {
18600
+ "epoch": 0.66,
18601
+ "grad_norm": 7.55979585647583,
18602
+ "learning_rate": 2.4067796610169493e-06,
18603
+ "loss": 1.2877,
18604
+ "step": 26450
18605
+ },
18606
+ {
18607
+ "epoch": 0.66,
18608
+ "grad_norm": 9.092228889465332,
18609
+ "learning_rate": 2.4000000000000003e-06,
18610
+ "loss": 1.3516,
18611
+ "step": 26460
18612
+ },
18613
+ {
18614
+ "epoch": 0.66,
18615
+ "grad_norm": 4.732894420623779,
18616
+ "learning_rate": 2.393220338983051e-06,
18617
+ "loss": 1.3218,
18618
+ "step": 26470
18619
+ },
18620
+ {
18621
+ "epoch": 0.66,
18622
+ "grad_norm": 8.649917602539062,
18623
+ "learning_rate": 2.386440677966102e-06,
18624
+ "loss": 1.2708,
18625
+ "step": 26480
18626
+ },
18627
+ {
18628
+ "epoch": 0.66,
18629
+ "grad_norm": 4.569608211517334,
18630
+ "learning_rate": 2.379661016949153e-06,
18631
+ "loss": 1.3422,
18632
+ "step": 26490
18633
+ },
18634
+ {
18635
+ "epoch": 0.66,
18636
+ "grad_norm": 3.702059030532837,
18637
+ "learning_rate": 2.372881355932204e-06,
18638
+ "loss": 1.4754,
18639
+ "step": 26500
18640
+ },
18641
+ {
18642
+ "epoch": 0.66,
18643
+ "grad_norm": 3.9777114391326904,
18644
+ "learning_rate": 2.3661016949152544e-06,
18645
+ "loss": 1.3133,
18646
+ "step": 26510
18647
+ },
18648
+ {
18649
+ "epoch": 0.66,
18650
+ "grad_norm": 9.692605018615723,
18651
+ "learning_rate": 2.3593220338983053e-06,
18652
+ "loss": 1.2866,
18653
+ "step": 26520
18654
+ },
18655
+ {
18656
+ "epoch": 0.66,
18657
+ "grad_norm": 2.1870622634887695,
18658
+ "learning_rate": 2.3525423728813563e-06,
18659
+ "loss": 1.4271,
18660
+ "step": 26530
18661
+ },
18662
+ {
18663
+ "epoch": 0.66,
18664
+ "grad_norm": 6.799996852874756,
18665
+ "learning_rate": 2.345762711864407e-06,
18666
+ "loss": 1.3768,
18667
+ "step": 26540
18668
+ },
18669
+ {
18670
+ "epoch": 0.66,
18671
+ "grad_norm": 2.3258345127105713,
18672
+ "learning_rate": 2.338983050847458e-06,
18673
+ "loss": 1.4023,
18674
+ "step": 26550
18675
+ },
18676
+ {
18677
+ "epoch": 0.66,
18678
+ "grad_norm": 3.950892925262451,
18679
+ "learning_rate": 2.3322033898305084e-06,
18680
+ "loss": 1.2986,
18681
+ "step": 26560
18682
+ },
18683
+ {
18684
+ "epoch": 0.66,
18685
+ "grad_norm": 11.114343643188477,
18686
+ "learning_rate": 2.3254237288135594e-06,
18687
+ "loss": 1.3538,
18688
+ "step": 26570
18689
+ },
18690
+ {
18691
+ "epoch": 0.66,
18692
+ "grad_norm": 8.47208023071289,
18693
+ "learning_rate": 2.3186440677966103e-06,
18694
+ "loss": 1.3557,
18695
+ "step": 26580
18696
+ },
18697
+ {
18698
+ "epoch": 0.66,
18699
+ "grad_norm": 15.794944763183594,
18700
+ "learning_rate": 2.3118644067796613e-06,
18701
+ "loss": 1.3295,
18702
+ "step": 26590
18703
+ },
18704
+ {
18705
+ "epoch": 0.67,
18706
+ "grad_norm": 5.596043586730957,
18707
+ "learning_rate": 2.305084745762712e-06,
18708
+ "loss": 1.2669,
18709
+ "step": 26600
18710
+ },
18711
+ {
18712
+ "epoch": 0.67,
18713
+ "grad_norm": 8.727288246154785,
18714
+ "learning_rate": 2.298305084745763e-06,
18715
+ "loss": 1.339,
18716
+ "step": 26610
18717
+ },
18718
+ {
18719
+ "epoch": 0.67,
18720
+ "grad_norm": 10.842510223388672,
18721
+ "learning_rate": 2.291525423728814e-06,
18722
+ "loss": 1.2154,
18723
+ "step": 26620
18724
+ },
18725
+ {
18726
+ "epoch": 0.67,
18727
+ "grad_norm": 11.826702117919922,
18728
+ "learning_rate": 2.284745762711865e-06,
18729
+ "loss": 1.1599,
18730
+ "step": 26630
18731
+ },
18732
+ {
18733
+ "epoch": 0.67,
18734
+ "grad_norm": 16.47806167602539,
18735
+ "learning_rate": 2.2779661016949154e-06,
18736
+ "loss": 1.3005,
18737
+ "step": 26640
18738
+ },
18739
+ {
18740
+ "epoch": 0.67,
18741
+ "grad_norm": 4.797351837158203,
18742
+ "learning_rate": 2.2711864406779663e-06,
18743
+ "loss": 1.2779,
18744
+ "step": 26650
18745
+ },
18746
+ {
18747
+ "epoch": 0.67,
18748
+ "grad_norm": 6.353465557098389,
18749
+ "learning_rate": 2.2644067796610173e-06,
18750
+ "loss": 1.2419,
18751
+ "step": 26660
18752
+ },
18753
+ {
18754
+ "epoch": 0.67,
18755
+ "grad_norm": 10.895663261413574,
18756
+ "learning_rate": 2.257627118644068e-06,
18757
+ "loss": 1.2078,
18758
+ "step": 26670
18759
+ },
18760
+ {
18761
+ "epoch": 0.67,
18762
+ "grad_norm": 7.583923816680908,
18763
+ "learning_rate": 2.250847457627119e-06,
18764
+ "loss": 1.3381,
18765
+ "step": 26680
18766
+ },
18767
+ {
18768
+ "epoch": 0.67,
18769
+ "grad_norm": 10.84875774383545,
18770
+ "learning_rate": 2.2440677966101694e-06,
18771
+ "loss": 1.4887,
18772
+ "step": 26690
18773
+ },
18774
+ {
18775
+ "epoch": 0.67,
18776
+ "grad_norm": 5.171149253845215,
18777
+ "learning_rate": 2.2372881355932204e-06,
18778
+ "loss": 1.3469,
18779
+ "step": 26700
18780
+ },
18781
+ {
18782
+ "epoch": 0.67,
18783
+ "grad_norm": 6.136636734008789,
18784
+ "learning_rate": 2.2305084745762714e-06,
18785
+ "loss": 1.1804,
18786
+ "step": 26710
18787
+ },
18788
+ {
18789
+ "epoch": 0.67,
18790
+ "grad_norm": 12.95764446258545,
18791
+ "learning_rate": 2.2237288135593223e-06,
18792
+ "loss": 1.2808,
18793
+ "step": 26720
18794
+ },
18795
+ {
18796
+ "epoch": 0.67,
18797
+ "grad_norm": 4.0281453132629395,
18798
+ "learning_rate": 2.216949152542373e-06,
18799
+ "loss": 1.4454,
18800
+ "step": 26730
18801
+ },
18802
+ {
18803
+ "epoch": 0.67,
18804
+ "grad_norm": 5.7566609382629395,
18805
+ "learning_rate": 2.210169491525424e-06,
18806
+ "loss": 1.2968,
18807
+ "step": 26740
18808
+ },
18809
+ {
18810
+ "epoch": 0.67,
18811
+ "grad_norm": 4.710749626159668,
18812
+ "learning_rate": 2.203389830508475e-06,
18813
+ "loss": 1.355,
18814
+ "step": 26750
18815
+ },
18816
+ {
18817
+ "epoch": 0.67,
18818
+ "grad_norm": 3.0553205013275146,
18819
+ "learning_rate": 2.196610169491526e-06,
18820
+ "loss": 1.3319,
18821
+ "step": 26760
18822
+ },
18823
+ {
18824
+ "epoch": 0.67,
18825
+ "grad_norm": 15.849903106689453,
18826
+ "learning_rate": 2.1898305084745764e-06,
18827
+ "loss": 1.3233,
18828
+ "step": 26770
18829
+ },
18830
+ {
18831
+ "epoch": 0.67,
18832
+ "grad_norm": 31.49736785888672,
18833
+ "learning_rate": 2.1830508474576273e-06,
18834
+ "loss": 1.3991,
18835
+ "step": 26780
18836
+ },
18837
+ {
18838
+ "epoch": 0.67,
18839
+ "grad_norm": 11.734864234924316,
18840
+ "learning_rate": 2.1762711864406783e-06,
18841
+ "loss": 1.5305,
18842
+ "step": 26790
18843
+ },
18844
+ {
18845
+ "epoch": 0.67,
18846
+ "grad_norm": 6.124046325683594,
18847
+ "learning_rate": 2.169491525423729e-06,
18848
+ "loss": 1.249,
18849
+ "step": 26800
18850
+ },
18851
+ {
18852
+ "epoch": 0.67,
18853
+ "grad_norm": 11.438417434692383,
18854
+ "learning_rate": 2.16271186440678e-06,
18855
+ "loss": 1.2922,
18856
+ "step": 26810
18857
+ },
18858
+ {
18859
+ "epoch": 0.67,
18860
+ "grad_norm": 12.979373931884766,
18861
+ "learning_rate": 2.1559322033898304e-06,
18862
+ "loss": 1.3554,
18863
+ "step": 26820
18864
+ },
18865
+ {
18866
+ "epoch": 0.67,
18867
+ "grad_norm": 3.8955001831054688,
18868
+ "learning_rate": 2.1491525423728814e-06,
18869
+ "loss": 1.3303,
18870
+ "step": 26830
18871
+ },
18872
+ {
18873
+ "epoch": 0.67,
18874
+ "grad_norm": 9.349483489990234,
18875
+ "learning_rate": 2.1423728813559324e-06,
18876
+ "loss": 1.2548,
18877
+ "step": 26840
18878
+ },
18879
+ {
18880
+ "epoch": 0.67,
18881
+ "grad_norm": 2.8842084407806396,
18882
+ "learning_rate": 2.1355932203389833e-06,
18883
+ "loss": 1.3517,
18884
+ "step": 26850
18885
+ },
18886
+ {
18887
+ "epoch": 0.67,
18888
+ "grad_norm": 3.986353635787964,
18889
+ "learning_rate": 2.128813559322034e-06,
18890
+ "loss": 1.5566,
18891
+ "step": 26860
18892
+ },
18893
+ {
18894
+ "epoch": 0.67,
18895
+ "grad_norm": 14.33786392211914,
18896
+ "learning_rate": 2.122033898305085e-06,
18897
+ "loss": 1.2168,
18898
+ "step": 26870
18899
+ },
18900
+ {
18901
+ "epoch": 0.67,
18902
+ "grad_norm": 4.677867889404297,
18903
+ "learning_rate": 2.115254237288136e-06,
18904
+ "loss": 1.2758,
18905
+ "step": 26880
18906
+ },
18907
+ {
18908
+ "epoch": 0.67,
18909
+ "grad_norm": 3.638185977935791,
18910
+ "learning_rate": 2.108474576271187e-06,
18911
+ "loss": 1.2348,
18912
+ "step": 26890
18913
+ },
18914
+ {
18915
+ "epoch": 0.67,
18916
+ "grad_norm": 2.7823917865753174,
18917
+ "learning_rate": 2.1016949152542374e-06,
18918
+ "loss": 1.4506,
18919
+ "step": 26900
18920
+ },
18921
+ {
18922
+ "epoch": 0.67,
18923
+ "grad_norm": 14.349405288696289,
18924
+ "learning_rate": 2.0949152542372883e-06,
18925
+ "loss": 1.2189,
18926
+ "step": 26910
18927
+ },
18928
+ {
18929
+ "epoch": 0.67,
18930
+ "grad_norm": 5.958116054534912,
18931
+ "learning_rate": 2.0881355932203393e-06,
18932
+ "loss": 1.0776,
18933
+ "step": 26920
18934
+ },
18935
+ {
18936
+ "epoch": 0.67,
18937
+ "grad_norm": 5.689637184143066,
18938
+ "learning_rate": 2.08135593220339e-06,
18939
+ "loss": 1.5885,
18940
+ "step": 26930
18941
+ },
18942
+ {
18943
+ "epoch": 0.67,
18944
+ "grad_norm": 17.451379776000977,
18945
+ "learning_rate": 2.074576271186441e-06,
18946
+ "loss": 1.4116,
18947
+ "step": 26940
18948
+ },
18949
+ {
18950
+ "epoch": 0.67,
18951
+ "grad_norm": 6.859378814697266,
18952
+ "learning_rate": 2.0677966101694914e-06,
18953
+ "loss": 1.1968,
18954
+ "step": 26950
18955
+ },
18956
+ {
18957
+ "epoch": 0.67,
18958
+ "grad_norm": 5.8354082107543945,
18959
+ "learning_rate": 2.0610169491525424e-06,
18960
+ "loss": 1.3615,
18961
+ "step": 26960
18962
+ },
18963
+ {
18964
+ "epoch": 0.67,
18965
+ "grad_norm": 4.631415367126465,
18966
+ "learning_rate": 2.0542372881355934e-06,
18967
+ "loss": 1.3517,
18968
+ "step": 26970
18969
+ },
18970
+ {
18971
+ "epoch": 0.67,
18972
+ "grad_norm": 9.62684154510498,
18973
+ "learning_rate": 2.0474576271186443e-06,
18974
+ "loss": 1.3364,
18975
+ "step": 26980
18976
+ },
18977
+ {
18978
+ "epoch": 0.67,
18979
+ "grad_norm": 4.8851447105407715,
18980
+ "learning_rate": 2.0406779661016953e-06,
18981
+ "loss": 1.2893,
18982
+ "step": 26990
18983
+ },
18984
+ {
18985
+ "epoch": 0.68,
18986
+ "grad_norm": 5.123071670532227,
18987
+ "learning_rate": 2.033898305084746e-06,
18988
+ "loss": 1.4069,
18989
+ "step": 27000
18990
+ },
18991
+ {
18992
+ "epoch": 0.68,
18993
+ "grad_norm": 3.8006324768066406,
18994
+ "learning_rate": 2.027118644067797e-06,
18995
+ "loss": 1.3013,
18996
+ "step": 27010
18997
+ },
18998
+ {
18999
+ "epoch": 0.68,
19000
+ "grad_norm": 3.052011728286743,
19001
+ "learning_rate": 2.020338983050848e-06,
19002
+ "loss": 1.3073,
19003
+ "step": 27020
19004
+ },
19005
+ {
19006
+ "epoch": 0.68,
19007
+ "grad_norm": 6.314701080322266,
19008
+ "learning_rate": 2.0135593220338984e-06,
19009
+ "loss": 1.3644,
19010
+ "step": 27030
19011
+ },
19012
+ {
19013
+ "epoch": 0.68,
19014
+ "grad_norm": 2.868659257888794,
19015
+ "learning_rate": 2.0067796610169494e-06,
19016
+ "loss": 1.4159,
19017
+ "step": 27040
19018
+ },
19019
+ {
19020
+ "epoch": 0.68,
19021
+ "grad_norm": 3.1452548503875732,
19022
+ "learning_rate": 2.0000000000000003e-06,
19023
+ "loss": 1.2184,
19024
+ "step": 27050
19025
+ },
19026
+ {
19027
+ "epoch": 0.68,
19028
+ "grad_norm": 7.137606620788574,
19029
+ "learning_rate": 1.993220338983051e-06,
19030
+ "loss": 1.1827,
19031
+ "step": 27060
19032
+ },
19033
+ {
19034
+ "epoch": 0.68,
19035
+ "grad_norm": 4.114950180053711,
19036
+ "learning_rate": 1.986440677966102e-06,
19037
+ "loss": 1.3316,
19038
+ "step": 27070
19039
+ },
19040
+ {
19041
+ "epoch": 0.68,
19042
+ "grad_norm": 4.815858364105225,
19043
+ "learning_rate": 1.9796610169491524e-06,
19044
+ "loss": 1.286,
19045
+ "step": 27080
19046
+ },
19047
+ {
19048
+ "epoch": 0.68,
19049
+ "grad_norm": 2.6551191806793213,
19050
+ "learning_rate": 1.9728813559322034e-06,
19051
+ "loss": 1.2562,
19052
+ "step": 27090
19053
+ },
19054
+ {
19055
+ "epoch": 0.68,
19056
+ "grad_norm": 10.009329795837402,
19057
+ "learning_rate": 1.9661016949152544e-06,
19058
+ "loss": 1.3984,
19059
+ "step": 27100
19060
+ },
19061
+ {
19062
+ "epoch": 0.68,
19063
+ "grad_norm": 2.3229589462280273,
19064
+ "learning_rate": 1.9593220338983053e-06,
19065
+ "loss": 1.3478,
19066
+ "step": 27110
19067
+ },
19068
+ {
19069
+ "epoch": 0.68,
19070
+ "grad_norm": 5.636902332305908,
19071
+ "learning_rate": 1.9525423728813563e-06,
19072
+ "loss": 1.3959,
19073
+ "step": 27120
19074
+ },
19075
+ {
19076
+ "epoch": 0.68,
19077
+ "grad_norm": 15.958221435546875,
19078
+ "learning_rate": 1.945762711864407e-06,
19079
+ "loss": 1.4275,
19080
+ "step": 27130
19081
+ },
19082
+ {
19083
+ "epoch": 0.68,
19084
+ "grad_norm": 5.097884654998779,
19085
+ "learning_rate": 1.938983050847458e-06,
19086
+ "loss": 1.3826,
19087
+ "step": 27140
19088
+ },
19089
+ {
19090
+ "epoch": 0.68,
19091
+ "grad_norm": 6.786471366882324,
19092
+ "learning_rate": 1.932203389830509e-06,
19093
+ "loss": 1.3073,
19094
+ "step": 27150
19095
+ },
19096
+ {
19097
+ "epoch": 0.68,
19098
+ "grad_norm": 7.529135704040527,
19099
+ "learning_rate": 1.9254237288135594e-06,
19100
+ "loss": 1.2605,
19101
+ "step": 27160
19102
+ },
19103
+ {
19104
+ "epoch": 0.68,
19105
+ "grad_norm": 10.065816879272461,
19106
+ "learning_rate": 1.9186440677966104e-06,
19107
+ "loss": 1.2377,
19108
+ "step": 27170
19109
+ },
19110
+ {
19111
+ "epoch": 0.68,
19112
+ "grad_norm": 5.812075614929199,
19113
+ "learning_rate": 1.9118644067796613e-06,
19114
+ "loss": 1.3309,
19115
+ "step": 27180
19116
+ },
19117
+ {
19118
+ "epoch": 0.68,
19119
+ "grad_norm": 4.214847564697266,
19120
+ "learning_rate": 1.9050847457627119e-06,
19121
+ "loss": 1.4063,
19122
+ "step": 27190
19123
+ },
19124
+ {
19125
+ "epoch": 0.68,
19126
+ "grad_norm": 4.021416187286377,
19127
+ "learning_rate": 1.8983050847457629e-06,
19128
+ "loss": 1.2773,
19129
+ "step": 27200
19130
+ },
19131
+ {
19132
+ "epoch": 0.68,
19133
+ "grad_norm": 7.9816083908081055,
19134
+ "learning_rate": 1.8915254237288136e-06,
19135
+ "loss": 1.2151,
19136
+ "step": 27210
19137
+ },
19138
+ {
19139
+ "epoch": 0.68,
19140
+ "grad_norm": 9.82458209991455,
19141
+ "learning_rate": 1.8847457627118646e-06,
19142
+ "loss": 1.393,
19143
+ "step": 27220
19144
+ },
19145
+ {
19146
+ "epoch": 0.68,
19147
+ "grad_norm": 16.87822914123535,
19148
+ "learning_rate": 1.8779661016949156e-06,
19149
+ "loss": 1.269,
19150
+ "step": 27230
19151
+ },
19152
+ {
19153
+ "epoch": 0.68,
19154
+ "grad_norm": 6.815838813781738,
19155
+ "learning_rate": 1.8711864406779661e-06,
19156
+ "loss": 1.4237,
19157
+ "step": 27240
19158
+ },
19159
+ {
19160
+ "epoch": 0.68,
19161
+ "grad_norm": 3.9835774898529053,
19162
+ "learning_rate": 1.8644067796610171e-06,
19163
+ "loss": 1.2872,
19164
+ "step": 27250
19165
+ },
19166
+ {
19167
+ "epoch": 0.68,
19168
+ "grad_norm": 4.0309953689575195,
19169
+ "learning_rate": 1.857627118644068e-06,
19170
+ "loss": 1.2629,
19171
+ "step": 27260
19172
+ },
19173
+ {
19174
+ "epoch": 0.68,
19175
+ "grad_norm": 12.035406112670898,
19176
+ "learning_rate": 1.8508474576271189e-06,
19177
+ "loss": 1.3755,
19178
+ "step": 27270
19179
+ },
19180
+ {
19181
+ "epoch": 0.68,
19182
+ "grad_norm": 1.8347936868667603,
19183
+ "learning_rate": 1.8440677966101696e-06,
19184
+ "loss": 1.3831,
19185
+ "step": 27280
19186
+ },
19187
+ {
19188
+ "epoch": 0.68,
19189
+ "grad_norm": 8.658760070800781,
19190
+ "learning_rate": 1.8372881355932204e-06,
19191
+ "loss": 1.3428,
19192
+ "step": 27290
19193
+ },
19194
+ {
19195
+ "epoch": 0.68,
19196
+ "grad_norm": 3.145319938659668,
19197
+ "learning_rate": 1.8305084745762714e-06,
19198
+ "loss": 1.4272,
19199
+ "step": 27300
19200
+ },
19201
+ {
19202
+ "epoch": 0.68,
19203
+ "grad_norm": 9.265095710754395,
19204
+ "learning_rate": 1.8237288135593223e-06,
19205
+ "loss": 1.4133,
19206
+ "step": 27310
19207
+ },
19208
+ {
19209
+ "epoch": 0.68,
19210
+ "grad_norm": 7.101969242095947,
19211
+ "learning_rate": 1.816949152542373e-06,
19212
+ "loss": 1.1041,
19213
+ "step": 27320
19214
+ },
19215
+ {
19216
+ "epoch": 0.68,
19217
+ "grad_norm": 16.614511489868164,
19218
+ "learning_rate": 1.8101694915254239e-06,
19219
+ "loss": 1.3473,
19220
+ "step": 27330
19221
+ },
19222
+ {
19223
+ "epoch": 0.68,
19224
+ "grad_norm": 2.301051378250122,
19225
+ "learning_rate": 1.8033898305084746e-06,
19226
+ "loss": 1.2432,
19227
+ "step": 27340
19228
+ },
19229
+ {
19230
+ "epoch": 0.68,
19231
+ "grad_norm": 5.643409729003906,
19232
+ "learning_rate": 1.7966101694915256e-06,
19233
+ "loss": 1.2781,
19234
+ "step": 27350
19235
+ },
19236
+ {
19237
+ "epoch": 0.68,
19238
+ "grad_norm": 7.286752223968506,
19239
+ "learning_rate": 1.7898305084745766e-06,
19240
+ "loss": 1.328,
19241
+ "step": 27360
19242
+ },
19243
+ {
19244
+ "epoch": 0.68,
19245
+ "grad_norm": 3.3953471183776855,
19246
+ "learning_rate": 1.7830508474576271e-06,
19247
+ "loss": 1.3025,
19248
+ "step": 27370
19249
+ },
19250
+ {
19251
+ "epoch": 0.68,
19252
+ "grad_norm": 8.240042686462402,
19253
+ "learning_rate": 1.7762711864406781e-06,
19254
+ "loss": 1.3847,
19255
+ "step": 27380
19256
+ },
19257
+ {
19258
+ "epoch": 0.68,
19259
+ "grad_norm": 9.705995559692383,
19260
+ "learning_rate": 1.769491525423729e-06,
19261
+ "loss": 1.427,
19262
+ "step": 27390
19263
+ },
19264
+ {
19265
+ "epoch": 0.69,
19266
+ "grad_norm": 3.306814193725586,
19267
+ "learning_rate": 1.7627118644067799e-06,
19268
+ "loss": 1.4487,
19269
+ "step": 27400
19270
+ },
19271
+ {
19272
+ "epoch": 0.69,
19273
+ "grad_norm": 15.25204086303711,
19274
+ "learning_rate": 1.7559322033898306e-06,
19275
+ "loss": 1.1448,
19276
+ "step": 27410
19277
+ },
19278
+ {
19279
+ "epoch": 0.69,
19280
+ "grad_norm": 9.065521240234375,
19281
+ "learning_rate": 1.7491525423728814e-06,
19282
+ "loss": 1.4139,
19283
+ "step": 27420
19284
+ },
19285
+ {
19286
+ "epoch": 0.69,
19287
+ "grad_norm": 7.884547233581543,
19288
+ "learning_rate": 1.7423728813559324e-06,
19289
+ "loss": 1.3313,
19290
+ "step": 27430
19291
+ },
19292
+ {
19293
+ "epoch": 0.69,
19294
+ "grad_norm": 8.109780311584473,
19295
+ "learning_rate": 1.7355932203389834e-06,
19296
+ "loss": 1.2523,
19297
+ "step": 27440
19298
+ },
19299
+ {
19300
+ "epoch": 0.69,
19301
+ "grad_norm": 23.829362869262695,
19302
+ "learning_rate": 1.728813559322034e-06,
19303
+ "loss": 1.2634,
19304
+ "step": 27450
19305
+ },
19306
+ {
19307
+ "epoch": 0.69,
19308
+ "grad_norm": 8.787532806396484,
19309
+ "learning_rate": 1.7220338983050849e-06,
19310
+ "loss": 1.3913,
19311
+ "step": 27460
19312
+ },
19313
+ {
19314
+ "epoch": 0.69,
19315
+ "grad_norm": 17.018415451049805,
19316
+ "learning_rate": 1.7152542372881356e-06,
19317
+ "loss": 1.2223,
19318
+ "step": 27470
19319
+ },
19320
+ {
19321
+ "epoch": 0.69,
19322
+ "grad_norm": 9.56651782989502,
19323
+ "learning_rate": 1.7084745762711866e-06,
19324
+ "loss": 1.3977,
19325
+ "step": 27480
19326
+ },
19327
+ {
19328
+ "epoch": 0.69,
19329
+ "grad_norm": 4.520813941955566,
19330
+ "learning_rate": 1.7016949152542376e-06,
19331
+ "loss": 1.3356,
19332
+ "step": 27490
19333
+ },
19334
+ {
19335
+ "epoch": 0.69,
19336
+ "grad_norm": 5.794963359832764,
19337
+ "learning_rate": 1.6949152542372882e-06,
19338
+ "loss": 1.2999,
19339
+ "step": 27500
19340
+ },
19341
+ {
19342
+ "epoch": 0.69,
19343
+ "eval_loss": 1.2965120077133179,
19344
+ "eval_runtime": 122.3839,
19345
+ "eval_samples_per_second": 8.171,
19346
+ "eval_steps_per_second": 8.171,
19347
+ "step": 27500
19348
  }
19349
  ],
19350
  "logging_steps": 10,
 
19352
  "num_input_tokens_seen": 0,
19353
  "num_train_epochs": 1,
19354
  "save_steps": 2500,
19355
+ "total_flos": 4.4280846483456e+17,
19356
  "train_batch_size": 1,
19357
  "trial_name": null,
19358
  "trial_params": null