mtzig commited on
Commit
1fc9a25
·
verified ·
1 Parent(s): 03df885

Training in progress, step 1306, checkpoint

Browse files
last-checkpoint/optimizer_0/.metadata CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
 
last-checkpoint/optimizer_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b360a4328f640ed51ddaf65beb21759c2322654758d2b7b7f6e00f66a17354f8
3
  size 13934748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16ba4d36be29c47761e4de822f2a56f3f1055d47e1e1f14d2488c6f83eddeec5
3
  size 13934748
last-checkpoint/optimizer_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20d1395c5a780e12bd9c2d3c0a3a98e6d11c049377ae734be8b4c6bec63af7cd
3
  size 13999412
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e6479e4d97665e28705f11762ea10d92ed3b015451419fbda2316d5d8b06c73
3
  size 13999412
last-checkpoint/optimizer_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b980d02c86a12c4ddd321afa25558b9bda6ce7377f5a7301fbc73043dd7e72fd
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eaf25285a1507f4859bd6e1d4e28599283b7616a349f7a68095dd221ca407fb
3
  size 13990904
last-checkpoint/optimizer_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4716794fe32a12753a15aca9b69a92b8ff2a13cc9a1449ccd27487d4a1ca9a7d
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24253fe25c07b2675104a5e19811341ecd3d26067b978ca553e4ce6c22669504
3
  size 13990904
last-checkpoint/pytorch_model_fsdp_0/.metadata CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
 
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c424477fe3f3aa933900f713ea30de6e63503f0eb3c14d4b5a3fd7be751453c
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac126ceaf5112e8e4b4de50777b385824f0aae9304c36217aac6a5eb68d490a5
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:992a85fd0f9141e2a7ce8e4ce2c770b6564f0c5de13f4c613cc4d93bc456ab03
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7688ad663685a9025f78d869af514fc8027af746b690ee11df3209317548241
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0f386445b7a0ecca12a354673d12666bd045fe42bc66c5282186ece7173d4fd
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3842b87888cf0e1be7e33db60d6a70e76fe57f5600b71df71fcc44946743a05
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81c7ec7bbec3615990bf78e011b0f7bc719d60680964d34bbac0633971dd9f36
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52d83d2a347e7151765828045f9d61b506f1e7fecdc1832b06fe6a81ecb6a04b
3
  size 6966784
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0c8f8d9638136cb5308b0b5847756c4993f316ede670798b5676d4508282ce
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2103551eb3bb592775eb464494702c4cad71ff0b6af0e4e6c99b289b56b204ad
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc378caf9e3227b70a474c0063f96ad82cc21701d0d5fa1f12d57ba19770909f
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b97f6488d0e04c6e47d5c627ac69513ec7b0ea439086b82abe71f5696931884c
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41e92489ba1b6fe609dc774dd68b88282000969f034d53fc7540c25e859de003
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5116ddf741fb875690434076a78462204512b021145f27059e0edc50c3027b9
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:037f3e8e143701c6dab9d7f5db31ada1d1f6e223405cca2ab7ccd4b03d64aac8
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6598ea402d6825b4746034f32ba2ae771f302f2bde0d4492fa6a600836a7a44e
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:230ef6b51382a71e81c933c6e0f89f49737687e37bb89c538f18f98f56a78ee9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:530a7f21d17686b5d16f06aafaf55f3d145270ec70fa92cf1f5dd1bd2ccad51b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9950248756218906,
5
  "eval_steps": 20,
6
- "global_step": 1300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -9899,6 +9899,48 @@
9899
  "eval_samples_per_second": 6.942,
9900
  "eval_steps_per_second": 0.231,
9901
  "step": 1300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9902
  }
9903
  ],
9904
  "logging_steps": 1,
@@ -9913,12 +9955,12 @@
9913
  "should_evaluate": false,
9914
  "should_log": false,
9915
  "should_save": true,
9916
- "should_training_stop": false
9917
  },
9918
  "attributes": {}
9919
  }
9920
  },
9921
- "total_flos": 1.9972530726187827e+17,
9922
  "train_batch_size": 8,
9923
  "trial_name": null,
9924
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9996172981247609,
5
  "eval_steps": 20,
6
+ "global_step": 1306,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
9899
  "eval_samples_per_second": 6.942,
9900
  "eval_steps_per_second": 0.231,
9901
  "step": 1300
9902
+ },
9903
+ {
9904
+ "epoch": 0.995790279372369,
9905
+ "grad_norm": 7.007760047912598,
9906
+ "learning_rate": 8.935678962196381e-10,
9907
+ "loss": 0.2438,
9908
+ "step": 1301
9909
+ },
9910
+ {
9911
+ "epoch": 0.9965556831228473,
9912
+ "grad_norm": 6.088259696960449,
9913
+ "learning_rate": 5.718865197423817e-10,
9914
+ "loss": 0.3118,
9915
+ "step": 1302
9916
+ },
9917
+ {
9918
+ "epoch": 0.9973210868733257,
9919
+ "grad_norm": 8.319058418273926,
9920
+ "learning_rate": 3.2168750880634537e-10,
9921
+ "loss": 0.36,
9922
+ "step": 1303
9923
+ },
9924
+ {
9925
+ "epoch": 0.9980864906238041,
9926
+ "grad_norm": 7.24370813369751,
9927
+ "learning_rate": 1.4297265199414434e-10,
9928
+ "loss": 0.3103,
9929
+ "step": 1304
9930
+ },
9931
+ {
9932
+ "epoch": 0.9988518943742825,
9933
+ "grad_norm": 5.21471643447876,
9934
+ "learning_rate": 3.57432268771607e-11,
9935
+ "loss": 0.2336,
9936
+ "step": 1305
9937
+ },
9938
+ {
9939
+ "epoch": 0.9996172981247609,
9940
+ "grad_norm": 4.997439384460449,
9941
+ "learning_rate": 0.0,
9942
+ "loss": 0.2676,
9943
+ "step": 1306
9944
  }
9945
  ],
9946
  "logging_steps": 1,
 
9955
  "should_evaluate": false,
9956
  "should_log": false,
9957
  "should_save": true,
9958
+ "should_training_stop": true
9959
  },
9960
  "attributes": {}
9961
  }
9962
  },
9963
+ "total_flos": 2.006617662863442e+17,
9964
  "train_batch_size": 8,
9965
  "trial_name": null,
9966
  "trial_params": null