plip commited on
Commit
f1e0679
1 Parent(s): 4e46033

Training in progress, step 410000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b700ebc6fc16103b0c351577b0fe7b7a9955520a42d8ff4c426cb4c02c884230
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1baf6720e03a80bba42f160ede8ecbec1168817e98171665a299ded4d992a3a
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6331caf84d401728c3e2540374852d6e489eae28c3099656d3c86fe297980cc4
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db26ab4777ecfb64dd27c707c9cfebcf74d431f5c02f5554c247500eee4a57f0
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:418af87c326448c49979f76acb65b2fcea72841274341c16e51b4a9d24a0c8ee
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ba83cbca80cb672828600b248dd69c4c050beb355cdcf7faf0b56212421edca
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:498c95fdcab578849c69bf4a10f0532f5bf3ba8048fa37c1b416b3afd0ea5f33
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.319881637503205,
5
- "global_step": 400000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8006,11 +8006,211 @@
8006
  "eval_samples_per_second": 1555.592,
8007
  "eval_steps_per_second": 24.771,
8008
  "step": 400000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8009
  }
8010
  ],
8011
  "max_steps": 500000,
8012
  "num_train_epochs": 12,
8013
- "total_flos": 1.277946307806184e+22,
8014
  "trial_name": null,
8015
  "trial_params": null
8016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.552878678440784,
5
+ "global_step": 410000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8006
  "eval_samples_per_second": 1555.592,
8007
  "eval_steps_per_second": 24.771,
8008
  "step": 400000
8009
+ },
8010
+ {
8011
+ "epoch": 9.33,
8012
+ "learning_rate": 4.0280733510303475e-05,
8013
+ "loss": 0.2558,
8014
+ "step": 400500
8015
+ },
8016
+ {
8017
+ "epoch": 9.34,
8018
+ "learning_rate": 3.9988095434602716e-05,
8019
+ "loss": 0.2555,
8020
+ "step": 401000
8021
+ },
8022
+ {
8023
+ "epoch": 9.34,
8024
+ "eval_loss": 0.2393871247768402,
8025
+ "eval_runtime": 1.4557,
8026
+ "eval_samples_per_second": 1509.958,
8027
+ "eval_steps_per_second": 24.044,
8028
+ "step": 401000
8029
+ },
8030
+ {
8031
+ "epoch": 9.35,
8032
+ "learning_rate": 3.9696715110663726e-05,
8033
+ "loss": 0.2557,
8034
+ "step": 401500
8035
+ },
8036
+ {
8037
+ "epoch": 9.37,
8038
+ "learning_rate": 3.9406595724975116e-05,
8039
+ "loss": 0.2553,
8040
+ "step": 402000
8041
+ },
8042
+ {
8043
+ "epoch": 9.37,
8044
+ "eval_loss": 0.2388046830892563,
8045
+ "eval_runtime": 1.444,
8046
+ "eval_samples_per_second": 1522.119,
8047
+ "eval_steps_per_second": 24.238,
8048
+ "step": 402000
8049
+ },
8050
+ {
8051
+ "epoch": 9.38,
8052
+ "learning_rate": 3.9117740450235914e-05,
8053
+ "loss": 0.2555,
8054
+ "step": 402500
8055
+ },
8056
+ {
8057
+ "epoch": 9.39,
8058
+ "learning_rate": 3.8830152445321163e-05,
8059
+ "loss": 0.2553,
8060
+ "step": 403000
8061
+ },
8062
+ {
8063
+ "epoch": 9.39,
8064
+ "eval_loss": 0.23731090128421783,
8065
+ "eval_runtime": 1.5043,
8066
+ "eval_samples_per_second": 1461.141,
8067
+ "eval_steps_per_second": 23.267,
8068
+ "step": 403000
8069
+ },
8070
+ {
8071
+ "epoch": 9.4,
8072
+ "learning_rate": 3.854383485524724e-05,
8073
+ "loss": 0.2553,
8074
+ "step": 403500
8075
+ },
8076
+ {
8077
+ "epoch": 9.41,
8078
+ "learning_rate": 3.8258790811137425e-05,
8079
+ "loss": 0.2553,
8080
+ "step": 404000
8081
+ },
8082
+ {
8083
+ "epoch": 9.41,
8084
+ "eval_loss": 0.2364571988582611,
8085
+ "eval_runtime": 1.4662,
8086
+ "eval_samples_per_second": 1499.102,
8087
+ "eval_steps_per_second": 23.871,
8088
+ "step": 404000
8089
+ },
8090
+ {
8091
+ "epoch": 9.42,
8092
+ "learning_rate": 3.7975023430187676e-05,
8093
+ "loss": 0.2552,
8094
+ "step": 404500
8095
+ },
8096
+ {
8097
+ "epoch": 9.44,
8098
+ "learning_rate": 3.7692535815632624e-05,
8099
+ "loss": 0.2555,
8100
+ "step": 405000
8101
+ },
8102
+ {
8103
+ "epoch": 9.44,
8104
+ "eval_loss": 0.23983800411224365,
8105
+ "eval_runtime": 1.4385,
8106
+ "eval_samples_per_second": 1527.964,
8107
+ "eval_steps_per_second": 24.331,
8108
+ "step": 405000
8109
+ },
8110
+ {
8111
+ "epoch": 9.45,
8112
+ "learning_rate": 3.741133105671159e-05,
8113
+ "loss": 0.2554,
8114
+ "step": 405500
8115
+ },
8116
+ {
8117
+ "epoch": 9.46,
8118
+ "learning_rate": 3.713141222863474e-05,
8119
+ "loss": 0.2553,
8120
+ "step": 406000
8121
+ },
8122
+ {
8123
+ "epoch": 9.46,
8124
+ "eval_loss": 0.23924601078033447,
8125
+ "eval_runtime": 1.4347,
8126
+ "eval_samples_per_second": 1532.043,
8127
+ "eval_steps_per_second": 24.396,
8128
+ "step": 406000
8129
+ },
8130
+ {
8131
+ "epoch": 9.47,
8132
+ "learning_rate": 3.6852782392549584e-05,
8133
+ "loss": 0.2556,
8134
+ "step": 406500
8135
+ },
8136
+ {
8137
+ "epoch": 9.48,
8138
+ "learning_rate": 3.657544459550729e-05,
8139
+ "loss": 0.255,
8140
+ "step": 407000
8141
+ },
8142
+ {
8143
+ "epoch": 9.48,
8144
+ "eval_loss": 0.23706746101379395,
8145
+ "eval_runtime": 1.4741,
8146
+ "eval_samples_per_second": 1491.033,
8147
+ "eval_steps_per_second": 23.743,
8148
+ "step": 407000
8149
+ },
8150
+ {
8151
+ "epoch": 9.49,
8152
+ "learning_rate": 3.6299401870429606e-05,
8153
+ "loss": 0.2549,
8154
+ "step": 407500
8155
+ },
8156
+ {
8157
+ "epoch": 9.51,
8158
+ "learning_rate": 3.6024657236075546e-05,
8159
+ "loss": 0.2548,
8160
+ "step": 408000
8161
+ },
8162
+ {
8163
+ "epoch": 9.51,
8164
+ "eval_loss": 0.23815912008285522,
8165
+ "eval_runtime": 1.4566,
8166
+ "eval_samples_per_second": 1508.999,
8167
+ "eval_steps_per_second": 24.029,
8168
+ "step": 408000
8169
+ },
8170
+ {
8171
+ "epoch": 9.52,
8172
+ "learning_rate": 3.575121369700841e-05,
8173
+ "loss": 0.2549,
8174
+ "step": 408500
8175
+ },
8176
+ {
8177
+ "epoch": 9.53,
8178
+ "learning_rate": 3.5479074243562995e-05,
8179
+ "loss": 0.2548,
8180
+ "step": 409000
8181
+ },
8182
+ {
8183
+ "epoch": 9.53,
8184
+ "eval_loss": 0.23738771677017212,
8185
+ "eval_runtime": 1.4607,
8186
+ "eval_samples_per_second": 1504.709,
8187
+ "eval_steps_per_second": 23.96,
8188
+ "step": 409000
8189
+ },
8190
+ {
8191
+ "epoch": 9.54,
8192
+ "learning_rate": 3.5208241851812644e-05,
8193
+ "loss": 0.2549,
8194
+ "step": 409500
8195
+ },
8196
+ {
8197
+ "epoch": 9.55,
8198
+ "learning_rate": 3.493871948353709e-05,
8199
+ "loss": 0.2548,
8200
+ "step": 410000
8201
+ },
8202
+ {
8203
+ "epoch": 9.55,
8204
+ "eval_loss": 0.23903900384902954,
8205
+ "eval_runtime": 1.4733,
8206
+ "eval_samples_per_second": 1491.858,
8207
+ "eval_steps_per_second": 23.756,
8208
+ "step": 410000
8209
  }
8210
  ],
8211
  "max_steps": 500000,
8212
  "num_train_epochs": 12,
8213
+ "total_flos": 1.309895021683634e+22,
8214
  "trial_name": null,
8215
  "trial_params": null
8216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6331caf84d401728c3e2540374852d6e489eae28c3099656d3c86fe297980cc4
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db26ab4777ecfb64dd27c707c9cfebcf74d431f5c02f5554c247500eee4a57f0
3
  size 102501541