plip committed on
Commit
30174f2
1 Parent(s): 207b201

Training in progress, step 310000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bf06004ede41634f31f51db43c8e8bd655d6cf31bd715fe6a400fa272f9d936
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e0507b38aa7540f67f9e0a0633279dd51f9a831fc1db6cc8eacbb35397bd4b8
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f18c7c2c86ca13727949f1e8fc13e7631481b4544ee49ec424ae0d909fab662
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671528b9282de7ee8eda298ebd63ddf167905d49365aa89ef6684cdf3a665481
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6cc022b9e73fe2f5189297efc512c3e18716586fa066a4b492e92e4f957bb73
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:871f42af1f573f4efd66827fac31645af2cb2540adb5d0d237397c32c55cce8b
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6cc022b9e73fe2f5189297efc512c3e18716586fa066a4b492e92e4f957bb73
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:871f42af1f573f4efd66827fac31645af2cb2540adb5d0d237397c32c55cce8b
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6cc022b9e73fe2f5189297efc512c3e18716586fa066a4b492e92e4f957bb73
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:871f42af1f573f4efd66827fac31645af2cb2540adb5d0d237397c32c55cce8b
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6cc022b9e73fe2f5189297efc512c3e18716586fa066a4b492e92e4f957bb73
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:871f42af1f573f4efd66827fac31645af2cb2540adb5d0d237397c32c55cce8b
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6cc022b9e73fe2f5189297efc512c3e18716586fa066a4b492e92e4f957bb73
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:871f42af1f573f4efd66827fac31645af2cb2540adb5d0d237397c32c55cce8b
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6cc022b9e73fe2f5189297efc512c3e18716586fa066a4b492e92e4f957bb73
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:871f42af1f573f4efd66827fac31645af2cb2540adb5d0d237397c32c55cce8b
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6cc022b9e73fe2f5189297efc512c3e18716586fa066a4b492e92e4f957bb73
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:871f42af1f573f4efd66827fac31645af2cb2540adb5d0d237397c32c55cce8b
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6cc022b9e73fe2f5189297efc512c3e18716586fa066a4b492e92e4f957bb73
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:871f42af1f573f4efd66827fac31645af2cb2540adb5d0d237397c32c55cce8b
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7914b9f1e3709b1198ec189eb9bb9105cd6b88dedbcbbdd4128934a703cf33e3
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8c10dab6d3ee824fc8fe4628d3bf3ceea806ce0d2fbe513f32af4d508ab89e
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.9899112281274025,
5
- "global_step": 300000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -6006,11 +6006,211 @@
6006
  "eval_samples_per_second": 1567.888,
6007
  "eval_steps_per_second": 24.966,
6008
  "step": 300000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6009
  }
6010
  ],
6011
  "max_steps": 500000,
6012
  "num_train_epochs": 12,
6013
- "total_flos": 9.58459918128956e+21,
6014
  "trial_name": null,
6015
  "trial_params": null
6016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.222908269064983,
5
+ "global_step": 310000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
6006
  "eval_samples_per_second": 1567.888,
6007
  "eval_steps_per_second": 24.966,
6008
  "step": 300000
6009
+ },
6010
+ {
6011
+ "epoch": 7.0,
6012
+ "learning_rate": 0.00011893996636109606,
6013
+ "loss": 0.6483,
6014
+ "step": 300500
6015
+ },
6016
+ {
6017
+ "epoch": 7.01,
6018
+ "learning_rate": 0.00011847572270435852,
6019
+ "loss": 0.6477,
6020
+ "step": 301000
6021
+ },
6022
+ {
6023
+ "epoch": 7.01,
6024
+ "eval_loss": 0.6460827589035034,
6025
+ "eval_runtime": 1.4908,
6026
+ "eval_samples_per_second": 1474.348,
6027
+ "eval_steps_per_second": 23.477,
6028
+ "step": 301000
6029
+ },
6030
+ {
6031
+ "epoch": 7.02,
6032
+ "learning_rate": 0.00011801187847126579,
6033
+ "loss": 0.6478,
6034
+ "step": 301500
6035
+ },
6036
+ {
6037
+ "epoch": 7.04,
6038
+ "learning_rate": 0.00011754843873434411,
6039
+ "loss": 0.6471,
6040
+ "step": 302000
6041
+ },
6042
+ {
6043
+ "epoch": 7.04,
6044
+ "eval_loss": 0.6455149054527283,
6045
+ "eval_runtime": 1.4371,
6046
+ "eval_samples_per_second": 1529.505,
6047
+ "eval_steps_per_second": 24.355,
6048
+ "step": 302000
6049
+ },
6050
+ {
6051
+ "epoch": 7.05,
6052
+ "learning_rate": 0.00011708540856169612,
6053
+ "loss": 0.6443,
6054
+ "step": 302500
6055
+ },
6056
+ {
6057
+ "epoch": 7.06,
6058
+ "learning_rate": 0.00011662279301694567,
6059
+ "loss": 0.4659,
6060
+ "step": 303000
6061
+ },
6062
+ {
6063
+ "epoch": 7.06,
6064
+ "eval_loss": 0.25994545221328735,
6065
+ "eval_runtime": 1.3921,
6066
+ "eval_samples_per_second": 1578.885,
6067
+ "eval_steps_per_second": 25.141,
6068
+ "step": 303000
6069
+ },
6070
+ {
6071
+ "epoch": 7.07,
6072
+ "learning_rate": 0.0001161605971591822,
6073
+ "loss": 0.2739,
6074
+ "step": 303500
6075
+ },
6076
+ {
6077
+ "epoch": 7.08,
6078
+ "learning_rate": 0.00011569882604290559,
6079
+ "loss": 0.2707,
6080
+ "step": 304000
6081
+ },
6082
+ {
6083
+ "epoch": 7.08,
6084
+ "eval_loss": 0.25346240401268005,
6085
+ "eval_runtime": 1.4298,
6086
+ "eval_samples_per_second": 1537.235,
6087
+ "eval_steps_per_second": 24.478,
6088
+ "step": 304000
6089
+ },
6090
+ {
6091
+ "epoch": 7.09,
6092
+ "learning_rate": 0.00011523748471797075,
6093
+ "loss": 0.2691,
6094
+ "step": 304500
6095
+ },
6096
+ {
6097
+ "epoch": 7.11,
6098
+ "learning_rate": 0.00011477657822953255,
6099
+ "loss": 0.2681,
6100
+ "step": 305000
6101
+ },
6102
+ {
6103
+ "epoch": 7.11,
6104
+ "eval_loss": 0.2524736821651459,
6105
+ "eval_runtime": 1.4268,
6106
+ "eval_samples_per_second": 1540.462,
6107
+ "eval_steps_per_second": 24.53,
6108
+ "step": 305000
6109
+ },
6110
+ {
6111
+ "epoch": 7.12,
6112
+ "learning_rate": 0.00011431611161799043,
6113
+ "loss": 0.268,
6114
+ "step": 305500
6115
+ },
6116
+ {
6117
+ "epoch": 7.13,
6118
+ "learning_rate": 0.0001138560899189335,
6119
+ "loss": 0.2678,
6120
+ "step": 306000
6121
+ },
6122
+ {
6123
+ "epoch": 7.13,
6124
+ "eval_loss": 0.25040730834007263,
6125
+ "eval_runtime": 1.4446,
6126
+ "eval_samples_per_second": 1521.535,
6127
+ "eval_steps_per_second": 24.228,
6128
+ "step": 306000
6129
+ },
6130
+ {
6131
+ "epoch": 7.14,
6132
+ "learning_rate": 0.00011339651816308543,
6133
+ "loss": 0.267,
6134
+ "step": 306500
6135
+ },
6136
+ {
6137
+ "epoch": 7.15,
6138
+ "learning_rate": 0.00011293740137624925,
6139
+ "loss": 0.267,
6140
+ "step": 307000
6141
+ },
6142
+ {
6143
+ "epoch": 7.15,
6144
+ "eval_loss": 0.24852143228054047,
6145
+ "eval_runtime": 1.3837,
6146
+ "eval_samples_per_second": 1588.479,
6147
+ "eval_steps_per_second": 25.294,
6148
+ "step": 307000
6149
+ },
6150
+ {
6151
+ "epoch": 7.16,
6152
+ "learning_rate": 0.00011247874457925261,
6153
+ "loss": 0.2668,
6154
+ "step": 307500
6155
+ },
6156
+ {
6157
+ "epoch": 7.18,
6158
+ "learning_rate": 0.0001120205527878927,
6159
+ "loss": 0.2663,
6160
+ "step": 308000
6161
+ },
6162
+ {
6163
+ "epoch": 7.18,
6164
+ "eval_loss": 0.2509649991989136,
6165
+ "eval_runtime": 1.4317,
6166
+ "eval_samples_per_second": 1535.233,
6167
+ "eval_steps_per_second": 24.446,
6168
+ "step": 308000
6169
+ },
6170
+ {
6171
+ "epoch": 7.19,
6172
+ "learning_rate": 0.00011156283101288165,
6173
+ "loss": 0.266,
6174
+ "step": 308500
6175
+ },
6176
+ {
6177
+ "epoch": 7.2,
6178
+ "learning_rate": 0.00011110558425979132,
6179
+ "loss": 0.266,
6180
+ "step": 309000
6181
+ },
6182
+ {
6183
+ "epoch": 7.2,
6184
+ "eval_loss": 0.24989214539527893,
6185
+ "eval_runtime": 1.446,
6186
+ "eval_samples_per_second": 1520.105,
6187
+ "eval_steps_per_second": 24.205,
6188
+ "step": 309000
6189
+ },
6190
+ {
6191
+ "epoch": 7.21,
6192
+ "learning_rate": 0.00011064881752899906,
6193
+ "loss": 0.2661,
6194
+ "step": 309500
6195
+ },
6196
+ {
6197
+ "epoch": 7.22,
6198
+ "learning_rate": 0.00011019253581563262,
6199
+ "loss": 0.2658,
6200
+ "step": 310000
6201
+ },
6202
+ {
6203
+ "epoch": 7.22,
6204
+ "eval_loss": 0.2480825036764145,
6205
+ "eval_runtime": 1.4466,
6206
+ "eval_samples_per_second": 1519.474,
6207
+ "eval_steps_per_second": 24.195,
6208
+ "step": 310000
6209
  }
6210
  ],
6211
  "max_steps": 500000,
6212
  "num_train_epochs": 12,
6213
+ "total_flos": 9.904083823073153e+21,
6214
  "trial_name": null,
6215
  "trial_params": null
6216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f18c7c2c86ca13727949f1e8fc13e7631481b4544ee49ec424ae0d909fab662
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671528b9282de7ee8eda298ebd63ddf167905d49365aa89ef6684cdf3a665481
3
  size 102501541