NairaRahim commited on
Commit
fc4f407
·
verified ·
1 Parent(s): 00faf0c

Training in progress, epoch 33, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c42a1f9b7d4161ba1bae9d13deadbeb763fe153dec6526f8caefd629b413acef
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2159f89156ff4dcd1758b09775f13ff807bf88e997041dac2f177d65d0f6cad3
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:192e681f7b02b75d8744a13995bf0b21ddfec28797c5b2117fc39e50a373e7e3
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f5e4fa380d2fa2203229213d42b423cf11ccfb6cff79739b5f639ce444c7cee
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2c4db18d5a66e89d462b11782d22327d30dcce66816316fdc2ddae7a53a0ffe
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32a902a960b0fe553ebdc19d7e0271cba80d3ee51b2c90eb8cc26761d030d21d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccb7fdd86b6cca8b702924e4a75b448b78185ab1cd7362e327e12720bd73d09b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4666922908bb339b0a4b434cca7a5683a807e200bb56b9f1cacc195313b5081d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 34.4583740234375,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
4
- "epoch": 32.0,
5
  "eval_steps": 500,
6
- "global_step": 41760,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3182,6 +3182,105 @@
3182
  "eval_samples_per_second": 26.415,
3183
  "eval_steps_per_second": 3.32,
3184
  "step": 41760
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3185
  }
3186
  ],
3187
  "logging_steps": 100,
@@ -3196,7 +3295,7 @@
3196
  "early_stopping_threshold": 0.0
3197
  },
3198
  "attributes": {
3199
- "early_stopping_patience_counter": 4
3200
  }
3201
  },
3202
  "TrainerControl": {
@@ -3210,7 +3309,7 @@
3210
  "attributes": {}
3211
  }
3212
  },
3213
- "total_flos": 4.503394889750938e+16,
3214
  "train_batch_size": 8,
3215
  "trial_name": null,
3216
  "trial_params": null
 
1
  {
2
+ "best_metric": 34.45762252807617,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-43065",
4
+ "epoch": 33.0,
5
  "eval_steps": 500,
6
+ "global_step": 43065,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3182
  "eval_samples_per_second": 26.415,
3183
  "eval_steps_per_second": 3.32,
3184
  "step": 41760
3185
+ },
3186
+ {
3187
+ "epoch": 32.030651340996165,
3188
+ "grad_norm": 3.481757164001465,
3189
+ "learning_rate": 2.99904214559387e-05,
3190
+ "loss": 33.4618,
3191
+ "step": 41800
3192
+ },
3193
+ {
3194
+ "epoch": 32.10727969348659,
3195
+ "grad_norm": 3.9191551208496094,
3196
+ "learning_rate": 2.9942528735632187e-05,
3197
+ "loss": 33.627,
3198
+ "step": 41900
3199
+ },
3200
+ {
3201
+ "epoch": 32.18390804597701,
3202
+ "grad_norm": 5.722991466522217,
3203
+ "learning_rate": 2.989463601532567e-05,
3204
+ "loss": 32.705,
3205
+ "step": 42000
3206
+ },
3207
+ {
3208
+ "epoch": 32.26053639846743,
3209
+ "grad_norm": 4.626276016235352,
3210
+ "learning_rate": 2.9846743295019157e-05,
3211
+ "loss": 33.4211,
3212
+ "step": 42100
3213
+ },
3214
+ {
3215
+ "epoch": 32.337164750957854,
3216
+ "grad_norm": 2.526745557785034,
3217
+ "learning_rate": 2.9798850574712644e-05,
3218
+ "loss": 32.9605,
3219
+ "step": 42200
3220
+ },
3221
+ {
3222
+ "epoch": 32.41379310344828,
3223
+ "grad_norm": 2.2517364025115967,
3224
+ "learning_rate": 2.975095785440613e-05,
3225
+ "loss": 33.1264,
3226
+ "step": 42300
3227
+ },
3228
+ {
3229
+ "epoch": 32.490421455938694,
3230
+ "grad_norm": 5.5678606033325195,
3231
+ "learning_rate": 2.9703065134099618e-05,
3232
+ "loss": 33.1141,
3233
+ "step": 42400
3234
+ },
3235
+ {
3236
+ "epoch": 32.56704980842912,
3237
+ "grad_norm": 3.7891595363616943,
3238
+ "learning_rate": 2.96551724137931e-05,
3239
+ "loss": 33.3294,
3240
+ "step": 42500
3241
+ },
3242
+ {
3243
+ "epoch": 32.64367816091954,
3244
+ "grad_norm": 3.350956916809082,
3245
+ "learning_rate": 2.960727969348659e-05,
3246
+ "loss": 33.6182,
3247
+ "step": 42600
3248
+ },
3249
+ {
3250
+ "epoch": 32.72030651340996,
3251
+ "grad_norm": 3.138821601867676,
3252
+ "learning_rate": 2.9559386973180075e-05,
3253
+ "loss": 33.2219,
3254
+ "step": 42700
3255
+ },
3256
+ {
3257
+ "epoch": 32.79693486590038,
3258
+ "grad_norm": 3.301961898803711,
3259
+ "learning_rate": 2.9511494252873566e-05,
3260
+ "loss": 33.5015,
3261
+ "step": 42800
3262
+ },
3263
+ {
3264
+ "epoch": 32.87356321839081,
3265
+ "grad_norm": 3.0760138034820557,
3266
+ "learning_rate": 2.9463601532567052e-05,
3267
+ "loss": 33.3376,
3268
+ "step": 42900
3269
+ },
3270
+ {
3271
+ "epoch": 32.95019157088122,
3272
+ "grad_norm": 2.474372625350952,
3273
+ "learning_rate": 2.941570881226054e-05,
3274
+ "loss": 32.9016,
3275
+ "step": 43000
3276
+ },
3277
+ {
3278
+ "epoch": 33.0,
3279
+ "eval_loss": 34.45762252807617,
3280
+ "eval_runtime": 49.4143,
3281
+ "eval_samples_per_second": 26.409,
3282
+ "eval_steps_per_second": 3.319,
3283
+ "step": 43065
3284
  }
3285
  ],
3286
  "logging_steps": 100,
 
3295
  "early_stopping_threshold": 0.0
3296
  },
3297
  "attributes": {
3298
+ "early_stopping_patience_counter": 0
3299
  }
3300
  },
3301
  "TrainerControl": {
 
3309
  "attributes": {}
3310
  }
3311
  },
3312
+ "total_flos": 4.644125980055654e+16,
3313
  "train_batch_size": 8,
3314
  "trial_name": null,
3315
  "trial_params": null