NairaRahim committed on
Commit
ac99204
·
verified ·
1 Parent(s): 9f2f65f

Training in progress, epoch 75, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247c9e7997a56e6ef36972946d3715d25909139e8e1b2b5b10915a4123df01a7
3
  size 559424792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaef454aca5a90004a1f7fa735006aaf07a09b80a0c6a02c48a5c9954a9c62a8
3
  size 559424792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d49286b89bfb500306797db40794e7253e16edb2d06759fd7604e13488743981
3
  size 1118926970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db55a8256108f51c16b7c04a0dda48b0840ae7fedf6f98128494c32336c189e
3
  size 1118926970
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4c49a5f94183d5eea2e85ebecf0920a90a72c6a9d455839a40431ebc99b1b79
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95d7a84ac41b5d0b0849140ed4b55c3d3d4144a4583408c6d56207542a111683
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b9b48a583021dd0f876f8866c9a036b5875c10a77b3bb22134fbf1237d49f01
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e895d61aa04b2cc52d453ad8ef1da3d7dbc854daaaeb69662e0cdbedb748f6ab
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 12.716951370239258,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
4
- "epoch": 74.0,
5
  "eval_steps": 500,
6
- "global_step": 96570,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7354,6 +7354,105 @@
7354
  "eval_samples_per_second": 29.56,
7355
  "eval_steps_per_second": 3.715,
7356
  "step": 96570
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7357
  }
7358
  ],
7359
  "logging_steps": 100,
@@ -7368,7 +7467,7 @@
7368
  "early_stopping_threshold": 0.0
7369
  },
7370
  "attributes": {
7371
- "early_stopping_patience_counter": 6
7372
  }
7373
  },
7374
  "TrainerControl": {
@@ -7382,7 +7481,7 @@
7382
  "attributes": {}
7383
  }
7384
  },
7385
- "total_flos": 4.499281674997862e+16,
7386
  "train_batch_size": 8,
7387
  "trial_name": null,
7388
  "trial_params": null
 
1
  {
2
  "best_metric": 12.716951370239258,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
4
+ "epoch": 75.0,
5
  "eval_steps": 500,
6
+ "global_step": 97875,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7354
  "eval_samples_per_second": 29.56,
7355
  "eval_steps_per_second": 3.715,
7356
  "step": 96570
7357
+ },
7358
+ {
7359
+ "epoch": 74.02298850574712,
7360
+ "grad_norm": 1.6064398288726807,
7361
+ "learning_rate": 3.7543103448275864e-06,
7362
+ "loss": 11.8828,
7363
+ "step": 96600
7364
+ },
7365
+ {
7366
+ "epoch": 74.09961685823755,
7367
+ "grad_norm": 2.088803768157959,
7368
+ "learning_rate": 3.7064176245210733e-06,
7369
+ "loss": 11.7576,
7370
+ "step": 96700
7371
+ },
7372
+ {
7373
+ "epoch": 74.17624521072797,
7374
+ "grad_norm": 1.5417454242706299,
7375
+ "learning_rate": 3.6585249042145593e-06,
7376
+ "loss": 11.9239,
7377
+ "step": 96800
7378
+ },
7379
+ {
7380
+ "epoch": 74.25287356321839,
7381
+ "grad_norm": 1.5983319282531738,
7382
+ "learning_rate": 3.610632183908046e-06,
7383
+ "loss": 11.8119,
7384
+ "step": 96900
7385
+ },
7386
+ {
7387
+ "epoch": 74.32950191570882,
7388
+ "grad_norm": 3.7642099857330322,
7389
+ "learning_rate": 3.5627394636015326e-06,
7390
+ "loss": 11.8259,
7391
+ "step": 97000
7392
+ },
7393
+ {
7394
+ "epoch": 74.40613026819923,
7395
+ "grad_norm": 1.5149072408676147,
7396
+ "learning_rate": 3.5148467432950195e-06,
7397
+ "loss": 11.9898,
7398
+ "step": 97100
7399
+ },
7400
+ {
7401
+ "epoch": 74.48275862068965,
7402
+ "grad_norm": 0.9915036559104919,
7403
+ "learning_rate": 3.4669540229885055e-06,
7404
+ "loss": 11.7665,
7405
+ "step": 97200
7406
+ },
7407
+ {
7408
+ "epoch": 74.55938697318008,
7409
+ "grad_norm": 1.2745176553726196,
7410
+ "learning_rate": 3.4190613026819924e-06,
7411
+ "loss": 11.9657,
7412
+ "step": 97300
7413
+ },
7414
+ {
7415
+ "epoch": 74.6360153256705,
7416
+ "grad_norm": 2.390751600265503,
7417
+ "learning_rate": 3.3711685823754793e-06,
7418
+ "loss": 11.6856,
7419
+ "step": 97400
7420
+ },
7421
+ {
7422
+ "epoch": 74.71264367816092,
7423
+ "grad_norm": 2.2279295921325684,
7424
+ "learning_rate": 3.3232758620689653e-06,
7425
+ "loss": 11.7551,
7426
+ "step": 97500
7427
+ },
7428
+ {
7429
+ "epoch": 74.78927203065135,
7430
+ "grad_norm": 1.8389006853103638,
7431
+ "learning_rate": 3.275383141762452e-06,
7432
+ "loss": 12.0037,
7433
+ "step": 97600
7434
+ },
7435
+ {
7436
+ "epoch": 74.86590038314176,
7437
+ "grad_norm": 1.4288936853408813,
7438
+ "learning_rate": 3.2274904214559387e-06,
7439
+ "loss": 12.0561,
7440
+ "step": 97700
7441
+ },
7442
+ {
7443
+ "epoch": 74.94252873563218,
7444
+ "grad_norm": 1.037800669670105,
7445
+ "learning_rate": 3.1795977011494255e-06,
7446
+ "loss": 11.9257,
7447
+ "step": 97800
7448
+ },
7449
+ {
7450
+ "epoch": 75.0,
7451
+ "eval_loss": 12.724896430969238,
7452
+ "eval_runtime": 44.1538,
7453
+ "eval_samples_per_second": 29.556,
7454
+ "eval_steps_per_second": 3.714,
7455
+ "step": 97875
7456
  }
7457
  ],
7458
  "logging_steps": 100,
 
7467
  "early_stopping_threshold": 0.0
7468
  },
7469
  "attributes": {
7470
+ "early_stopping_patience_counter": 7
7471
  }
7472
  },
7473
  "TrainerControl": {
 
7481
  "attributes": {}
7482
  }
7483
  },
7484
+ "total_flos": 4.560071444651981e+16,
7485
  "train_batch_size": 8,
7486
  "trial_name": null,
7487
  "trial_params": null