NairaRahim commited on
Commit
1b5861f
·
verified ·
1 Parent(s): a84f73f

Training in progress, epoch 76, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eaef454aca5a90004a1f7fa735006aaf07a09b80a0c6a02c48a5c9954a9c62a8
3
  size 559424792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4af379246575accc58f6abfac00aee925ce6b65883c8632448c83ee50ddfd07
3
  size 559424792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6db55a8256108f51c16b7c04a0dda48b0840ae7fedf6f98128494c32336c189e
3
  size 1118926970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:930083a4131ce65cdc8b356487bf1b813c159fa19cc01194a70d602dd9b94b36
3
  size 1118926970
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95d7a84ac41b5d0b0849140ed4b55c3d3d4144a4583408c6d56207542a111683
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:914809037b3f88a1a2608685ab6ce1391a78e990c3ce33067466cc03d6a8480d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e895d61aa04b2cc52d453ad8ef1da3d7dbc854daaaeb69662e0cdbedb748f6ab
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f90a182f176031c61cd117d508af79c4ff26bf3b95484e9bbe4017a087414d71
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 12.716951370239258,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
4
- "epoch": 75.0,
5
  "eval_steps": 500,
6
- "global_step": 97875,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7453,6 +7453,105 @@
7453
  "eval_samples_per_second": 29.556,
7454
  "eval_steps_per_second": 3.714,
7455
  "step": 97875
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7456
  }
7457
  ],
7458
  "logging_steps": 100,
@@ -7467,7 +7566,7 @@
7467
  "early_stopping_threshold": 0.0
7468
  },
7469
  "attributes": {
7470
- "early_stopping_patience_counter": 7
7471
  }
7472
  },
7473
  "TrainerControl": {
@@ -7481,7 +7580,7 @@
7481
  "attributes": {}
7482
  }
7483
  },
7484
- "total_flos": 4.560071444651981e+16,
7485
  "train_batch_size": 8,
7486
  "trial_name": null,
7487
  "trial_params": null
 
1
  {
2
  "best_metric": 12.716951370239258,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
4
+ "epoch": 76.0,
5
  "eval_steps": 500,
6
+ "global_step": 99180,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7453
  "eval_samples_per_second": 29.556,
7454
  "eval_steps_per_second": 3.714,
7455
  "step": 97875
7456
+ },
7457
+ {
7458
+ "epoch": 75.01915708812261,
7459
+ "grad_norm": 0.9783554673194885,
7460
+ "learning_rate": 3.1317049808429124e-06,
7461
+ "loss": 11.7455,
7462
+ "step": 97900
7463
+ },
7464
+ {
7465
+ "epoch": 75.09578544061303,
7466
+ "grad_norm": 1.4434301853179932,
7467
+ "learning_rate": 3.0838122605363985e-06,
7468
+ "loss": 11.99,
7469
+ "step": 98000
7470
+ },
7471
+ {
7472
+ "epoch": 75.17241379310344,
7473
+ "grad_norm": 1.2560200691223145,
7474
+ "learning_rate": 3.035919540229885e-06,
7475
+ "loss": 11.8445,
7476
+ "step": 98100
7477
+ },
7478
+ {
7479
+ "epoch": 75.24904214559388,
7480
+ "grad_norm": 1.123687982559204,
7481
+ "learning_rate": 2.988026819923372e-06,
7482
+ "loss": 11.8894,
7483
+ "step": 98200
7484
+ },
7485
+ {
7486
+ "epoch": 75.32567049808429,
7487
+ "grad_norm": 1.2393250465393066,
7488
+ "learning_rate": 2.9401340996168583e-06,
7489
+ "loss": 11.7591,
7490
+ "step": 98300
7491
+ },
7492
+ {
7493
+ "epoch": 75.40229885057471,
7494
+ "grad_norm": 2.023070812225342,
7495
+ "learning_rate": 2.892241379310345e-06,
7496
+ "loss": 11.7083,
7497
+ "step": 98400
7498
+ },
7499
+ {
7500
+ "epoch": 75.47892720306514,
7501
+ "grad_norm": 1.7746585607528687,
7502
+ "learning_rate": 2.8443486590038316e-06,
7503
+ "loss": 12.0237,
7504
+ "step": 98500
7505
+ },
7506
+ {
7507
+ "epoch": 75.55555555555556,
7508
+ "grad_norm": 1.6215800046920776,
7509
+ "learning_rate": 2.796455938697318e-06,
7510
+ "loss": 11.8271,
7511
+ "step": 98600
7512
+ },
7513
+ {
7514
+ "epoch": 75.63218390804597,
7515
+ "grad_norm": 2.3727614879608154,
7516
+ "learning_rate": 2.7490421455938698e-06,
7517
+ "loss": 11.9133,
7518
+ "step": 98700
7519
+ },
7520
+ {
7521
+ "epoch": 75.7088122605364,
7522
+ "grad_norm": 1.562569260597229,
7523
+ "learning_rate": 2.7011494252873562e-06,
7524
+ "loss": 11.8886,
7525
+ "step": 98800
7526
+ },
7527
+ {
7528
+ "epoch": 75.78544061302682,
7529
+ "grad_norm": 0.8996521830558777,
7530
+ "learning_rate": 2.653256704980843e-06,
7531
+ "loss": 11.6606,
7532
+ "step": 98900
7533
+ },
7534
+ {
7535
+ "epoch": 75.86206896551724,
7536
+ "grad_norm": 1.6331411600112915,
7537
+ "learning_rate": 2.6053639846743296e-06,
7538
+ "loss": 12.057,
7539
+ "step": 99000
7540
+ },
7541
+ {
7542
+ "epoch": 75.93869731800767,
7543
+ "grad_norm": 1.2690104246139526,
7544
+ "learning_rate": 2.5574712643678165e-06,
7545
+ "loss": 11.9791,
7546
+ "step": 99100
7547
+ },
7548
+ {
7549
+ "epoch": 76.0,
7550
+ "eval_loss": 12.717323303222656,
7551
+ "eval_runtime": 44.1546,
7552
+ "eval_samples_per_second": 29.555,
7553
+ "eval_steps_per_second": 3.714,
7554
+ "step": 99180
7555
  }
7556
  ],
7557
  "logging_steps": 100,
 
7566
  "early_stopping_threshold": 0.0
7567
  },
7568
  "attributes": {
7569
+ "early_stopping_patience_counter": 8
7570
  }
7571
  },
7572
  "TrainerControl": {
 
7580
  "attributes": {}
7581
  }
7582
  },
7583
+ "total_flos": 4.620861214306099e+16,
7584
  "train_batch_size": 8,
7585
  "trial_name": null,
7586
  "trial_params": null