mgh6 commited on
Commit
b8b2c82
·
verified ·
1 Parent(s): ca9d6b1

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10500d6936226928862f57d6cd94f4497878f0b1bb025edca400a181e583d0e9
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26d8f5a8a6c2b2142e1a897b44d8d10c4b31cd9bfb6af9a2f209cb1c664219db
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60868e053ffcd706a9db40ce51977df10a098297affcd57b93e91fffd802ca7e
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5e3926040695ffd36b68b04adaa45e63e11c719919bb0b1c4e2bd27ee1e14c0
3
  size 5365108834
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d018acbbeceb8f6ded094ffbf67a030bf83c1f54ca6f31bd5f038a52ac85034
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3df16cd49a0b8eec2098a33dc0dadf0a4d79c497ced439bd157f2e6ac5bab8f9
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8fbc117afd7b27eea894ff76f1b04973418f72f10dadd713f6dfa7c4713012d
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a70f7b5ef6b95e383628d6376d70e07135c11cad9f40dc480e0188454cbb3db6
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abd2a75ef074977893e3dd803f93d5726cebe2149302e0d0ae3ef1370abb200d
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a34fd3f02bff6d02d805d0bdeac374c0ebc02a92b9a3f16e79223d2bbb10e2b9
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7f1adac70acfa8a2122e4d3f5566d6d9f1d3efc180b57f0dbc4b7b17090217f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d927d2976f1a532e1dfa460599187ed9bfa9e5f16421e7569e35526248d81762
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a199cc7b81fba53fabd1b994537c465f58151394382535a6c542ffe0cc02930
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b32f7bc8829feee7543ba545ec8f492f6a76931d31cd30ffef13d25c9c59f4e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.26852331357768955,
5
  "eval_steps": 500,
6
- "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -497,6 +497,76 @@
497
  "learning_rate": 7.314715359828143e-05,
498
  "loss": 58.981,
499
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
  }
501
  ],
502
  "logging_steps": 50,
@@ -516,7 +586,7 @@
516
  "attributes": {}
517
  }
518
  },
519
- "total_flos": 5.99843679544279e+18,
520
  "train_batch_size": 2,
521
  "trial_name": null,
522
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.30688378694593094,
5
  "eval_steps": 500,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
497
  "learning_rate": 7.314715359828143e-05,
498
  "loss": 58.981,
499
  "step": 3500
500
+ },
501
+ {
502
+ "epoch": 0.2723593609145137,
503
+ "grad_norm": 51.17266082763672,
504
+ "learning_rate": 7.27635415068283e-05,
505
+ "loss": 58.9879,
506
+ "step": 3550
507
+ },
508
+ {
509
+ "epoch": 0.2761954082513378,
510
+ "grad_norm": 48.200164794921875,
511
+ "learning_rate": 7.237992941537518e-05,
512
+ "loss": 58.5352,
513
+ "step": 3600
514
+ },
515
+ {
516
+ "epoch": 0.28003145558816195,
517
+ "grad_norm": 37.883766174316406,
518
+ "learning_rate": 7.199631732392206e-05,
519
+ "loss": 58.6675,
520
+ "step": 3650
521
+ },
522
+ {
523
+ "epoch": 0.2838675029249861,
524
+ "grad_norm": 45.00859832763672,
525
+ "learning_rate": 7.161270523246893e-05,
526
+ "loss": 58.7792,
527
+ "step": 3700
528
+ },
529
+ {
530
+ "epoch": 0.2877035502618102,
531
+ "grad_norm": 26.977285385131836,
532
+ "learning_rate": 7.12290931410158e-05,
533
+ "loss": 58.4862,
534
+ "step": 3750
535
+ },
536
+ {
537
+ "epoch": 0.29153959759863435,
538
+ "grad_norm": 38.32195281982422,
539
+ "learning_rate": 7.08454810495627e-05,
540
+ "loss": 58.3661,
541
+ "step": 3800
542
+ },
543
+ {
544
+ "epoch": 0.2953756449354585,
545
+ "grad_norm": 63.87961196899414,
546
+ "learning_rate": 7.046186895810957e-05,
547
+ "loss": 58.25,
548
+ "step": 3850
549
+ },
550
+ {
551
+ "epoch": 0.2992116922722826,
552
+ "grad_norm": 35.58587646484375,
553
+ "learning_rate": 7.007825686665645e-05,
554
+ "loss": 58.3304,
555
+ "step": 3900
556
+ },
557
+ {
558
+ "epoch": 0.3030477396091068,
559
+ "grad_norm": 60.112632751464844,
560
+ "learning_rate": 6.969464477520332e-05,
561
+ "loss": 58.1747,
562
+ "step": 3950
563
+ },
564
+ {
565
+ "epoch": 0.30688378694593094,
566
+ "grad_norm": 41.148353576660156,
567
+ "learning_rate": 6.93110326837502e-05,
568
+ "loss": 57.8954,
569
+ "step": 4000
570
  }
571
  ],
572
  "logging_steps": 50,
 
586
  "attributes": {}
587
  }
588
  },
589
+ "total_flos": 6.855218432384696e+18,
590
  "train_batch_size": 2,
591
  "trial_name": null,
592
  "trial_params": null