mgh6 committed on
Commit
4868d53
·
verified ·
1 Parent(s): e7344bd

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f87a1a364a4c3b074d33d923c337d1857c4daa5fb358a1762ecbe5edd921a65
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9b8a5b43dd8d9de9a3ff93b46f59b51e0f503f1bc70951bfb02ea39bc09a63
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:050bb814577fa11d9d1653fc91c52cba6e25bf23abddb567aab775e12c1b1db4
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a5b8fe8bc5ff747c45994317b6f2e200ec28e7106f8c993157a590d2f3d77bd
3
  size 5365108834
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d018acbbeceb8f6ded094ffbf67a030bf83c1f54ca6f31bd5f038a52ac85034
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3df16cd49a0b8eec2098a33dc0dadf0a4d79c497ced439bd157f2e6ac5bab8f9
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8fbc117afd7b27eea894ff76f1b04973418f72f10dadd713f6dfa7c4713012d
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a70f7b5ef6b95e383628d6376d70e07135c11cad9f40dc480e0188454cbb3db6
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abd2a75ef074977893e3dd803f93d5726cebe2149302e0d0ae3ef1370abb200d
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a34fd3f02bff6d02d805d0bdeac374c0ebc02a92b9a3f16e79223d2bbb10e2b9
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7f1adac70acfa8a2122e4d3f5566d6d9f1d3efc180b57f0dbc4b7b17090217f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d927d2976f1a532e1dfa460599187ed9bfa9e5f16421e7569e35526248d81762
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a199cc7b81fba53fabd1b994537c465f58151394382535a6c542ffe0cc02930
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b32f7bc8829feee7543ba545ec8f492f6a76931d31cd30ffef13d25c9c59f4e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.26852331357768955,
5
  "eval_steps": 500,
6
- "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -497,6 +497,76 @@
497
  "learning_rate": 7.314715359828143e-05,
498
  "loss": 61.2952,
499
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
  }
501
  ],
502
  "logging_steps": 50,
@@ -516,7 +586,7 @@
516
  "attributes": {}
517
  }
518
  },
519
- "total_flos": 5.99843679544279e+18,
520
  "train_batch_size": 2,
521
  "trial_name": null,
522
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.30688378694593094,
5
  "eval_steps": 500,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
497
  "learning_rate": 7.314715359828143e-05,
498
  "loss": 61.2952,
499
  "step": 3500
500
+ },
501
+ {
502
+ "epoch": 0.2723593609145137,
503
+ "grad_norm": 22.69503402709961,
504
+ "learning_rate": 7.27635415068283e-05,
505
+ "loss": 61.2368,
506
+ "step": 3550
507
+ },
508
+ {
509
+ "epoch": 0.2761954082513378,
510
+ "grad_norm": 22.553903579711914,
511
+ "learning_rate": 7.237992941537518e-05,
512
+ "loss": 60.8539,
513
+ "step": 3600
514
+ },
515
+ {
516
+ "epoch": 0.28003145558816195,
517
+ "grad_norm": 56.599422454833984,
518
+ "learning_rate": 7.199631732392206e-05,
519
+ "loss": 60.8724,
520
+ "step": 3650
521
+ },
522
+ {
523
+ "epoch": 0.2838675029249861,
524
+ "grad_norm": 54.43523025512695,
525
+ "learning_rate": 7.161270523246893e-05,
526
+ "loss": 61.0066,
527
+ "step": 3700
528
+ },
529
+ {
530
+ "epoch": 0.2877035502618102,
531
+ "grad_norm": 21.187387466430664,
532
+ "learning_rate": 7.12290931410158e-05,
533
+ "loss": 60.7629,
534
+ "step": 3750
535
+ },
536
+ {
537
+ "epoch": 0.29153959759863435,
538
+ "grad_norm": 13.791070938110352,
539
+ "learning_rate": 7.08454810495627e-05,
540
+ "loss": 60.6954,
541
+ "step": 3800
542
+ },
543
+ {
544
+ "epoch": 0.2953756449354585,
545
+ "grad_norm": 18.809253692626953,
546
+ "learning_rate": 7.046186895810957e-05,
547
+ "loss": 60.6031,
548
+ "step": 3850
549
+ },
550
+ {
551
+ "epoch": 0.2992116922722826,
552
+ "grad_norm": 12.76346206665039,
553
+ "learning_rate": 7.007825686665645e-05,
554
+ "loss": 60.772,
555
+ "step": 3900
556
+ },
557
+ {
558
+ "epoch": 0.3030477396091068,
559
+ "grad_norm": 39.48878479003906,
560
+ "learning_rate": 6.969464477520332e-05,
561
+ "loss": 60.5169,
562
+ "step": 3950
563
+ },
564
+ {
565
+ "epoch": 0.30688378694593094,
566
+ "grad_norm": 61.519203186035156,
567
+ "learning_rate": 6.93110326837502e-05,
568
+ "loss": 60.3146,
569
+ "step": 4000
570
  }
571
  ],
572
  "logging_steps": 50,
 
586
  "attributes": {}
587
  }
588
  },
589
+ "total_flos": 6.855218432384696e+18,
590
  "train_batch_size": 2,
591
  "trial_name": null,
592
  "trial_params": null