mika5883 committed on
Commit
327c9d0
1 Parent(s): 4af45d2

Training in progress, step 42000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a853f72dbaed77b0f2b7855c598ca5e8f7205a774d5e4a484dc9618c1d703f9c
3
  size 891644712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:341cf52e44aee26bac163dde1d46d0da21ad4c56269b433dfffb30a288535511
3
  size 891644712
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1425ddea2e1aa4193cc447538f36a29b778873646642b0054c31483367ff5ac8
3
  size 1783444357
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fa19a4f5dbef468935d63a1f85e3cfae44ec707144d148f1de907a8b462aaae
3
  size 1783444357
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f28c7019382eca2900417a705546cdab309d5cbea5a7dff4f774785ccae3006
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6be41eee8b8a0220527e2dacace3c21226f82da4d47c62f24e447c03fa2152a0
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7beb525f6000838c9b12583ad1253138900b22ae2a96dcaafec662e05e58650
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8cafd6acaa1b671acd46943cc34e18e4d792ad12b254ee219f6cb062b3f2459
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2496,
5
  "eval_steps": 500,
6
- "global_step": 39000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -553,6 +553,48 @@
553
  "learning_rate": 4.376224e-05,
554
  "loss": 0.3551,
555
  "step": 39000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
556
  }
557
  ],
558
  "logging_steps": 500,
@@ -572,7 +614,7 @@
572
  "attributes": {}
573
  }
574
  },
575
- "total_flos": 1.8999486185472e+17,
576
  "train_batch_size": 64,
577
  "trial_name": null,
578
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2688,
5
  "eval_steps": 500,
6
+ "global_step": 42000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
553
  "learning_rate": 4.376224e-05,
554
  "loss": 0.3551,
555
  "step": 39000
556
+ },
557
+ {
558
+ "epoch": 0.2528,
559
+ "grad_norm": 0.6636275053024292,
560
+ "learning_rate": 4.368224e-05,
561
+ "loss": 0.3587,
562
+ "step": 39500
563
+ },
564
+ {
565
+ "epoch": 0.256,
566
+ "grad_norm": 0.710564911365509,
567
+ "learning_rate": 4.360224e-05,
568
+ "loss": 0.3537,
569
+ "step": 40000
570
+ },
571
+ {
572
+ "epoch": 0.2592,
573
+ "grad_norm": 0.6195800304412842,
574
+ "learning_rate": 4.3522240000000004e-05,
575
+ "loss": 0.3537,
576
+ "step": 40500
577
+ },
578
+ {
579
+ "epoch": 0.2624,
580
+ "grad_norm": 0.7131514549255371,
581
+ "learning_rate": 4.34424e-05,
582
+ "loss": 0.3531,
583
+ "step": 41000
584
+ },
585
+ {
586
+ "epoch": 0.2656,
587
+ "grad_norm": 0.6594410538673401,
588
+ "learning_rate": 4.336256e-05,
589
+ "loss": 0.3518,
590
+ "step": 41500
591
+ },
592
+ {
593
+ "epoch": 0.2688,
594
+ "grad_norm": 0.7651230096817017,
595
+ "learning_rate": 4.328256e-05,
596
+ "loss": 0.3516,
597
+ "step": 42000
598
  }
599
  ],
600
  "logging_steps": 500,
 
614
  "attributes": {}
615
  }
616
  },
617
+ "total_flos": 2.0460985122816e+17,
618
  "train_batch_size": 64,
619
  "trial_name": null,
620
  "trial_params": null