rdemorais commited on
Commit
65476e5
1 Parent(s): 573e28e

Training in progress, step 21000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7d8c9fb8208aa0e0d90ff9e6faa30e64692a370a85f4d8998eee25bca138e50
3
  size 2226478553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39a351e6af26803f9eeacbdc3599fe7ba53df6f5641098b8daa5f7b9e0aa7b85
3
  size 2226478553
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff61a403b3cfb6a54ffab03f89d73788332a1bacb81b51101f34eaa479906cb3
3
  size 1113252715
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59dcfd93fbced501dd096334a41c07a4b2e21743fa787c29c4db14612e8f084f
3
  size 1113252715
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddb58e7fd295f3218fc9dda421b48419dac1c35f9f62dd226cd79272ce3149c8
3
  size 17563
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2427d3e67c681daaaba70098244ec32f3b70c7781145897a05d923a006eff520
3
  size 17563
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:766c4f37fdc6039a73178318ed142079f6cd59c61c3481cd6269f2a7cfa68325
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88c633fcf373eeb32eac8eef8541ef4cf7ce7b0edbad3ac306cf7779b78afda6
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5fd6c830f4df967d9f2291b54c56401f3e6ffddab3b4f1fcd21a88c860c00bf
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b337a80290075bf247d1150cdeaf1a5b708e8ccc1775639aea6fdc42b499e1c
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.7039331537933788,
5
- "global_step": 20800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -12486,11 +12486,131 @@
12486
  "learning_rate": 1.5456564448620482e-05,
12487
  "loss": 1.1033,
12488
  "step": 20800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12489
  }
12490
  ],
12491
  "max_steps": 24414,
12492
  "num_train_epochs": 2,
12493
- "total_flos": 2.810210873905797e+18,
12494
  "trial_name": null,
12495
  "trial_params": null
12496
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.7203170279652253,
5
+ "global_step": 21000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
12486
  "learning_rate": 1.5456564448620482e-05,
12487
  "loss": 1.1033,
12488
  "step": 20800
12489
+ },
12490
+ {
12491
+ "epoch": 1.7,
12492
+ "learning_rate": 1.5413854958571797e-05,
12493
+ "loss": 1.1021,
12494
+ "step": 20810
12495
+ },
12496
+ {
12497
+ "epoch": 1.71,
12498
+ "learning_rate": 1.5371145468523108e-05,
12499
+ "loss": 1.0975,
12500
+ "step": 20820
12501
+ },
12502
+ {
12503
+ "epoch": 1.71,
12504
+ "learning_rate": 1.532843597847442e-05,
12505
+ "loss": 1.0981,
12506
+ "step": 20830
12507
+ },
12508
+ {
12509
+ "epoch": 1.71,
12510
+ "learning_rate": 1.528572648842573e-05,
12511
+ "loss": 1.1019,
12512
+ "step": 20840
12513
+ },
12514
+ {
12515
+ "epoch": 1.71,
12516
+ "learning_rate": 1.524301699837704e-05,
12517
+ "loss": 1.1089,
12518
+ "step": 20850
12519
+ },
12520
+ {
12521
+ "epoch": 1.71,
12522
+ "learning_rate": 1.5200307508328351e-05,
12523
+ "loss": 1.1111,
12524
+ "step": 20860
12525
+ },
12526
+ {
12527
+ "epoch": 1.71,
12528
+ "learning_rate": 1.5157598018279662e-05,
12529
+ "loss": 1.1091,
12530
+ "step": 20870
12531
+ },
12532
+ {
12533
+ "epoch": 1.71,
12534
+ "learning_rate": 1.5114888528230975e-05,
12535
+ "loss": 1.1001,
12536
+ "step": 20880
12537
+ },
12538
+ {
12539
+ "epoch": 1.71,
12540
+ "learning_rate": 1.5072179038182286e-05,
12541
+ "loss": 1.0987,
12542
+ "step": 20890
12543
+ },
12544
+ {
12545
+ "epoch": 1.71,
12546
+ "learning_rate": 1.5029469548133595e-05,
12547
+ "loss": 1.107,
12548
+ "step": 20900
12549
+ },
12550
+ {
12551
+ "epoch": 1.71,
12552
+ "learning_rate": 1.4986760058084906e-05,
12553
+ "loss": 1.0973,
12554
+ "step": 20910
12555
+ },
12556
+ {
12557
+ "epoch": 1.71,
12558
+ "learning_rate": 1.494405056803622e-05,
12559
+ "loss": 1.0978,
12560
+ "step": 20920
12561
+ },
12562
+ {
12563
+ "epoch": 1.71,
12564
+ "learning_rate": 1.490134107798753e-05,
12565
+ "loss": 1.0993,
12566
+ "step": 20930
12567
+ },
12568
+ {
12569
+ "epoch": 1.72,
12570
+ "learning_rate": 1.485863158793884e-05,
12571
+ "loss": 1.093,
12572
+ "step": 20940
12573
+ },
12574
+ {
12575
+ "epoch": 1.72,
12576
+ "learning_rate": 1.4815922097890153e-05,
12577
+ "loss": 1.1091,
12578
+ "step": 20950
12579
+ },
12580
+ {
12581
+ "epoch": 1.72,
12582
+ "learning_rate": 1.4773212607841464e-05,
12583
+ "loss": 1.1066,
12584
+ "step": 20960
12585
+ },
12586
+ {
12587
+ "epoch": 1.72,
12588
+ "learning_rate": 1.4730503117792775e-05,
12589
+ "loss": 1.1056,
12590
+ "step": 20970
12591
+ },
12592
+ {
12593
+ "epoch": 1.72,
12594
+ "learning_rate": 1.4687793627744084e-05,
12595
+ "loss": 1.101,
12596
+ "step": 20980
12597
+ },
12598
+ {
12599
+ "epoch": 1.72,
12600
+ "learning_rate": 1.4645084137695398e-05,
12601
+ "loss": 1.1034,
12602
+ "step": 20990
12603
+ },
12604
+ {
12605
+ "epoch": 1.72,
12606
+ "learning_rate": 1.4602374647646707e-05,
12607
+ "loss": 1.0953,
12608
+ "step": 21000
12609
  }
12610
  ],
12611
  "max_steps": 24414,
12612
  "num_train_epochs": 2,
12613
+ "total_flos": 2.837232089174661e+18,
12614
  "trial_name": null,
12615
  "trial_params": null
12616
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f904905931c8a1fe154e57477d84e6e13f86ce2215774d32d1df579ae96fcbd
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6610c57ac1474bf5097bd000e69f982c66ca3a4b3ce31c1509196f6a47bc4144
3
  size 3439
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff61a403b3cfb6a54ffab03f89d73788332a1bacb81b51101f34eaa479906cb3
3
  size 1113252715
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59dcfd93fbced501dd096334a41c07a4b2e21743fa787c29c4db14612e8f084f
3
  size 1113252715
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f904905931c8a1fe154e57477d84e6e13f86ce2215774d32d1df579ae96fcbd
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6610c57ac1474bf5097bd000e69f982c66ca3a4b3ce31c1509196f6a47bc4144
3
  size 3439