rdemorais commited on
Commit
573e28e
1 Parent(s): 4b8e7a2

Training in progress, step 20800

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c63330816ccaa7eda09a98c9f07e097f09338926db600d22dfaf980f1921ac8
3
  size 2226478553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7d8c9fb8208aa0e0d90ff9e6faa30e64692a370a85f4d8998eee25bca138e50
3
  size 2226478553
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee04f652a51b6c537c44aaf1d5335a4bcc6258243d5986c3fc233b10ae259e3b
3
  size 1113252715
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff61a403b3cfb6a54ffab03f89d73788332a1bacb81b51101f34eaa479906cb3
3
  size 1113252715
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c53ab751c6683e09e4ff06934dec2839797c82ceb12efc2867a08c6e46ccfcd7
3
  size 17563
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddb58e7fd295f3218fc9dda421b48419dac1c35f9f62dd226cd79272ce3149c8
3
  size 17563
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fb737367e050016a9bfddb6625be031dccf493be9f0d3fe354594d4e3265367
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:766c4f37fdc6039a73178318ed142079f6cd59c61c3481cd6269f2a7cfa68325
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:331e919fbbba265e0172bc959c2bfb37858003f990a9378fe6a9697a94ef9ffd
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5fd6c830f4df967d9f2291b54c56401f3e6ffddab3b4f1fcd21a88c860c00bf
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.6875492796215326,
5
- "global_step": 20600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -12366,11 +12366,131 @@
12366
  "learning_rate": 1.631075424959426e-05,
12367
  "loss": 1.1047,
12368
  "step": 20600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12369
  }
12370
  ],
12371
  "max_steps": 24414,
12372
  "num_train_epochs": 2,
12373
- "total_flos": 2.783189658636933e+18,
12374
  "trial_name": null,
12375
  "trial_params": null
12376
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.7039331537933788,
5
+ "global_step": 20800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
12366
  "learning_rate": 1.631075424959426e-05,
12367
  "loss": 1.1047,
12368
  "step": 20600
12369
+ },
12370
+ {
12371
+ "epoch": 1.69,
12372
+ "learning_rate": 1.626804475954557e-05,
12373
+ "loss": 1.0996,
12374
+ "step": 20610
12375
+ },
12376
+ {
12377
+ "epoch": 1.69,
12378
+ "learning_rate": 1.6225335269496882e-05,
12379
+ "loss": 1.108,
12380
+ "step": 20620
12381
+ },
12382
+ {
12383
+ "epoch": 1.69,
12384
+ "learning_rate": 1.6182625779448193e-05,
12385
+ "loss": 1.1041,
12386
+ "step": 20630
12387
+ },
12388
+ {
12389
+ "epoch": 1.69,
12390
+ "learning_rate": 1.6139916289399508e-05,
12391
+ "loss": 1.096,
12392
+ "step": 20640
12393
+ },
12394
+ {
12395
+ "epoch": 1.69,
12396
+ "learning_rate": 1.6097206799350815e-05,
12397
+ "loss": 1.0904,
12398
+ "step": 20650
12399
+ },
12400
+ {
12401
+ "epoch": 1.69,
12402
+ "learning_rate": 1.6054497309302126e-05,
12403
+ "loss": 1.1021,
12404
+ "step": 20660
12405
+ },
12406
+ {
12407
+ "epoch": 1.69,
12408
+ "learning_rate": 1.6011787819253437e-05,
12409
+ "loss": 1.1042,
12410
+ "step": 20670
12411
+ },
12412
+ {
12413
+ "epoch": 1.69,
12414
+ "learning_rate": 1.596907832920475e-05,
12415
+ "loss": 1.107,
12416
+ "step": 20680
12417
+ },
12418
+ {
12419
+ "epoch": 1.69,
12420
+ "learning_rate": 1.5926368839156062e-05,
12421
+ "loss": 1.0975,
12422
+ "step": 20690
12423
+ },
12424
+ {
12425
+ "epoch": 1.7,
12426
+ "learning_rate": 1.5883659349107373e-05,
12427
+ "loss": 1.1032,
12428
+ "step": 20700
12429
+ },
12430
+ {
12431
+ "epoch": 1.7,
12432
+ "learning_rate": 1.5840949859058684e-05,
12433
+ "loss": 1.1146,
12434
+ "step": 20710
12435
+ },
12436
+ {
12437
+ "epoch": 1.7,
12438
+ "learning_rate": 1.5798240369009995e-05,
12439
+ "loss": 1.0973,
12440
+ "step": 20720
12441
+ },
12442
+ {
12443
+ "epoch": 1.7,
12444
+ "learning_rate": 1.5755530878961306e-05,
12445
+ "loss": 1.105,
12446
+ "step": 20730
12447
+ },
12448
+ {
12449
+ "epoch": 1.7,
12450
+ "learning_rate": 1.5712821388912617e-05,
12451
+ "loss": 1.1082,
12452
+ "step": 20740
12453
+ },
12454
+ {
12455
+ "epoch": 1.7,
12456
+ "learning_rate": 1.5670111898863928e-05,
12457
+ "loss": 1.1024,
12458
+ "step": 20750
12459
+ },
12460
+ {
12461
+ "epoch": 1.7,
12462
+ "learning_rate": 1.562740240881524e-05,
12463
+ "loss": 1.103,
12464
+ "step": 20760
12465
+ },
12466
+ {
12467
+ "epoch": 1.7,
12468
+ "learning_rate": 1.558469291876655e-05,
12469
+ "loss": 1.097,
12470
+ "step": 20770
12471
+ },
12472
+ {
12473
+ "epoch": 1.7,
12474
+ "learning_rate": 1.5541983428717864e-05,
12475
+ "loss": 1.0996,
12476
+ "step": 20780
12477
+ },
12478
+ {
12479
+ "epoch": 1.7,
12480
+ "learning_rate": 1.5499273938669175e-05,
12481
+ "loss": 1.1077,
12482
+ "step": 20790
12483
+ },
12484
+ {
12485
+ "epoch": 1.7,
12486
+ "learning_rate": 1.5456564448620482e-05,
12487
+ "loss": 1.1033,
12488
+ "step": 20800
12489
  }
12490
  ],
12491
  "max_steps": 24414,
12492
  "num_train_epochs": 2,
12493
+ "total_flos": 2.810210873905797e+18,
12494
  "trial_name": null,
12495
  "trial_params": null
12496
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee04f652a51b6c537c44aaf1d5335a4bcc6258243d5986c3fc233b10ae259e3b
3
  size 1113252715
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff61a403b3cfb6a54ffab03f89d73788332a1bacb81b51101f34eaa479906cb3
3
  size 1113252715