romainnn commited on
Commit
c239c37
·
verified ·
1 Parent(s): fc24476

Training in progress, step 2346, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2486c11cdeb92452f6569f7f4645d34f3fecf6d9804950791c482876e167dc31
3
  size 56662456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c143e49ec80b9efa7c889526d4266a7b197596fb39eb75eb8f0c60a959dfb5b3
3
  size 56662456
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:608405eab0f6d3208744fc1dcd08b352694f1fdb0ae36523b678ab0f05719c42
3
  size 29091284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:229ef2852a3e0ca7253143b05e4b688888a7bf5fee9127a04663067e45d8cf7d
3
  size 29091284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ebc651fb234154b1f64daa74415a94e83b19956b155ddc949aefd7e730748ad
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07d6aa011454a4748f5199ba02a257f0c3397f7d63ff5d7de731bdce7a2a6006
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de3131eebd88f15913d23a8b71b93a7cfd80886eb829bbe03936d99f0b97a408
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82f6fb6c04e83ad8c7e7774f50cfceed3bf90e6e42ded09b4deef26723be76bc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.007140692323446274,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2300",
4
- "epoch": 0.6836336615270295,
5
  "eval_steps": 100,
6
- "global_step": 2300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -16299,6 +16299,328 @@
16299
  "eval_samples_per_second": 10.677,
16300
  "eval_steps_per_second": 2.669,
16301
  "step": 2300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16302
  }
16303
  ],
16304
  "logging_steps": 1,
@@ -16322,12 +16644,12 @@
16322
  "should_evaluate": false,
16323
  "should_log": false,
16324
  "should_save": true,
16325
- "should_training_stop": false
16326
  },
16327
  "attributes": {}
16328
  }
16329
  },
16330
- "total_flos": 1.0634494560090194e+18,
16331
  "train_batch_size": 4,
16332
  "trial_name": null,
16333
  "trial_params": null
 
1
  {
2
  "best_metric": 0.007140692323446274,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2300",
4
+ "epoch": 0.6973063347575701,
5
  "eval_steps": 100,
6
+ "global_step": 2346,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
16299
  "eval_samples_per_second": 10.677,
16300
  "eval_steps_per_second": 2.669,
16301
  "step": 2300
16302
+ },
16303
+ {
16304
+ "epoch": 0.6839308935537805,
16305
+ "grad_norm": 0.11718721687793732,
16306
+ "learning_rate": 1.8306973193326084e-07,
16307
+ "loss": 0.0639,
16308
+ "step": 2301
16309
+ },
16310
+ {
16311
+ "epoch": 0.6842281255805313,
16312
+ "grad_norm": 0.11004548519849777,
16313
+ "learning_rate": 1.7502605215715672e-07,
16314
+ "loss": 0.0404,
16315
+ "step": 2302
16316
+ },
16317
+ {
16318
+ "epoch": 0.6845253576072822,
16319
+ "grad_norm": 0.10758353769779205,
16320
+ "learning_rate": 1.671629205999836e-07,
16321
+ "loss": 0.0538,
16322
+ "step": 2303
16323
+ },
16324
+ {
16325
+ "epoch": 0.6848225896340331,
16326
+ "grad_norm": 0.1499020904302597,
16327
+ "learning_rate": 1.5948035148338757e-07,
16328
+ "loss": 0.082,
16329
+ "step": 2304
16330
+ },
16331
+ {
16332
+ "epoch": 0.685119821660784,
16333
+ "grad_norm": 0.10471717268228531,
16334
+ "learning_rate": 1.5197835870242038e-07,
16335
+ "loss": 0.049,
16336
+ "step": 2305
16337
+ },
16338
+ {
16339
+ "epoch": 0.6854170536875348,
16340
+ "grad_norm": 0.09845124185085297,
16341
+ "learning_rate": 1.446569558255395e-07,
16342
+ "loss": 0.0395,
16343
+ "step": 2306
16344
+ },
16345
+ {
16346
+ "epoch": 0.6857142857142857,
16347
+ "grad_norm": 0.10133890062570572,
16348
+ "learning_rate": 1.375161560946081e-07,
16349
+ "loss": 0.044,
16350
+ "step": 2307
16351
+ },
16352
+ {
16353
+ "epoch": 0.6860115177410366,
16354
+ "grad_norm": 0.12090011686086655,
16355
+ "learning_rate": 1.305559724248062e-07,
16356
+ "loss": 0.0723,
16357
+ "step": 2308
16358
+ },
16359
+ {
16360
+ "epoch": 0.6863087497677874,
16361
+ "grad_norm": 0.10536789149045944,
16362
+ "learning_rate": 1.2377641740464187e-07,
16363
+ "loss": 0.0536,
16364
+ "step": 2309
16365
+ },
16366
+ {
16367
+ "epoch": 0.6866059817945384,
16368
+ "grad_norm": 0.13396191596984863,
16369
+ "learning_rate": 1.1717750329595101e-07,
16370
+ "loss": 0.0716,
16371
+ "step": 2310
16372
+ },
16373
+ {
16374
+ "epoch": 0.6869032138212893,
16375
+ "grad_norm": 0.11562539637088776,
16376
+ "learning_rate": 1.1075924203385324e-07,
16377
+ "loss": 0.061,
16378
+ "step": 2311
16379
+ },
16380
+ {
16381
+ "epoch": 0.6872004458480401,
16382
+ "grad_norm": 0.08731340616941452,
16383
+ "learning_rate": 1.0452164522671837e-07,
16384
+ "loss": 0.0424,
16385
+ "step": 2312
16386
+ },
16387
+ {
16388
+ "epoch": 0.687497677874791,
16389
+ "grad_norm": 0.10618704557418823,
16390
+ "learning_rate": 9.846472415615537e-08,
16391
+ "loss": 0.0484,
16392
+ "step": 2313
16393
+ },
16394
+ {
16395
+ "epoch": 0.6877949099015419,
16396
+ "grad_norm": 0.10936351865530014,
16397
+ "learning_rate": 9.258848977700129e-08,
16398
+ "loss": 0.0648,
16399
+ "step": 2314
16400
+ },
16401
+ {
16402
+ "epoch": 0.6880921419282928,
16403
+ "grad_norm": 0.09159952402114868,
16404
+ "learning_rate": 8.689295271729902e-08,
16405
+ "loss": 0.0359,
16406
+ "step": 2315
16407
+ },
16408
+ {
16409
+ "epoch": 0.6883893739550436,
16410
+ "grad_norm": 0.09538505971431732,
16411
+ "learning_rate": 8.13781232782751e-08,
16412
+ "loss": 0.0423,
16413
+ "step": 2316
16414
+ },
16415
+ {
16416
+ "epoch": 0.6886866059817945,
16417
+ "grad_norm": 0.1170530915260315,
16418
+ "learning_rate": 7.604401143430639e-08,
16419
+ "loss": 0.0589,
16420
+ "step": 2317
16421
+ },
16422
+ {
16423
+ "epoch": 0.6889838380085455,
16424
+ "grad_norm": 0.10562342405319214,
16425
+ "learning_rate": 7.089062683292014e-08,
16426
+ "loss": 0.0508,
16427
+ "step": 2318
16428
+ },
16429
+ {
16430
+ "epoch": 0.6892810700352963,
16431
+ "grad_norm": 0.11546720564365387,
16432
+ "learning_rate": 6.591797879478279e-08,
16433
+ "loss": 0.0588,
16434
+ "step": 2319
16435
+ },
16436
+ {
16437
+ "epoch": 0.6895783020620472,
16438
+ "grad_norm": 0.14187178015708923,
16439
+ "learning_rate": 6.112607631364453e-08,
16440
+ "loss": 0.0705,
16441
+ "step": 2320
16442
+ },
16443
+ {
16444
+ "epoch": 0.6898755340887981,
16445
+ "grad_norm": 0.10787046700716019,
16446
+ "learning_rate": 5.65149280563948e-08,
16447
+ "loss": 0.0587,
16448
+ "step": 2321
16449
+ },
16450
+ {
16451
+ "epoch": 0.6901727661155489,
16452
+ "grad_norm": 0.08616691827774048,
16453
+ "learning_rate": 5.208454236296234e-08,
16454
+ "loss": 0.0341,
16455
+ "step": 2322
16456
+ },
16457
+ {
16458
+ "epoch": 0.6904699981422998,
16459
+ "grad_norm": 0.13538287580013275,
16460
+ "learning_rate": 4.783492724635963e-08,
16461
+ "loss": 0.0772,
16462
+ "step": 2323
16463
+ },
16464
+ {
16465
+ "epoch": 0.6907672301690507,
16466
+ "grad_norm": 0.11333715915679932,
16467
+ "learning_rate": 4.376609039262736e-08,
16468
+ "loss": 0.0569,
16469
+ "step": 2324
16470
+ },
16471
+ {
16472
+ "epoch": 0.6910644621958016,
16473
+ "grad_norm": 0.12235751003026962,
16474
+ "learning_rate": 3.9878039160878844e-08,
16475
+ "loss": 0.0556,
16476
+ "step": 2325
16477
+ },
16478
+ {
16479
+ "epoch": 0.6913616942225524,
16480
+ "grad_norm": 0.10557149350643158,
16481
+ "learning_rate": 3.617078058322232e-08,
16482
+ "loss": 0.046,
16483
+ "step": 2326
16484
+ },
16485
+ {
16486
+ "epoch": 0.6916589262493034,
16487
+ "grad_norm": 0.12478914111852646,
16488
+ "learning_rate": 3.264432136478313e-08,
16489
+ "loss": 0.0733,
16490
+ "step": 2327
16491
+ },
16492
+ {
16493
+ "epoch": 0.6919561582760543,
16494
+ "grad_norm": 0.08897604048252106,
16495
+ "learning_rate": 2.9298667883692622e-08,
16496
+ "loss": 0.0388,
16497
+ "step": 2328
16498
+ },
16499
+ {
16500
+ "epoch": 0.6922533903028051,
16501
+ "grad_norm": 0.09747358411550522,
16502
+ "learning_rate": 2.6133826191032663e-08,
16503
+ "loss": 0.0401,
16504
+ "step": 2329
16505
+ },
16506
+ {
16507
+ "epoch": 0.692550622329556,
16508
+ "grad_norm": 0.09661777317523956,
16509
+ "learning_rate": 2.3149802010913323e-08,
16510
+ "loss": 0.0403,
16511
+ "step": 2330
16512
+ },
16513
+ {
16514
+ "epoch": 0.6928478543563069,
16515
+ "grad_norm": 0.12822963297367096,
16516
+ "learning_rate": 2.034660074037298e-08,
16517
+ "loss": 0.0739,
16518
+ "step": 2331
16519
+ },
16520
+ {
16521
+ "epoch": 0.6931450863830578,
16522
+ "grad_norm": 0.12262982130050659,
16523
+ "learning_rate": 1.7724227449422705e-08,
16524
+ "loss": 0.0491,
16525
+ "step": 2332
16526
+ },
16527
+ {
16528
+ "epoch": 0.6934423184098086,
16529
+ "grad_norm": 0.11021570861339569,
16530
+ "learning_rate": 1.5282686881001875e-08,
16531
+ "loss": 0.0444,
16532
+ "step": 2333
16533
+ },
16534
+ {
16535
+ "epoch": 0.6937395504365595,
16536
+ "grad_norm": 0.10723091661930084,
16537
+ "learning_rate": 1.3021983451000364e-08,
16538
+ "loss": 0.0525,
16539
+ "step": 2334
16540
+ },
16541
+ {
16542
+ "epoch": 0.6940367824633105,
16543
+ "grad_norm": 0.12233065813779831,
16544
+ "learning_rate": 1.094212124824745e-08,
16545
+ "loss": 0.0541,
16546
+ "step": 2335
16547
+ },
16548
+ {
16549
+ "epoch": 0.6943340144900613,
16550
+ "grad_norm": 0.12585288286209106,
16551
+ "learning_rate": 9.043104034456295e-09,
16552
+ "loss": 0.0507,
16553
+ "step": 2336
16554
+ },
16555
+ {
16556
+ "epoch": 0.6946312465168122,
16557
+ "grad_norm": 0.08642668277025223,
16558
+ "learning_rate": 7.324935244301667e-09,
16559
+ "loss": 0.0327,
16560
+ "step": 2337
16561
+ },
16562
+ {
16563
+ "epoch": 0.6949284785435631,
16564
+ "grad_norm": 0.10163281857967377,
16565
+ "learning_rate": 5.78761798534222e-09,
16566
+ "loss": 0.0378,
16567
+ "step": 2338
16568
+ },
16569
+ {
16570
+ "epoch": 0.6952257105703139,
16571
+ "grad_norm": 0.1360352784395218,
16572
+ "learning_rate": 4.431155038031598e-09,
16573
+ "loss": 0.0524,
16574
+ "step": 2339
16575
+ },
16576
+ {
16577
+ "epoch": 0.6955229425970648,
16578
+ "grad_norm": 0.12937673926353455,
16579
+ "learning_rate": 3.255548855740642e-09,
16580
+ "loss": 0.0615,
16581
+ "step": 2340
16582
+ },
16583
+ {
16584
+ "epoch": 0.6958201746238157,
16585
+ "grad_norm": 0.11729301512241364,
16586
+ "learning_rate": 2.260801564735182e-09,
16587
+ "loss": 0.0516,
16588
+ "step": 2341
16589
+ },
16590
+ {
16591
+ "epoch": 0.6961174066505667,
16592
+ "grad_norm": 0.1295451819896698,
16593
+ "learning_rate": 1.4469149641538337e-09,
16594
+ "loss": 0.0682,
16595
+ "step": 2342
16596
+ },
16597
+ {
16598
+ "epoch": 0.6964146386773175,
16599
+ "grad_norm": 0.13985326886177063,
16600
+ "learning_rate": 8.138905260302032e-10,
16601
+ "loss": 0.0777,
16602
+ "step": 2343
16603
+ },
16604
+ {
16605
+ "epoch": 0.6967118707040684,
16606
+ "grad_norm": 0.10082249343395233,
16607
+ "learning_rate": 3.617293952817846e-10,
16608
+ "loss": 0.0439,
16609
+ "step": 2344
16610
+ },
16611
+ {
16612
+ "epoch": 0.6970091027308193,
16613
+ "grad_norm": 0.11734326928853989,
16614
+ "learning_rate": 9.043238970996016e-11,
16615
+ "loss": 0.0561,
16616
+ "step": 2345
16617
+ },
16618
+ {
16619
+ "epoch": 0.6973063347575701,
16620
+ "grad_norm": 0.10924455523490906,
16621
+ "learning_rate": 0.0,
16622
+ "loss": 0.0507,
16623
+ "step": 2346
16624
  }
16625
  ],
16626
  "logging_steps": 1,
 
16644
  "should_evaluate": false,
16645
  "should_log": false,
16646
  "should_save": true,
16647
+ "should_training_stop": true
16648
  },
16649
  "attributes": {}
16650
  }
16651
  },
16652
+ "total_flos": 1.0845724172605194e+18,
16653
  "train_batch_size": 4,
16654
  "trial_name": null,
16655
  "trial_params": null