RoyJoy commited on
Commit
129fc4b
·
verified ·
1 Parent(s): e50e8fc

Training in progress, step 75, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d443a0367b73f54f32eba9bc71a6295631dd07c11ac781c0bfb7e07cf98cd1cc
3
  size 60599872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf77cb4ff4081a992fcf1dae69c126e806763557a75008415a70e77c97cc5696
3
  size 60599872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0732b5f792597bb462eddced8cf72cdfe54fcb6e53d2359f468269966ba19a63
3
  size 121392706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c9be606d592580935ac71d7bbcd9dca72cdb42d5875144e12418a79ac45172e
3
  size 121392706
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83f5738bb1d47b319b4f0515dbcb4b63e9b08511a4d6d02e0dd2483c2b30363a
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0656e9afd23988e47c65b323e07581be25b21ac41767c4c71d73b44d12051a45
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bd8adc1a67335790373978db491a9b9fa358cba00f13a28706aed9631c716e1
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d21664c9d5f8dfeaf6f6dc45ce8b272694b2a8b336da2db1358ab7398d1b9b20
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f17606f7f9289687e7255ae73e0167693d08e1fe87e79f9ee768112fec29b437
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ab66591a435956440f940ed333bc8f4242131d343a362510cf590c99bcec05
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45e0a828af05e53858c8d6a3625ae358aec84dfa1d70e13c154ecae4bd2aace6
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eb53ff07a9146960fd3083194be8a315712ffa16c26a6fbeb7dd1024158da0f
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a06debcc524d5e29377ca1c81e86a2cd28c93506013f68ac7d1bf85491fb4e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:370cee31176b8bff781da8f054b9870dc93c63a8623674218a84718aa7abd3af
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.6452839374542236,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 0.32076984763432237,
5
  "eval_steps": 25,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -381,6 +381,189 @@
381
  "eval_samples_per_second": 82.909,
382
  "eval_steps_per_second": 21.556,
383
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  }
385
  ],
386
  "logging_steps": 1,
@@ -409,7 +592,7 @@
409
  "attributes": {}
410
  }
411
  },
412
- "total_flos": 2.54445405339648e+16,
413
  "train_batch_size": 1,
414
  "trial_name": null,
415
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6430225968360901,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-75",
4
+ "epoch": 0.48115477145148355,
5
  "eval_steps": 25,
6
+ "global_step": 75,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
381
  "eval_samples_per_second": 82.909,
382
  "eval_steps_per_second": 21.556,
383
  "step": 50
384
+ },
385
+ {
386
+ "epoch": 0.3271852445870088,
387
+ "grad_norm": 0.4414086937904358,
388
+ "learning_rate": 5.6472358726979935e-05,
389
+ "loss": 0.6875,
390
+ "step": 51
391
+ },
392
+ {
393
+ "epoch": 0.33360064153969526,
394
+ "grad_norm": 0.4426858425140381,
395
+ "learning_rate": 5.500000000000001e-05,
396
+ "loss": 0.6874,
397
+ "step": 52
398
+ },
399
+ {
400
+ "epoch": 0.3400160384923817,
401
+ "grad_norm": 0.37037479877471924,
402
+ "learning_rate": 5.352764127302008e-05,
403
+ "loss": 0.6825,
404
+ "step": 53
405
+ },
406
+ {
407
+ "epoch": 0.34643143544506816,
408
+ "grad_norm": 0.35772907733917236,
409
+ "learning_rate": 5.205685918464356e-05,
410
+ "loss": 0.684,
411
+ "step": 54
412
+ },
413
+ {
414
+ "epoch": 0.3528468323977546,
415
+ "grad_norm": 0.2680012583732605,
416
+ "learning_rate": 5.058922868516978e-05,
417
+ "loss": 0.6749,
418
+ "step": 55
419
+ },
420
+ {
421
+ "epoch": 0.35926222935044105,
422
+ "grad_norm": 0.3476446568965912,
423
+ "learning_rate": 4.912632135009769e-05,
424
+ "loss": 0.6284,
425
+ "step": 56
426
+ },
427
+ {
428
+ "epoch": 0.3656776263031275,
429
+ "grad_norm": 0.3562479019165039,
430
+ "learning_rate": 4.7669703697243516e-05,
431
+ "loss": 0.6325,
432
+ "step": 57
433
+ },
434
+ {
435
+ "epoch": 0.37209302325581395,
436
+ "grad_norm": 0.3966968059539795,
437
+ "learning_rate": 4.6220935509274235e-05,
438
+ "loss": 0.635,
439
+ "step": 58
440
+ },
441
+ {
442
+ "epoch": 0.3785084202085004,
443
+ "grad_norm": 0.3592650294303894,
444
+ "learning_rate": 4.478156816345321e-05,
445
+ "loss": 0.6277,
446
+ "step": 59
447
+ },
448
+ {
449
+ "epoch": 0.38492381716118684,
450
+ "grad_norm": 0.3256514370441437,
451
+ "learning_rate": 4.3353142970386564e-05,
452
+ "loss": 0.6278,
453
+ "step": 60
454
+ },
455
+ {
456
+ "epoch": 0.3913392141138733,
457
+ "grad_norm": 0.3300212323665619,
458
+ "learning_rate": 4.19371895235492e-05,
459
+ "loss": 0.6299,
460
+ "step": 61
461
+ },
462
+ {
463
+ "epoch": 0.39775461106655974,
464
+ "grad_norm": 0.3095785975456238,
465
+ "learning_rate": 4.053522406135775e-05,
466
+ "loss": 0.622,
467
+ "step": 62
468
+ },
469
+ {
470
+ "epoch": 0.4041700080192462,
471
+ "grad_norm": 0.3082052171230316,
472
+ "learning_rate": 3.9148747843544495e-05,
473
+ "loss": 0.6653,
474
+ "step": 63
475
+ },
476
+ {
477
+ "epoch": 0.41058540497193263,
478
+ "grad_norm": 0.4814322292804718,
479
+ "learning_rate": 3.777924554357096e-05,
480
+ "loss": 0.6819,
481
+ "step": 64
482
+ },
483
+ {
484
+ "epoch": 0.4170008019246191,
485
+ "grad_norm": 0.48691344261169434,
486
+ "learning_rate": 3.642818365880224e-05,
487
+ "loss": 0.6831,
488
+ "step": 65
489
+ },
490
+ {
491
+ "epoch": 0.4234161988773055,
492
+ "grad_norm": 0.389440655708313,
493
+ "learning_rate": 3.509700894014496e-05,
494
+ "loss": 0.6806,
495
+ "step": 66
496
+ },
497
+ {
498
+ "epoch": 0.429831595829992,
499
+ "grad_norm": 0.34976035356521606,
500
+ "learning_rate": 3.378714684283011e-05,
501
+ "loss": 0.6767,
502
+ "step": 67
503
+ },
504
+ {
505
+ "epoch": 0.4362469927826784,
506
+ "grad_norm": 0.15002594888210297,
507
+ "learning_rate": 3.250000000000001e-05,
508
+ "loss": 0.6543,
509
+ "step": 68
510
+ },
511
+ {
512
+ "epoch": 0.44266238973536487,
513
+ "grad_norm": 0.2920244634151459,
514
+ "learning_rate": 3.123694672073344e-05,
515
+ "loss": 0.6271,
516
+ "step": 69
517
+ },
518
+ {
519
+ "epoch": 0.4490777866880513,
520
+ "grad_norm": 0.32228878140449524,
521
+ "learning_rate": 2.9999339514117912e-05,
522
+ "loss": 0.6268,
523
+ "step": 70
524
+ },
525
+ {
526
+ "epoch": 0.45549318364073776,
527
+ "grad_norm": 0.36818942427635193,
528
+ "learning_rate": 2.8788503640948912e-05,
529
+ "loss": 0.6333,
530
+ "step": 71
531
+ },
532
+ {
533
+ "epoch": 0.4619085805934242,
534
+ "grad_norm": 0.28305715322494507,
535
+ "learning_rate": 2.760573569460757e-05,
536
+ "loss": 0.6243,
537
+ "step": 72
538
+ },
539
+ {
540
+ "epoch": 0.46832397754611066,
541
+ "grad_norm": 0.3117271065711975,
542
+ "learning_rate": 2.645230221263596e-05,
543
+ "loss": 0.6245,
544
+ "step": 73
545
+ },
546
+ {
547
+ "epoch": 0.4747393744987971,
548
+ "grad_norm": 0.28173038363456726,
549
+ "learning_rate": 2.53294383204969e-05,
550
+ "loss": 0.6205,
551
+ "step": 74
552
+ },
553
+ {
554
+ "epoch": 0.48115477145148355,
555
+ "grad_norm": 0.2890748381614685,
556
+ "learning_rate": 2.423834640897079e-05,
557
+ "loss": 0.6255,
558
+ "step": 75
559
+ },
560
+ {
561
+ "epoch": 0.48115477145148355,
562
+ "eval_loss": 0.6430225968360901,
563
+ "eval_runtime": 0.6128,
564
+ "eval_samples_per_second": 81.594,
565
+ "eval_steps_per_second": 21.214,
566
+ "step": 75
567
  }
568
  ],
569
  "logging_steps": 1,
 
592
  "attributes": {}
593
  }
594
  },
595
+ "total_flos": 3.81668108009472e+16,
596
  "train_batch_size": 1,
597
  "trial_name": null,
598
  "trial_params": null