lesso03 commited on
Commit
cd6f0e2
·
verified ·
1 Parent(s): e878087

Training in progress, step 71, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d92ea60efbfec261af770665fdfbc613d0eec6f177813f0128f10a26a682c754
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9760cc00fcc6c08640955253db2351e0fa8f6d8c6ba4fcd95faaa067cadaaf68
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f7f417a985c37b21d6e2e7565c7584b79b3b88cc5ebc32ed6ff296a24ac2333
3
  size 335922386
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97555fac4974d5de2f4830d3d1b76e43cbfc8cb5d904d61db74c44a63293e8b5
3
  size 335922386
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a537322ad8b13e53e04e7307211efb0c953aabdad90bc42ba3149fe4980cca8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:068139d00f03861bb041ac976325366e812cd7e0f33232157dc99b83a2d7f085
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5d90e9c1bf64ebe69004bc26a02dd8597602f1a7db0279de49dcd6a296224c7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e683effdbf1801ca21c52b8334cbe1684c23fef6e33fad16cce510e4700ea65
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.127659574468085,
5
  "eval_steps": 6,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -429,6 +429,177 @@
429
  "learning_rate": 2.650116284863402e-05,
430
  "loss": 1.7862,
431
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432
  }
433
  ],
434
  "logging_steps": 1,
@@ -443,12 +614,12 @@
443
  "should_evaluate": false,
444
  "should_log": false,
445
  "should_save": true,
446
- "should_training_stop": false
447
  },
448
  "attributes": {}
449
  }
450
  },
451
- "total_flos": 3.70943641780224e+16,
452
  "train_batch_size": 8,
453
  "trial_name": null,
454
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.021276595744681,
5
  "eval_steps": 6,
6
+ "global_step": 71,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
429
  "learning_rate": 2.650116284863402e-05,
430
  "loss": 1.7862,
431
  "step": 50
432
+ },
433
+ {
434
+ "epoch": 2.1702127659574466,
435
+ "grad_norm": 3.1009395122528076,
436
+ "learning_rate": 2.426035992450848e-05,
437
+ "loss": 1.5236,
438
+ "step": 51
439
+ },
440
+ {
441
+ "epoch": 2.2127659574468086,
442
+ "grad_norm": 3.100109577178955,
443
+ "learning_rate": 2.2087813898656774e-05,
444
+ "loss": 1.7312,
445
+ "step": 52
446
+ },
447
+ {
448
+ "epoch": 2.25531914893617,
449
+ "grad_norm": 2.772387981414795,
450
+ "learning_rate": 1.9989285972581595e-05,
451
+ "loss": 1.8003,
452
+ "step": 53
453
+ },
454
+ {
455
+ "epoch": 2.297872340425532,
456
+ "grad_norm": 2.9776549339294434,
457
+ "learning_rate": 1.7970341065091245e-05,
458
+ "loss": 1.6636,
459
+ "step": 54
460
+ },
461
+ {
462
+ "epoch": 2.297872340425532,
463
+ "eval_loss": 2.307121753692627,
464
+ "eval_runtime": 2.2327,
465
+ "eval_samples_per_second": 8.958,
466
+ "eval_steps_per_second": 1.344,
467
+ "step": 54
468
+ },
469
+ {
470
+ "epoch": 2.3404255319148937,
471
+ "grad_norm": 2.711627960205078,
472
+ "learning_rate": 1.6036333055135344e-05,
473
+ "loss": 1.7249,
474
+ "step": 55
475
+ },
476
+ {
477
+ "epoch": 2.382978723404255,
478
+ "grad_norm": 2.998987913131714,
479
+ "learning_rate": 1.4192390584280346e-05,
480
+ "loss": 1.7655,
481
+ "step": 56
482
+ },
483
+ {
484
+ "epoch": 2.425531914893617,
485
+ "grad_norm": 3.725801706314087,
486
+ "learning_rate": 1.2443403456474017e-05,
487
+ "loss": 1.8057,
488
+ "step": 57
489
+ },
490
+ {
491
+ "epoch": 2.4680851063829787,
492
+ "grad_norm": 3.7398223876953125,
493
+ "learning_rate": 1.0794009671164484e-05,
494
+ "loss": 1.79,
495
+ "step": 58
496
+ },
497
+ {
498
+ "epoch": 2.5106382978723403,
499
+ "grad_norm": 2.811511993408203,
500
+ "learning_rate": 9.248583124159438e-06,
501
+ "loss": 1.7487,
502
+ "step": 59
503
+ },
504
+ {
505
+ "epoch": 2.5531914893617023,
506
+ "grad_norm": 3.29445219039917,
507
+ "learning_rate": 7.81122200884072e-06,
508
+ "loss": 1.617,
509
+ "step": 60
510
+ },
511
+ {
512
+ "epoch": 2.5531914893617023,
513
+ "eval_loss": 2.3166096210479736,
514
+ "eval_runtime": 2.2287,
515
+ "eval_samples_per_second": 8.974,
516
+ "eval_steps_per_second": 1.346,
517
+ "step": 60
518
+ },
519
+ {
520
+ "epoch": 2.595744680851064,
521
+ "grad_norm": 4.6789445877075195,
522
+ "learning_rate": 6.4857379484922375e-06,
523
+ "loss": 2.0944,
524
+ "step": 61
525
+ },
526
+ {
527
+ "epoch": 2.6382978723404253,
528
+ "grad_norm": 3.2288119792938232,
529
+ "learning_rate": 5.275645888560232e-06,
530
+ "loss": 1.6808,
531
+ "step": 62
532
+ },
533
+ {
534
+ "epoch": 2.6808510638297873,
535
+ "grad_norm": 4.059349536895752,
536
+ "learning_rate": 4.184154775649768e-06,
537
+ "loss": 2.0006,
538
+ "step": 63
539
+ },
540
+ {
541
+ "epoch": 2.723404255319149,
542
+ "grad_norm": 2.783024549484253,
543
+ "learning_rate": 3.2141590479753236e-06,
544
+ "loss": 1.7605,
545
+ "step": 64
546
+ },
547
+ {
548
+ "epoch": 2.7659574468085104,
549
+ "grad_norm": 3.785507917404175,
550
+ "learning_rate": 2.3682309598308747e-06,
551
+ "loss": 1.8019,
552
+ "step": 65
553
+ },
554
+ {
555
+ "epoch": 2.8085106382978724,
556
+ "grad_norm": 3.3201539516448975,
557
+ "learning_rate": 1.6486137604339813e-06,
558
+ "loss": 1.7181,
559
+ "step": 66
560
+ },
561
+ {
562
+ "epoch": 2.8085106382978724,
563
+ "eval_loss": 2.3182761669158936,
564
+ "eval_runtime": 2.2303,
565
+ "eval_samples_per_second": 8.967,
566
+ "eval_steps_per_second": 1.345,
567
+ "step": 66
568
+ },
569
+ {
570
+ "epoch": 2.851063829787234,
571
+ "grad_norm": 3.517794370651245,
572
+ "learning_rate": 1.0572157452321097e-06,
573
+ "loss": 1.4794,
574
+ "step": 67
575
+ },
576
+ {
577
+ "epoch": 2.8936170212765955,
578
+ "grad_norm": 3.758125066757202,
579
+ "learning_rate": 5.956051954461472e-07,
580
+ "loss": 1.526,
581
+ "step": 68
582
+ },
583
+ {
584
+ "epoch": 2.9361702127659575,
585
+ "grad_norm": 2.9481680393218994,
586
+ "learning_rate": 2.6500621927054715e-07,
587
+ "loss": 1.5486,
588
+ "step": 69
589
+ },
590
+ {
591
+ "epoch": 2.978723404255319,
592
+ "grad_norm": 3.1529266834259033,
593
+ "learning_rate": 6.629550575847354e-08,
594
+ "loss": 1.6324,
595
+ "step": 70
596
+ },
597
+ {
598
+ "epoch": 3.021276595744681,
599
+ "grad_norm": 5.245131492614746,
600
+ "learning_rate": 0.0,
601
+ "loss": 2.7132,
602
+ "step": 71
603
  }
604
  ],
605
  "logging_steps": 1,
 
614
  "should_evaluate": false,
615
  "should_log": false,
616
  "should_save": true,
617
+ "should_training_stop": true
618
  },
619
  "attributes": {}
620
  }
621
  },
622
+ "total_flos": 5.267399713279181e+16,
623
  "train_batch_size": 8,
624
  "trial_name": null,
625
  "trial_params": null