leixa committed
Commit 0c0749a · verified · 1 parent: 1919199

Training in progress, step 400, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e66a2dd8d3d2bbed1d876000dda0651212a54ba174dc9b3cd15e946adefaca8f
+oid sha256:18a589fe75b1cb7c48c9d24426648033f29494dfeaa9e2884404f5c3059edb01
 size 36220072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75a9067022dd03df96d0f89703e9bc525a8489b35c83eb6ec203a92f4ee520a9
+oid sha256:e142e7fb9925ec38c4eee3e4a83949d871daca0a27c1ac15384b7d7cd6729851
 size 18764180
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:652624241b697f848aa0b96d2618afbd1e071c5dbd47f895da652fcde2a22b64
+oid sha256:717a07704aabb807e176e75648bbf4a543d5155658d3dcaafd5bdf835c1026a2
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43da0688aca60835f4e18fa7e0f3cc099504828f82fd5dd994118be26b760a0f
+oid sha256:705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.04891370806668569,
+  "epoch": 0.06521827742224759,
   "eval_steps": 100,
-  "global_step": 300,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -459,6 +459,154 @@
       "eval_samples_per_second": 25.29,
       "eval_steps_per_second": 12.65,
       "step": 300
+    },
+    {
+      "epoch": 0.04972893653446378,
+      "grad_norm": 0.19537951052188873,
+      "learning_rate": 1.3939877632809278e-05,
+      "loss": 0.7115,
+      "step": 305
+    },
+    {
+      "epoch": 0.050544165002241875,
+      "grad_norm": 0.19865351915359497,
+      "learning_rate": 1.257446259144494e-05,
+      "loss": 0.7565,
+      "step": 310
+    },
+    {
+      "epoch": 0.05135939347001997,
+      "grad_norm": 0.15447907149791718,
+      "learning_rate": 1.1269751908617277e-05,
+      "loss": 0.6733,
+      "step": 315
+    },
+    {
+      "epoch": 0.052174621937798066,
+      "grad_norm": 0.2033657431602478,
+      "learning_rate": 1.0027861829824952e-05,
+      "loss": 0.7061,
+      "step": 320
+    },
+    {
+      "epoch": 0.05298985040557616,
+      "grad_norm": 0.16816996037960052,
+      "learning_rate": 8.850806705317183e-06,
+      "loss": 0.6847,
+      "step": 325
+    },
+    {
+      "epoch": 0.053805078873354256,
+      "grad_norm": 0.2432698756456375,
+      "learning_rate": 7.740495722810271e-06,
+      "loss": 0.7002,
+      "step": 330
+    },
+    {
+      "epoch": 0.05462030734113235,
+      "grad_norm": 0.24160774052143097,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.6765,
+      "step": 335
+    },
+    {
+      "epoch": 0.05543553580891045,
+      "grad_norm": 0.200673907995224,
+      "learning_rate": 5.727198717339511e-06,
+      "loss": 0.7059,
+      "step": 340
+    },
+    {
+      "epoch": 0.05625076427668854,
+      "grad_norm": 0.19187721610069275,
+      "learning_rate": 4.827478269480895e-06,
+      "loss": 0.6578,
+      "step": 345
+    },
+    {
+      "epoch": 0.05706599274446664,
+      "grad_norm": 0.23926900327205658,
+      "learning_rate": 4.001027817058789e-06,
+      "loss": 0.7351,
+      "step": 350
+    },
+    {
+      "epoch": 0.05788122121224473,
+      "grad_norm": 0.23108696937561035,
+      "learning_rate": 3.249187865729264e-06,
+      "loss": 0.6804,
+      "step": 355
+    },
+    {
+      "epoch": 0.05869644968002283,
+      "grad_norm": 0.20840156078338623,
+      "learning_rate": 2.573177902642726e-06,
+      "loss": 0.7588,
+      "step": 360
+    },
+    {
+      "epoch": 0.05951167814780092,
+      "grad_norm": 0.1447313278913498,
+      "learning_rate": 1.974094418431388e-06,
+      "loss": 0.6967,
+      "step": 365
+    },
+    {
+      "epoch": 0.06032690661557902,
+      "grad_norm": 0.22718173265457153,
+      "learning_rate": 1.4529091286973995e-06,
+      "loss": 0.703,
+      "step": 370
+    },
+    {
+      "epoch": 0.06114213508335711,
+      "grad_norm": 0.19592629373073578,
+      "learning_rate": 1.0104673978866164e-06,
+      "loss": 0.73,
+      "step": 375
+    },
+    {
+      "epoch": 0.06195736355113521,
+      "grad_norm": 0.174259752035141,
+      "learning_rate": 6.474868681043578e-07,
+      "loss": 0.7086,
+      "step": 380
+    },
+    {
+      "epoch": 0.0627725920189133,
+      "grad_norm": 0.17397546768188477,
+      "learning_rate": 3.6455629509730136e-07,
+      "loss": 0.7023,
+      "step": 385
+    },
+    {
+      "epoch": 0.0635878204866914,
+      "grad_norm": 0.19760173559188843,
+      "learning_rate": 1.6213459328950352e-07,
+      "loss": 0.7668,
+      "step": 390
+    },
+    {
+      "epoch": 0.0644030489544695,
+      "grad_norm": 0.24907608330249786,
+      "learning_rate": 4.055009142152067e-08,
+      "loss": 0.7054,
+      "step": 395
+    },
+    {
+      "epoch": 0.06521827742224759,
+      "grad_norm": 0.35565948486328125,
+      "learning_rate": 0.0,
+      "loss": 0.701,
+      "step": 400
+    },
+    {
+      "epoch": 0.06521827742224759,
+      "eval_loss": 0.7286320924758911,
+      "eval_runtime": 101.9776,
+      "eval_samples_per_second": 25.329,
+      "eval_steps_per_second": 12.669,
+      "step": 400
     }
   ],
   "logging_steps": 5,
@@ -473,12 +621,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.808394390752461e+16,
+  "total_flos": 3.729929899553587e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null