error577 committed on
Commit
7d59c35
·
verified ·
1 Parent(s): 787032b

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e58f190f0a19b574e782cfd6a63dd8082c3b916a84885dca77b6d848f9eae51
3
  size 30026872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c58fbf22821d5ddd89f8f0371fcd6e275cc4303da8e85f66fe324e9e9e77b457
3
  size 30026872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b50aed80f0fe7ea4d4b29241caae2998402dec7a69c855ef11be8a40b1dd88c
3
  size 15611732
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ce55eb5c7af6e8080edd51421f0304e64814854d1d14a3973453afd3bc853fc
3
  size 15611732
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbb7b88fde3d49d1800cfb6a606d2abc469146233dd09628bf7b85f2e974e8ea
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03875118d09855e6a66800decf56d9db8fcb2516925d3d7ed640f2fb0d895a13
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6336d9295f9054a1bd08411a37c67a0588c56d0f45f7492296231c3ba16a6d33
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe1d153de177b356f9e3a70d6e4ec979560b0c300994e71ca4cb89afc74c5b3a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.9942711648631444,
5
  "eval_steps": 32,
6
- "global_step": 441,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -427,6 +427,64 @@
427
  "learning_rate": 3.654162132698918e-06,
428
  "loss": 1.8139,
429
  "step": 440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  }
431
  ],
432
  "logging_steps": 10,
@@ -441,12 +499,12 @@
441
  "should_evaluate": false,
442
  "should_log": false,
443
  "should_save": true,
444
- "should_training_stop": false
445
  },
446
  "attributes": {}
447
  }
448
  },
449
- "total_flos": 5.60005442961408e+16,
450
  "train_batch_size": 1,
451
  "trial_name": null,
452
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.3948652662847443,
5
  "eval_steps": 32,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
427
  "learning_rate": 3.654162132698918e-06,
428
  "loss": 1.8139,
429
  "step": 440
430
+ },
431
+ {
432
+ "epoch": 3.0417992785911308,
433
+ "eval_loss": 2.039308786392212,
434
+ "eval_runtime": 14.3442,
435
+ "eval_samples_per_second": 17.359,
436
+ "eval_steps_per_second": 17.359,
437
+ "step": 448
438
+ },
439
+ {
440
+ "epoch": 3.0553787396562697,
441
+ "grad_norm": 0.9411285519599915,
442
+ "learning_rate": 2.547212649466568e-06,
443
+ "loss": 2.0269,
444
+ "step": 450
445
+ },
446
+ {
447
+ "epoch": 3.123276044981965,
448
+ "grad_norm": 0.9969708323478699,
449
+ "learning_rate": 1.6352568480485276e-06,
450
+ "loss": 1.8136,
451
+ "step": 460
452
+ },
453
+ {
454
+ "epoch": 3.19117335030766,
455
+ "grad_norm": 1.0875673294067383,
456
+ "learning_rate": 9.220421504467281e-07,
457
+ "loss": 1.8409,
458
+ "step": 470
459
+ },
460
+ {
461
+ "epoch": 3.2590706556333546,
462
+ "grad_norm": 0.9148634076118469,
463
+ "learning_rate": 4.104993088376974e-07,
464
+ "loss": 1.7138,
465
+ "step": 480
466
+ },
467
+ {
468
+ "epoch": 3.2590706556333546,
469
+ "eval_loss": 2.039867877960205,
470
+ "eval_runtime": 14.3381,
471
+ "eval_samples_per_second": 17.366,
472
+ "eval_steps_per_second": 17.366,
473
+ "step": 480
474
+ },
475
+ {
476
+ "epoch": 3.3269679609590495,
477
+ "grad_norm": 0.9893661737442017,
478
+ "learning_rate": 1.0273036248318324e-07,
479
+ "loss": 1.8856,
480
+ "step": 490
481
+ },
482
+ {
483
+ "epoch": 3.3948652662847443,
484
+ "grad_norm": 1.122750163078308,
485
+ "learning_rate": 0.0,
486
+ "loss": 1.8205,
487
+ "step": 500
488
  }
489
  ],
490
  "logging_steps": 10,
 
499
  "should_evaluate": false,
500
  "should_log": false,
501
  "should_save": true,
502
+ "should_training_stop": true
503
  },
504
  "attributes": {}
505
  }
506
  },
507
+ "total_flos": 6.349892226121728e+16,
508
  "train_batch_size": 1,
509
  "trial_name": null,
510
  "trial_params": null