alberto98fx commited on
Commit
0dfed7c
·
verified ·
1 Parent(s): f9d89df

Training in progress, step 101, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c88da77474d1edb85193de0e832cf0728836e0bfa77c02038e2f57742c53a59
3
  size 42002584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8fab7a64c6a3df09bc58989b0f1f2f337313e315fc28c4c967f0839306fb332
3
  size 42002584
last-checkpoint/global_step101/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eac782b830f613fed49ae16b868c77359cf2b9365b58a5f5520e6f7db81f57c
3
+ size 251710672
last-checkpoint/global_step101/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3145596fd887f9630cb2b00acb862d764f71045b5781a7c0ed6e2668c6c30dc
3
+ size 153747513
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step100
 
1
+ global_step101
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21e76ce417a2453046e6a33726c8f3832834739201b50fea30214604b61d7e69
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4939206fedd19addab5d3d03e3b74d7e91cd057e7fdc7885588fe52ec8c34951
3
  size 14244
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0024875621890547263,
5
  "eval_steps": 1000,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -707,6 +707,13 @@
707
  "learning_rate": 4.166666666666667e-06,
708
  "loss": 1.9717,
709
  "step": 100
 
 
 
 
 
 
 
710
  }
711
  ],
712
  "logging_steps": 1,
@@ -721,12 +728,12 @@
721
  "should_evaluate": false,
722
  "should_log": false,
723
  "should_save": true,
724
- "should_training_stop": false
725
  },
726
  "attributes": {}
727
  }
728
  },
729
- "total_flos": 6150946682830848.0,
730
  "train_batch_size": 2,
731
  "trial_name": null,
732
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.002512437810945274,
5
  "eval_steps": 1000,
6
+ "global_step": 101,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
707
  "learning_rate": 4.166666666666667e-06,
708
  "loss": 1.9717,
709
  "step": 100
710
+ },
711
+ {
712
+ "epoch": 0.002512437810945274,
713
+ "grad_norm": 1.2955700159072876,
714
+ "learning_rate": 2.0833333333333334e-06,
715
+ "loss": 1.9934,
716
+ "step": 101
717
  }
718
  ],
719
  "logging_steps": 1,
 
728
  "should_evaluate": false,
729
  "should_log": false,
730
  "should_save": true,
731
+ "should_training_stop": true
732
  },
733
  "attributes": {}
734
  }
735
  },
736
+ "total_flos": 6243543786979328.0,
737
  "train_batch_size": 2,
738
  "trial_name": null,
739
  "trial_params": null