qingy2024 commited on
Commit
405ca90
·
verified ·
1 Parent(s): 0cbed2a

Upload checkpoint 2650

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:952c309379f76905daa77066d4496fa74792b15ba875c0be3c12f9c41a78acce
3
  size 4957560304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e47ae497fc6376c1570e0b513bf9eba564cc3dc63409c7e352d60abc4b33dfb
3
  size 4957560304
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcf07a7448c9b7b6ec4edb89149df3e835b966c4cf920e192e8d9b3be9e15e7c
3
  size 3989163248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53f4a1f81f197d242f479aec2824d8027d9ad76442cadd887c5d26f6f06c2443
3
  size 3989163248
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcb29717c5eb981aab5be12ea7b027e90292c5f54fab761c249cd500c4eba893
3
  size 17893874312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f470f55bb1f8a4a87c704bba419f91130e62a0423baf87cabcf0ccecebccdf40
3
  size 17893874312
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bfe8385483ee0511ac93fc8694ea8ab941b50846cfb63dfdcbfc3051cd0d56d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fcc82d44958fdcee26546444839e8daa60c49af9c4214203fed27662062db81
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7710557532621589,
5
  "eval_steps": 500,
6
- "global_step": 2600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6069,6 +6069,125 @@
6069
  "learning_rate": 2.5034639808154114e-05,
6070
  "loss": 0.6276,
6071
  "step": 2598
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6072
  }
6073
  ],
6074
  "logging_steps": 3,
@@ -6088,7 +6207,7 @@
6088
  "attributes": {}
6089
  }
6090
  },
6091
- "total_flos": 1.7020928211199984e+19,
6092
  "train_batch_size": 8,
6093
  "trial_name": null,
6094
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7858837485172004,
5
  "eval_steps": 500,
6
+ "global_step": 2650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6069
  "learning_rate": 2.5034639808154114e-05,
6070
  "loss": 0.6276,
6071
  "step": 2598
6072
+ },
6073
+ {
6074
+ "epoch": 0.7713523131672598,
6075
+ "grad_norm": 0.2578125,
6076
+ "learning_rate": 2.4849402056698334e-05,
6077
+ "loss": 0.6062,
6078
+ "step": 2601
6079
+ },
6080
+ {
6081
+ "epoch": 0.7722419928825622,
6082
+ "grad_norm": 0.263671875,
6083
+ "learning_rate": 2.4664754885592268e-05,
6084
+ "loss": 0.5881,
6085
+ "step": 2604
6086
+ },
6087
+ {
6088
+ "epoch": 0.7731316725978647,
6089
+ "grad_norm": 0.2578125,
6090
+ "learning_rate": 2.4480699745908707e-05,
6091
+ "loss": 0.6124,
6092
+ "step": 2607
6093
+ },
6094
+ {
6095
+ "epoch": 0.7740213523131673,
6096
+ "grad_norm": 0.259765625,
6097
+ "learning_rate": 2.4297238084067985e-05,
6098
+ "loss": 0.5779,
6099
+ "step": 2610
6100
+ },
6101
+ {
6102
+ "epoch": 0.7749110320284698,
6103
+ "grad_norm": 0.263671875,
6104
+ "learning_rate": 2.4114371341826415e-05,
6105
+ "loss": 0.6019,
6106
+ "step": 2613
6107
+ },
6108
+ {
6109
+ "epoch": 0.7758007117437722,
6110
+ "grad_norm": 0.259765625,
6111
+ "learning_rate": 2.3932100956265148e-05,
6112
+ "loss": 0.6087,
6113
+ "step": 2616
6114
+ },
6115
+ {
6116
+ "epoch": 0.7766903914590747,
6117
+ "grad_norm": 0.265625,
6118
+ "learning_rate": 2.375042835977872e-05,
6119
+ "loss": 0.5983,
6120
+ "step": 2619
6121
+ },
6122
+ {
6123
+ "epoch": 0.7775800711743772,
6124
+ "grad_norm": 0.283203125,
6125
+ "learning_rate": 2.3569354980063906e-05,
6126
+ "loss": 0.6024,
6127
+ "step": 2622
6128
+ },
6129
+ {
6130
+ "epoch": 0.7784697508896797,
6131
+ "grad_norm": 0.25,
6132
+ "learning_rate": 2.3388882240108423e-05,
6133
+ "loss": 0.6039,
6134
+ "step": 2625
6135
+ },
6136
+ {
6137
+ "epoch": 0.7793594306049823,
6138
+ "grad_norm": 0.26953125,
6139
+ "learning_rate": 2.3209011558179826e-05,
6140
+ "loss": 0.5958,
6141
+ "step": 2628
6142
+ },
6143
+ {
6144
+ "epoch": 0.7802491103202847,
6145
+ "grad_norm": 0.26171875,
6146
+ "learning_rate": 2.3029744347814365e-05,
6147
+ "loss": 0.5979,
6148
+ "step": 2631
6149
+ },
6150
+ {
6151
+ "epoch": 0.7811387900355872,
6152
+ "grad_norm": 0.25390625,
6153
+ "learning_rate": 2.2851082017805703e-05,
6154
+ "loss": 0.5918,
6155
+ "step": 2634
6156
+ },
6157
+ {
6158
+ "epoch": 0.7820284697508897,
6159
+ "grad_norm": 0.26171875,
6160
+ "learning_rate": 2.2673025972194106e-05,
6161
+ "loss": 0.5906,
6162
+ "step": 2637
6163
+ },
6164
+ {
6165
+ "epoch": 0.7829181494661922,
6166
+ "grad_norm": 0.255859375,
6167
+ "learning_rate": 2.2495577610255203e-05,
6168
+ "loss": 0.5857,
6169
+ "step": 2640
6170
+ },
6171
+ {
6172
+ "epoch": 0.7838078291814946,
6173
+ "grad_norm": 0.26171875,
6174
+ "learning_rate": 2.2318738326489074e-05,
6175
+ "loss": 0.602,
6176
+ "step": 2643
6177
+ },
6178
+ {
6179
+ "epoch": 0.7846975088967971,
6180
+ "grad_norm": 0.26171875,
6181
+ "learning_rate": 2.2142509510609277e-05,
6182
+ "loss": 0.5846,
6183
+ "step": 2646
6184
+ },
6185
+ {
6186
+ "epoch": 0.7855871886120996,
6187
+ "grad_norm": 0.265625,
6188
+ "learning_rate": 2.196689254753196e-05,
6189
+ "loss": 0.5983,
6190
+ "step": 2649
6191
  }
6192
  ],
6193
  "logging_steps": 3,
 
6207
  "attributes": {}
6208
  }
6209
  },
6210
+ "total_flos": 1.734825375372306e+19,
6211
  "train_batch_size": 8,
6212
  "trial_name": null,
6213
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57ae6aec2771e886a275e400f485be0955f7d493f16a01d43cda316730a80162
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81361d744621af8d01826a244bb989030bb99e3808495725a86acec31100d138
3
  size 5368